1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * intel_idle.c - native hardware idle loop for modern Intel processors 4 * 5 * Copyright (c) 2013 - 2020, Intel Corporation. 6 * Len Brown <len.brown@intel.com> 7 * Rafael J. Wysocki <rafael.j.wysocki@intel.com> 8 */ 9 10/* 11 * intel_idle is a cpuidle driver that loads on all Intel CPUs with MWAIT 12 * in lieu of the legacy ACPI processor_idle driver. The intent is to 13 * make Linux more efficient on these processors, as intel_idle knows 14 * more than ACPI, as well as make Linux more immune to ACPI BIOS bugs. 15 */ 16 17/* 18 * Design Assumptions 19 * 20 * All CPUs have same idle states as boot CPU 21 * 22 * Chipset BM_STS (bus master status) bit is a NOP 23 * for preventing entry into deep C-states 24 * 25 * CPU will flush caches as needed when entering a C-state via MWAIT 26 * (in contrast to entering ACPI C3, in which case the WBINVD 27 * instruction needs to be executed to flush the caches) 28 */ 29 30/* 31 * Known limitations 32 * 33 * ACPI has a .suspend hack to turn off deep c-statees during suspend 34 * to avoid complications with the lapic timer workaround. 35 * Have not seen issues with suspend, but may need same workaround here. 
36 * 37 */ 38 39/* un-comment DEBUG to enable pr_debug() statements */ 40#define DEBUG 41 42#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43 44#include <linux/acpi.h> 45#include <linux/kernel.h> 46#include <linux/cpuidle.h> 47#include <linux/tick.h> 48#include <trace/events/power.h> 49#include <linux/sched.h> 50#include <linux/sched/smt.h> 51#include <linux/notifier.h> 52#include <linux/cpu.h> 53#include <linux/moduleparam.h> 54#include <asm/cpu_device_id.h> 55#include <asm/intel-family.h> 56#include <asm/nospec-branch.h> 57#include <asm/mwait.h> 58#include <asm/msr.h> 59 60#define INTEL_IDLE_VERSION "0.5.1" 61 62static struct cpuidle_driver intel_idle_driver = { 63 .name = "intel_idle", 64 .owner = THIS_MODULE, 65}; 66/* intel_idle.max_cstate=0 disables driver */ 67static int max_cstate = CPUIDLE_STATE_MAX - 1; 68static unsigned int disabled_states_mask; 69 70static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; 71 72static unsigned long auto_demotion_disable_flags; 73static bool disable_promotion_to_c1e; 74 75struct idle_cpu { 76 struct cpuidle_state *state_table; 77 78 /* 79 * Hardware C-state auto-demotion may not always be optimal. 80 * Indicate which enable bits to clear here. 81 */ 82 unsigned long auto_demotion_disable_flags; 83 bool byt_auto_demotion_disable_flag; 84 bool disable_promotion_to_c1e; 85 bool use_acpi; 86}; 87 88static const struct idle_cpu *icpu __initdata; 89static struct cpuidle_state *cpuidle_state_table __initdata; 90 91static unsigned int mwait_substates __initdata; 92 93/* 94 * Enable this state by default even if the ACPI _CST does not list it. 95 */ 96#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) 97 98/* 99 * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE 100 * above. 101 */ 102#define CPUIDLE_FLAG_IBRS BIT(16) 103 104/* 105 * MWAIT takes an 8-bit "hint" in EAX "suggesting" 106 * the C-state (top nibble) and sub-state (bottom nibble) 107 * 0x00 means "MWAIT(C1)", 0x10 means "MWAIT(C2)" etc. 
108 * 109 * We store the hint at the top of our "flags" for each state. 110 */ 111#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF) 112#define MWAIT2flg(eax) ((eax & 0xFF) << 24) 113 114/** 115 * intel_idle - Ask the processor to enter the given idle state. 116 * @dev: cpuidle device of the target CPU. 117 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 118 * @index: Target idle state index. 119 * 120 * Use the MWAIT instruction to notify the processor that the CPU represented by 121 * @dev is idle and it can try to enter the idle state corresponding to @index. 122 * 123 * If the local APIC timer is not known to be reliable in the target idle state, 124 * enable one-shot tick broadcasting for the target CPU before executing MWAIT. 125 * 126 * Optionally call leave_mm() for the target CPU upfront to avoid wakeups due to 127 * flushing user TLBs. 128 * 129 * Must be called under local_irq_disable(). 130 */ 131static __cpuidle int intel_idle(struct cpuidle_device *dev, 132 struct cpuidle_driver *drv, int index) 133{ 134 struct cpuidle_state *state = &drv->states[index]; 135 unsigned long eax = flg2MWAIT(state->flags); 136 unsigned long ecx = 1; /* break on interrupt flag */ 137 138 mwait_idle_with_hints(eax, ecx); 139 140 return index; 141} 142 143static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, 144 struct cpuidle_driver *drv, int index) 145{ 146 bool smt_active = sched_smt_active(); 147 u64 spec_ctrl = spec_ctrl_current(); 148 int ret; 149 150 if (smt_active) 151 wrmsrl(MSR_IA32_SPEC_CTRL, 0); 152 153 ret = intel_idle(dev, drv, index); 154 155 if (smt_active) 156 wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); 157 158 return ret; 159} 160 161/** 162 * intel_idle_s2idle - Ask the processor to enter the given idle state. 163 * @dev: cpuidle device of the target CPU. 164 * @drv: cpuidle driver (assumed to point to intel_idle_driver). 165 * @index: Target idle state index. 
166 * 167 * Use the MWAIT instruction to notify the processor that the CPU represented by 168 * @dev is idle and it can try to enter the idle state corresponding to @index. 169 * 170 * Invoked as a suspend-to-idle callback routine with frozen user space, frozen 171 * scheduler tick and suspended scheduler clock on the target CPU. 172 */ 173static __cpuidle int intel_idle_s2idle(struct cpuidle_device *dev, 174 struct cpuidle_driver *drv, int index) 175{ 176 unsigned long eax = flg2MWAIT(drv->states[index].flags); 177 unsigned long ecx = 1; /* break on interrupt flag */ 178 179 mwait_idle_with_hints(eax, ecx); 180 181 return 0; 182} 183 184/* 185 * States are indexed by the cstate number, 186 * which is also the index into the MWAIT hint array. 187 * Thus C0 is a dummy. 188 */ 189static struct cpuidle_state nehalem_cstates[] __initdata = { 190 { 191 .name = "C1", 192 .desc = "MWAIT 0x00", 193 .flags = MWAIT2flg(0x00), 194 .exit_latency = 3, 195 .target_residency = 6, 196 .enter = &intel_idle, 197 .enter_s2idle = intel_idle_s2idle, }, 198 { 199 .name = "C1E", 200 .desc = "MWAIT 0x01", 201 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 202 .exit_latency = 10, 203 .target_residency = 20, 204 .enter = &intel_idle, 205 .enter_s2idle = intel_idle_s2idle, }, 206 { 207 .name = "C3", 208 .desc = "MWAIT 0x10", 209 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 210 .exit_latency = 20, 211 .target_residency = 80, 212 .enter = &intel_idle, 213 .enter_s2idle = intel_idle_s2idle, }, 214 { 215 .name = "C6", 216 .desc = "MWAIT 0x20", 217 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 218 .exit_latency = 200, 219 .target_residency = 800, 220 .enter = &intel_idle, 221 .enter_s2idle = intel_idle_s2idle, }, 222 { 223 .enter = NULL } 224}; 225 226static struct cpuidle_state snb_cstates[] __initdata = { 227 { 228 .name = "C1", 229 .desc = "MWAIT 0x00", 230 .flags = MWAIT2flg(0x00), 231 .exit_latency = 2, 232 .target_residency = 2, 233 .enter = &intel_idle, 234 
.enter_s2idle = intel_idle_s2idle, }, 235 { 236 .name = "C1E", 237 .desc = "MWAIT 0x01", 238 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 239 .exit_latency = 10, 240 .target_residency = 20, 241 .enter = &intel_idle, 242 .enter_s2idle = intel_idle_s2idle, }, 243 { 244 .name = "C3", 245 .desc = "MWAIT 0x10", 246 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 247 .exit_latency = 80, 248 .target_residency = 211, 249 .enter = &intel_idle, 250 .enter_s2idle = intel_idle_s2idle, }, 251 { 252 .name = "C6", 253 .desc = "MWAIT 0x20", 254 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 255 .exit_latency = 104, 256 .target_residency = 345, 257 .enter = &intel_idle, 258 .enter_s2idle = intel_idle_s2idle, }, 259 { 260 .name = "C7", 261 .desc = "MWAIT 0x30", 262 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 263 .exit_latency = 109, 264 .target_residency = 345, 265 .enter = &intel_idle, 266 .enter_s2idle = intel_idle_s2idle, }, 267 { 268 .enter = NULL } 269}; 270 271static struct cpuidle_state byt_cstates[] __initdata = { 272 { 273 .name = "C1", 274 .desc = "MWAIT 0x00", 275 .flags = MWAIT2flg(0x00), 276 .exit_latency = 1, 277 .target_residency = 1, 278 .enter = &intel_idle, 279 .enter_s2idle = intel_idle_s2idle, }, 280 { 281 .name = "C6N", 282 .desc = "MWAIT 0x58", 283 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 284 .exit_latency = 300, 285 .target_residency = 275, 286 .enter = &intel_idle, 287 .enter_s2idle = intel_idle_s2idle, }, 288 { 289 .name = "C6S", 290 .desc = "MWAIT 0x52", 291 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 292 .exit_latency = 500, 293 .target_residency = 560, 294 .enter = &intel_idle, 295 .enter_s2idle = intel_idle_s2idle, }, 296 { 297 .name = "C7", 298 .desc = "MWAIT 0x60", 299 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 300 .exit_latency = 1200, 301 .target_residency = 4000, 302 .enter = &intel_idle, 303 .enter_s2idle = intel_idle_s2idle, }, 304 { 305 .name = "C7S", 306 .desc = "MWAIT 0x64", 307 
.flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 308 .exit_latency = 10000, 309 .target_residency = 20000, 310 .enter = &intel_idle, 311 .enter_s2idle = intel_idle_s2idle, }, 312 { 313 .enter = NULL } 314}; 315 316static struct cpuidle_state cht_cstates[] __initdata = { 317 { 318 .name = "C1", 319 .desc = "MWAIT 0x00", 320 .flags = MWAIT2flg(0x00), 321 .exit_latency = 1, 322 .target_residency = 1, 323 .enter = &intel_idle, 324 .enter_s2idle = intel_idle_s2idle, }, 325 { 326 .name = "C6N", 327 .desc = "MWAIT 0x58", 328 .flags = MWAIT2flg(0x58) | CPUIDLE_FLAG_TLB_FLUSHED, 329 .exit_latency = 80, 330 .target_residency = 275, 331 .enter = &intel_idle, 332 .enter_s2idle = intel_idle_s2idle, }, 333 { 334 .name = "C6S", 335 .desc = "MWAIT 0x52", 336 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 337 .exit_latency = 200, 338 .target_residency = 560, 339 .enter = &intel_idle, 340 .enter_s2idle = intel_idle_s2idle, }, 341 { 342 .name = "C7", 343 .desc = "MWAIT 0x60", 344 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 345 .exit_latency = 1200, 346 .target_residency = 4000, 347 .enter = &intel_idle, 348 .enter_s2idle = intel_idle_s2idle, }, 349 { 350 .name = "C7S", 351 .desc = "MWAIT 0x64", 352 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 353 .exit_latency = 10000, 354 .target_residency = 20000, 355 .enter = &intel_idle, 356 .enter_s2idle = intel_idle_s2idle, }, 357 { 358 .enter = NULL } 359}; 360 361static struct cpuidle_state ivb_cstates[] __initdata = { 362 { 363 .name = "C1", 364 .desc = "MWAIT 0x00", 365 .flags = MWAIT2flg(0x00), 366 .exit_latency = 1, 367 .target_residency = 1, 368 .enter = &intel_idle, 369 .enter_s2idle = intel_idle_s2idle, }, 370 { 371 .name = "C1E", 372 .desc = "MWAIT 0x01", 373 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 374 .exit_latency = 10, 375 .target_residency = 20, 376 .enter = &intel_idle, 377 .enter_s2idle = intel_idle_s2idle, }, 378 { 379 .name = "C3", 380 .desc = "MWAIT 0x10", 381 .flags = MWAIT2flg(0x10) 
| CPUIDLE_FLAG_TLB_FLUSHED, 382 .exit_latency = 59, 383 .target_residency = 156, 384 .enter = &intel_idle, 385 .enter_s2idle = intel_idle_s2idle, }, 386 { 387 .name = "C6", 388 .desc = "MWAIT 0x20", 389 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 390 .exit_latency = 80, 391 .target_residency = 300, 392 .enter = &intel_idle, 393 .enter_s2idle = intel_idle_s2idle, }, 394 { 395 .name = "C7", 396 .desc = "MWAIT 0x30", 397 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 398 .exit_latency = 87, 399 .target_residency = 300, 400 .enter = &intel_idle, 401 .enter_s2idle = intel_idle_s2idle, }, 402 { 403 .enter = NULL } 404}; 405 406static struct cpuidle_state ivt_cstates[] __initdata = { 407 { 408 .name = "C1", 409 .desc = "MWAIT 0x00", 410 .flags = MWAIT2flg(0x00), 411 .exit_latency = 1, 412 .target_residency = 1, 413 .enter = &intel_idle, 414 .enter_s2idle = intel_idle_s2idle, }, 415 { 416 .name = "C1E", 417 .desc = "MWAIT 0x01", 418 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 419 .exit_latency = 10, 420 .target_residency = 80, 421 .enter = &intel_idle, 422 .enter_s2idle = intel_idle_s2idle, }, 423 { 424 .name = "C3", 425 .desc = "MWAIT 0x10", 426 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 427 .exit_latency = 59, 428 .target_residency = 156, 429 .enter = &intel_idle, 430 .enter_s2idle = intel_idle_s2idle, }, 431 { 432 .name = "C6", 433 .desc = "MWAIT 0x20", 434 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 435 .exit_latency = 82, 436 .target_residency = 300, 437 .enter = &intel_idle, 438 .enter_s2idle = intel_idle_s2idle, }, 439 { 440 .enter = NULL } 441}; 442 443static struct cpuidle_state ivt_cstates_4s[] __initdata = { 444 { 445 .name = "C1", 446 .desc = "MWAIT 0x00", 447 .flags = MWAIT2flg(0x00), 448 .exit_latency = 1, 449 .target_residency = 1, 450 .enter = &intel_idle, 451 .enter_s2idle = intel_idle_s2idle, }, 452 { 453 .name = "C1E", 454 .desc = "MWAIT 0x01", 455 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 456 
.exit_latency = 10, 457 .target_residency = 250, 458 .enter = &intel_idle, 459 .enter_s2idle = intel_idle_s2idle, }, 460 { 461 .name = "C3", 462 .desc = "MWAIT 0x10", 463 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 464 .exit_latency = 59, 465 .target_residency = 300, 466 .enter = &intel_idle, 467 .enter_s2idle = intel_idle_s2idle, }, 468 { 469 .name = "C6", 470 .desc = "MWAIT 0x20", 471 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 472 .exit_latency = 84, 473 .target_residency = 400, 474 .enter = &intel_idle, 475 .enter_s2idle = intel_idle_s2idle, }, 476 { 477 .enter = NULL } 478}; 479 480static struct cpuidle_state ivt_cstates_8s[] __initdata = { 481 { 482 .name = "C1", 483 .desc = "MWAIT 0x00", 484 .flags = MWAIT2flg(0x00), 485 .exit_latency = 1, 486 .target_residency = 1, 487 .enter = &intel_idle, 488 .enter_s2idle = intel_idle_s2idle, }, 489 { 490 .name = "C1E", 491 .desc = "MWAIT 0x01", 492 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 493 .exit_latency = 10, 494 .target_residency = 500, 495 .enter = &intel_idle, 496 .enter_s2idle = intel_idle_s2idle, }, 497 { 498 .name = "C3", 499 .desc = "MWAIT 0x10", 500 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 501 .exit_latency = 59, 502 .target_residency = 600, 503 .enter = &intel_idle, 504 .enter_s2idle = intel_idle_s2idle, }, 505 { 506 .name = "C6", 507 .desc = "MWAIT 0x20", 508 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 509 .exit_latency = 88, 510 .target_residency = 700, 511 .enter = &intel_idle, 512 .enter_s2idle = intel_idle_s2idle, }, 513 { 514 .enter = NULL } 515}; 516 517static struct cpuidle_state hsw_cstates[] __initdata = { 518 { 519 .name = "C1", 520 .desc = "MWAIT 0x00", 521 .flags = MWAIT2flg(0x00), 522 .exit_latency = 2, 523 .target_residency = 2, 524 .enter = &intel_idle, 525 .enter_s2idle = intel_idle_s2idle, }, 526 { 527 .name = "C1E", 528 .desc = "MWAIT 0x01", 529 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 530 .exit_latency = 10, 531 
.target_residency = 20, 532 .enter = &intel_idle, 533 .enter_s2idle = intel_idle_s2idle, }, 534 { 535 .name = "C3", 536 .desc = "MWAIT 0x10", 537 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 538 .exit_latency = 33, 539 .target_residency = 100, 540 .enter = &intel_idle, 541 .enter_s2idle = intel_idle_s2idle, }, 542 { 543 .name = "C6", 544 .desc = "MWAIT 0x20", 545 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 546 .exit_latency = 133, 547 .target_residency = 400, 548 .enter = &intel_idle, 549 .enter_s2idle = intel_idle_s2idle, }, 550 { 551 .name = "C7s", 552 .desc = "MWAIT 0x32", 553 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 554 .exit_latency = 166, 555 .target_residency = 500, 556 .enter = &intel_idle, 557 .enter_s2idle = intel_idle_s2idle, }, 558 { 559 .name = "C8", 560 .desc = "MWAIT 0x40", 561 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 562 .exit_latency = 300, 563 .target_residency = 900, 564 .enter = &intel_idle, 565 .enter_s2idle = intel_idle_s2idle, }, 566 { 567 .name = "C9", 568 .desc = "MWAIT 0x50", 569 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 570 .exit_latency = 600, 571 .target_residency = 1800, 572 .enter = &intel_idle, 573 .enter_s2idle = intel_idle_s2idle, }, 574 { 575 .name = "C10", 576 .desc = "MWAIT 0x60", 577 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 578 .exit_latency = 2600, 579 .target_residency = 7700, 580 .enter = &intel_idle, 581 .enter_s2idle = intel_idle_s2idle, }, 582 { 583 .enter = NULL } 584}; 585static struct cpuidle_state bdw_cstates[] __initdata = { 586 { 587 .name = "C1", 588 .desc = "MWAIT 0x00", 589 .flags = MWAIT2flg(0x00), 590 .exit_latency = 2, 591 .target_residency = 2, 592 .enter = &intel_idle, 593 .enter_s2idle = intel_idle_s2idle, }, 594 { 595 .name = "C1E", 596 .desc = "MWAIT 0x01", 597 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 598 .exit_latency = 10, 599 .target_residency = 20, 600 .enter = &intel_idle, 601 .enter_s2idle = intel_idle_s2idle, }, 602 { 603 
.name = "C3", 604 .desc = "MWAIT 0x10", 605 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 606 .exit_latency = 40, 607 .target_residency = 100, 608 .enter = &intel_idle, 609 .enter_s2idle = intel_idle_s2idle, }, 610 { 611 .name = "C6", 612 .desc = "MWAIT 0x20", 613 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 614 .exit_latency = 133, 615 .target_residency = 400, 616 .enter = &intel_idle, 617 .enter_s2idle = intel_idle_s2idle, }, 618 { 619 .name = "C7s", 620 .desc = "MWAIT 0x32", 621 .flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TLB_FLUSHED, 622 .exit_latency = 166, 623 .target_residency = 500, 624 .enter = &intel_idle, 625 .enter_s2idle = intel_idle_s2idle, }, 626 { 627 .name = "C8", 628 .desc = "MWAIT 0x40", 629 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 630 .exit_latency = 300, 631 .target_residency = 900, 632 .enter = &intel_idle, 633 .enter_s2idle = intel_idle_s2idle, }, 634 { 635 .name = "C9", 636 .desc = "MWAIT 0x50", 637 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 638 .exit_latency = 600, 639 .target_residency = 1800, 640 .enter = &intel_idle, 641 .enter_s2idle = intel_idle_s2idle, }, 642 { 643 .name = "C10", 644 .desc = "MWAIT 0x60", 645 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 646 .exit_latency = 2600, 647 .target_residency = 7700, 648 .enter = &intel_idle, 649 .enter_s2idle = intel_idle_s2idle, }, 650 { 651 .enter = NULL } 652}; 653 654static struct cpuidle_state skl_cstates[] __initdata = { 655 { 656 .name = "C1", 657 .desc = "MWAIT 0x00", 658 .flags = MWAIT2flg(0x00), 659 .exit_latency = 2, 660 .target_residency = 2, 661 .enter = &intel_idle, 662 .enter_s2idle = intel_idle_s2idle, }, 663 { 664 .name = "C1E", 665 .desc = "MWAIT 0x01", 666 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 667 .exit_latency = 10, 668 .target_residency = 20, 669 .enter = &intel_idle, 670 .enter_s2idle = intel_idle_s2idle, }, 671 { 672 .name = "C3", 673 .desc = "MWAIT 0x10", 674 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 
675 .exit_latency = 70, 676 .target_residency = 100, 677 .enter = &intel_idle, 678 .enter_s2idle = intel_idle_s2idle, }, 679 { 680 .name = "C6", 681 .desc = "MWAIT 0x20", 682 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 683 .exit_latency = 85, 684 .target_residency = 200, 685 .enter = &intel_idle, 686 .enter_s2idle = intel_idle_s2idle, }, 687 { 688 .name = "C7s", 689 .desc = "MWAIT 0x33", 690 .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 691 .exit_latency = 124, 692 .target_residency = 800, 693 .enter = &intel_idle, 694 .enter_s2idle = intel_idle_s2idle, }, 695 { 696 .name = "C8", 697 .desc = "MWAIT 0x40", 698 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 699 .exit_latency = 200, 700 .target_residency = 800, 701 .enter = &intel_idle, 702 .enter_s2idle = intel_idle_s2idle, }, 703 { 704 .name = "C9", 705 .desc = "MWAIT 0x50", 706 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 707 .exit_latency = 480, 708 .target_residency = 5000, 709 .enter = &intel_idle, 710 .enter_s2idle = intel_idle_s2idle, }, 711 { 712 .name = "C10", 713 .desc = "MWAIT 0x60", 714 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, 715 .exit_latency = 890, 716 .target_residency = 5000, 717 .enter = &intel_idle, 718 .enter_s2idle = intel_idle_s2idle, }, 719 { 720 .enter = NULL } 721}; 722 723static struct cpuidle_state skx_cstates[] __initdata = { 724 { 725 .name = "C1", 726 .desc = "MWAIT 0x00", 727 .flags = MWAIT2flg(0x00), 728 .exit_latency = 2, 729 .target_residency = 2, 730 .enter = &intel_idle, 731 .enter_s2idle = intel_idle_s2idle, }, 732 { 733 .name = "C1E", 734 .desc = "MWAIT 0x01", 735 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 736 .exit_latency = 10, 737 .target_residency = 20, 738 .enter = &intel_idle, 739 .enter_s2idle = intel_idle_s2idle, }, 740 { 741 .name = "C6", 742 .desc = "MWAIT 0x20", 743 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED 
| CPUIDLE_FLAG_IBRS, 744 .exit_latency = 133, 745 .target_residency = 600, 746 .enter = &intel_idle, 747 .enter_s2idle = intel_idle_s2idle, }, 748 { 749 .enter = NULL } 750}; 751 752static struct cpuidle_state icx_cstates[] __initdata = { 753 { 754 .name = "C1", 755 .desc = "MWAIT 0x00", 756 .flags = MWAIT2flg(0x00), 757 .exit_latency = 1, 758 .target_residency = 1, 759 .enter = &intel_idle, 760 .enter_s2idle = intel_idle_s2idle, }, 761 { 762 .name = "C1E", 763 .desc = "MWAIT 0x01", 764 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 765 .exit_latency = 4, 766 .target_residency = 4, 767 .enter = &intel_idle, 768 .enter_s2idle = intel_idle_s2idle, }, 769 { 770 .name = "C6", 771 .desc = "MWAIT 0x20", 772 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 773 .exit_latency = 128, 774 .target_residency = 384, 775 .enter = &intel_idle, 776 .enter_s2idle = intel_idle_s2idle, }, 777 { 778 .enter = NULL } 779}; 780 781static struct cpuidle_state atom_cstates[] __initdata = { 782 { 783 .name = "C1E", 784 .desc = "MWAIT 0x00", 785 .flags = MWAIT2flg(0x00), 786 .exit_latency = 10, 787 .target_residency = 20, 788 .enter = &intel_idle, 789 .enter_s2idle = intel_idle_s2idle, }, 790 { 791 .name = "C2", 792 .desc = "MWAIT 0x10", 793 .flags = MWAIT2flg(0x10), 794 .exit_latency = 20, 795 .target_residency = 80, 796 .enter = &intel_idle, 797 .enter_s2idle = intel_idle_s2idle, }, 798 { 799 .name = "C4", 800 .desc = "MWAIT 0x30", 801 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 802 .exit_latency = 100, 803 .target_residency = 400, 804 .enter = &intel_idle, 805 .enter_s2idle = intel_idle_s2idle, }, 806 { 807 .name = "C6", 808 .desc = "MWAIT 0x52", 809 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 810 .exit_latency = 140, 811 .target_residency = 560, 812 .enter = &intel_idle, 813 .enter_s2idle = intel_idle_s2idle, }, 814 { 815 .enter = NULL } 816}; 817static struct cpuidle_state tangier_cstates[] __initdata = { 818 { 819 .name = "C1", 820 .desc = "MWAIT 0x00", 821 
.flags = MWAIT2flg(0x00), 822 .exit_latency = 1, 823 .target_residency = 4, 824 .enter = &intel_idle, 825 .enter_s2idle = intel_idle_s2idle, }, 826 { 827 .name = "C4", 828 .desc = "MWAIT 0x30", 829 .flags = MWAIT2flg(0x30) | CPUIDLE_FLAG_TLB_FLUSHED, 830 .exit_latency = 100, 831 .target_residency = 400, 832 .enter = &intel_idle, 833 .enter_s2idle = intel_idle_s2idle, }, 834 { 835 .name = "C6", 836 .desc = "MWAIT 0x52", 837 .flags = MWAIT2flg(0x52) | CPUIDLE_FLAG_TLB_FLUSHED, 838 .exit_latency = 140, 839 .target_residency = 560, 840 .enter = &intel_idle, 841 .enter_s2idle = intel_idle_s2idle, }, 842 { 843 .name = "C7", 844 .desc = "MWAIT 0x60", 845 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 846 .exit_latency = 1200, 847 .target_residency = 4000, 848 .enter = &intel_idle, 849 .enter_s2idle = intel_idle_s2idle, }, 850 { 851 .name = "C9", 852 .desc = "MWAIT 0x64", 853 .flags = MWAIT2flg(0x64) | CPUIDLE_FLAG_TLB_FLUSHED, 854 .exit_latency = 10000, 855 .target_residency = 20000, 856 .enter = &intel_idle, 857 .enter_s2idle = intel_idle_s2idle, }, 858 { 859 .enter = NULL } 860}; 861static struct cpuidle_state avn_cstates[] __initdata = { 862 { 863 .name = "C1", 864 .desc = "MWAIT 0x00", 865 .flags = MWAIT2flg(0x00), 866 .exit_latency = 2, 867 .target_residency = 2, 868 .enter = &intel_idle, 869 .enter_s2idle = intel_idle_s2idle, }, 870 { 871 .name = "C6", 872 .desc = "MWAIT 0x51", 873 .flags = MWAIT2flg(0x51) | CPUIDLE_FLAG_TLB_FLUSHED, 874 .exit_latency = 15, 875 .target_residency = 45, 876 .enter = &intel_idle, 877 .enter_s2idle = intel_idle_s2idle, }, 878 { 879 .enter = NULL } 880}; 881static struct cpuidle_state knl_cstates[] __initdata = { 882 { 883 .name = "C1", 884 .desc = "MWAIT 0x00", 885 .flags = MWAIT2flg(0x00), 886 .exit_latency = 1, 887 .target_residency = 2, 888 .enter = &intel_idle, 889 .enter_s2idle = intel_idle_s2idle }, 890 { 891 .name = "C6", 892 .desc = "MWAIT 0x10", 893 .flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TLB_FLUSHED, 894 .exit_latency = 
120, 895 .target_residency = 500, 896 .enter = &intel_idle, 897 .enter_s2idle = intel_idle_s2idle }, 898 { 899 .enter = NULL } 900}; 901 902static struct cpuidle_state bxt_cstates[] __initdata = { 903 { 904 .name = "C1", 905 .desc = "MWAIT 0x00", 906 .flags = MWAIT2flg(0x00), 907 .exit_latency = 2, 908 .target_residency = 2, 909 .enter = &intel_idle, 910 .enter_s2idle = intel_idle_s2idle, }, 911 { 912 .name = "C1E", 913 .desc = "MWAIT 0x01", 914 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 915 .exit_latency = 10, 916 .target_residency = 20, 917 .enter = &intel_idle, 918 .enter_s2idle = intel_idle_s2idle, }, 919 { 920 .name = "C6", 921 .desc = "MWAIT 0x20", 922 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 923 .exit_latency = 133, 924 .target_residency = 133, 925 .enter = &intel_idle, 926 .enter_s2idle = intel_idle_s2idle, }, 927 { 928 .name = "C7s", 929 .desc = "MWAIT 0x31", 930 .flags = MWAIT2flg(0x31) | CPUIDLE_FLAG_TLB_FLUSHED, 931 .exit_latency = 155, 932 .target_residency = 155, 933 .enter = &intel_idle, 934 .enter_s2idle = intel_idle_s2idle, }, 935 { 936 .name = "C8", 937 .desc = "MWAIT 0x40", 938 .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, 939 .exit_latency = 1000, 940 .target_residency = 1000, 941 .enter = &intel_idle, 942 .enter_s2idle = intel_idle_s2idle, }, 943 { 944 .name = "C9", 945 .desc = "MWAIT 0x50", 946 .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, 947 .exit_latency = 2000, 948 .target_residency = 2000, 949 .enter = &intel_idle, 950 .enter_s2idle = intel_idle_s2idle, }, 951 { 952 .name = "C10", 953 .desc = "MWAIT 0x60", 954 .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, 955 .exit_latency = 10000, 956 .target_residency = 10000, 957 .enter = &intel_idle, 958 .enter_s2idle = intel_idle_s2idle, }, 959 { 960 .enter = NULL } 961}; 962 963static struct cpuidle_state dnv_cstates[] __initdata = { 964 { 965 .name = "C1", 966 .desc = "MWAIT 0x00", 967 .flags = MWAIT2flg(0x00), 968 .exit_latency = 2, 969 
.target_residency = 2, 970 .enter = &intel_idle, 971 .enter_s2idle = intel_idle_s2idle, }, 972 { 973 .name = "C1E", 974 .desc = "MWAIT 0x01", 975 .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, 976 .exit_latency = 10, 977 .target_residency = 20, 978 .enter = &intel_idle, 979 .enter_s2idle = intel_idle_s2idle, }, 980 { 981 .name = "C6", 982 .desc = "MWAIT 0x20", 983 .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, 984 .exit_latency = 50, 985 .target_residency = 500, 986 .enter = &intel_idle, 987 .enter_s2idle = intel_idle_s2idle, }, 988 { 989 .enter = NULL } 990}; 991 992static const struct idle_cpu idle_cpu_nehalem __initconst = { 993 .state_table = nehalem_cstates, 994 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 995 .disable_promotion_to_c1e = true, 996}; 997 998static const struct idle_cpu idle_cpu_nhx __initconst = { 999 .state_table = nehalem_cstates, 1000 .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE, 1001 .disable_promotion_to_c1e = true, 1002 .use_acpi = true, 1003}; 1004 1005static const struct idle_cpu idle_cpu_atom __initconst = { 1006 .state_table = atom_cstates, 1007}; 1008 1009static const struct idle_cpu idle_cpu_tangier __initconst = { 1010 .state_table = tangier_cstates, 1011}; 1012 1013static const struct idle_cpu idle_cpu_lincroft __initconst = { 1014 .state_table = atom_cstates, 1015 .auto_demotion_disable_flags = ATM_LNC_C6_AUTO_DEMOTE, 1016}; 1017 1018static const struct idle_cpu idle_cpu_snb __initconst = { 1019 .state_table = snb_cstates, 1020 .disable_promotion_to_c1e = true, 1021}; 1022 1023static const struct idle_cpu idle_cpu_snx __initconst = { 1024 .state_table = snb_cstates, 1025 .disable_promotion_to_c1e = true, 1026 .use_acpi = true, 1027}; 1028 1029static const struct idle_cpu idle_cpu_byt __initconst = { 1030 .state_table = byt_cstates, 1031 .disable_promotion_to_c1e = true, 1032 .byt_auto_demotion_disable_flag = true, 1033}; 1034 1035static const struct idle_cpu 
idle_cpu_cht __initconst = { 1036 .state_table = cht_cstates, 1037 .disable_promotion_to_c1e = true, 1038 .byt_auto_demotion_disable_flag = true, 1039}; 1040 1041static const struct idle_cpu idle_cpu_ivb __initconst = { 1042 .state_table = ivb_cstates, 1043 .disable_promotion_to_c1e = true, 1044}; 1045 1046static const struct idle_cpu idle_cpu_ivt __initconst = { 1047 .state_table = ivt_cstates, 1048 .disable_promotion_to_c1e = true, 1049 .use_acpi = true, 1050}; 1051 1052static const struct idle_cpu idle_cpu_hsw __initconst = { 1053 .state_table = hsw_cstates, 1054 .disable_promotion_to_c1e = true, 1055}; 1056 1057static const struct idle_cpu idle_cpu_hsx __initconst = { 1058 .state_table = hsw_cstates, 1059 .disable_promotion_to_c1e = true, 1060 .use_acpi = true, 1061}; 1062 1063static const struct idle_cpu idle_cpu_bdw __initconst = { 1064 .state_table = bdw_cstates, 1065 .disable_promotion_to_c1e = true, 1066}; 1067 1068static const struct idle_cpu idle_cpu_bdx __initconst = { 1069 .state_table = bdw_cstates, 1070 .disable_promotion_to_c1e = true, 1071 .use_acpi = true, 1072}; 1073 1074static const struct idle_cpu idle_cpu_skl __initconst = { 1075 .state_table = skl_cstates, 1076 .disable_promotion_to_c1e = true, 1077}; 1078 1079static const struct idle_cpu idle_cpu_skx __initconst = { 1080 .state_table = skx_cstates, 1081 .disable_promotion_to_c1e = true, 1082 .use_acpi = true, 1083}; 1084 1085static const struct idle_cpu idle_cpu_icx __initconst = { 1086 .state_table = icx_cstates, 1087 .disable_promotion_to_c1e = true, 1088 .use_acpi = true, 1089}; 1090 1091static const struct idle_cpu idle_cpu_avn __initconst = { 1092 .state_table = avn_cstates, 1093 .disable_promotion_to_c1e = true, 1094 .use_acpi = true, 1095}; 1096 1097static const struct idle_cpu idle_cpu_knl __initconst = { 1098 .state_table = knl_cstates, 1099 .use_acpi = true, 1100}; 1101 1102static const struct idle_cpu idle_cpu_bxt __initconst = { 1103 .state_table = bxt_cstates, 1104 
.disable_promotion_to_c1e = true, 1105}; 1106 1107static const struct idle_cpu idle_cpu_dnv __initconst = { 1108 .state_table = dnv_cstates, 1109 .disable_promotion_to_c1e = true, 1110 .use_acpi = true, 1111}; 1112 1113static const struct x86_cpu_id intel_idle_ids[] __initconst = { 1114 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EP, &idle_cpu_nhx), 1115 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &idle_cpu_nehalem), 1116 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_G, &idle_cpu_nehalem), 1117 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE, &idle_cpu_nehalem), 1118 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EP, &idle_cpu_nhx), 1119 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM_EX, &idle_cpu_nhx), 1120 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL, &idle_cpu_atom), 1121 X86_MATCH_INTEL_FAM6_MODEL(ATOM_BONNELL_MID, &idle_cpu_lincroft), 1122 X86_MATCH_INTEL_FAM6_MODEL(WESTMERE_EX, &idle_cpu_nhx), 1123 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE, &idle_cpu_snb), 1124 X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X, &idle_cpu_snx), 1125 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SALTWELL, &idle_cpu_atom), 1126 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, &idle_cpu_byt), 1127 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &idle_cpu_tangier), 1128 X86_MATCH_INTEL_FAM6_MODEL(ATOM_AIRMONT, &idle_cpu_cht), 1129 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE, &idle_cpu_ivb), 1130 X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &idle_cpu_ivt), 1131 X86_MATCH_INTEL_FAM6_MODEL(HASWELL, &idle_cpu_hsw), 1132 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &idle_cpu_hsx), 1133 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L, &idle_cpu_hsw), 1134 X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G, &idle_cpu_hsw), 1135 X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_D, &idle_cpu_avn), 1136 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL, &idle_cpu_bdw), 1137 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G, &idle_cpu_bdw), 1138 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &idle_cpu_bdx), 1139 X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &idle_cpu_bdx), 1140 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, &idle_cpu_skl), 1141 X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, 
						&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&idle_cpu_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&idle_cpu_skx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&idle_cpu_icx),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&idle_cpu_knl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&idle_cpu_bxt),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&idle_cpu_dnv),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,	&idle_cpu_dnv),
	{}
};

/*
 * Fallback match: any family 6 Intel CPU with MWAIT.  Used when the model is
 * not in intel_idle_ids[]; the state list then comes from ACPI _CST only.
 */
static const struct x86_cpu_id intel_mwait_ids[] __initconst = {
	X86_MATCH_VENDOR_FAM_FEATURE(INTEL, 6, X86_FEATURE_MWAIT, NULL),
	{}
};

/*
 * Return true (after logging) when state index @cstate is beyond the
 * intel_idle.max_cstate= module parameter limit.
 */
static bool __init intel_idle_max_cstate_reached(int cstate)
{
	if (cstate + 1 > max_cstate) {
		pr_info("max_cstate %d reached\n", max_cstate);
		return true;
	}
	return false;
}

/*
 * Decide whether entering @state requires CPUIDLE_FLAG_TIMER_STOP: without
 * ARAT the local APIC timer is only reliable in C1 (see the boot-time
 * pr_debug() in intel_idle_init()), so deeper states need tick broadcast.
 */
static bool __init intel_idle_state_needs_timer_stop(struct cpuidle_state *state)
{
	unsigned long eax = flg2MWAIT(state->flags);

	if (boot_cpu_has(X86_FEATURE_ARAT))
		return false;

	/*
	 * Switch over to one-shot tick broadcast if the target C-state
	 * is deeper than C1.
	 */
	return !!((eax >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK);
}

#ifdef CONFIG_ACPI_PROCESSOR_CSTATE
#include <acpi/processor.h>

static bool no_acpi __read_mostly;
module_param(no_acpi, bool, 0444);
MODULE_PARM_DESC(no_acpi, "Do not use ACPI _CST for building the idle states list");

static bool force_use_acpi __read_mostly; /* No effect if no_acpi is set.
*/ 1191module_param_named(use_acpi, force_use_acpi, bool, 0444); 1192MODULE_PARM_DESC(use_acpi, "Use ACPI _CST for building the idle states list"); 1193 1194static struct acpi_processor_power acpi_state_table __initdata; 1195 1196/** 1197 * intel_idle_cst_usable - Check if the _CST information can be used. 1198 * 1199 * Check if all of the C-states listed by _CST in the max_cstate range are 1200 * ACPI_CSTATE_FFH, which means that they should be entered via MWAIT. 1201 */ 1202static bool __init intel_idle_cst_usable(void) 1203{ 1204 int cstate, limit; 1205 1206 limit = min_t(int, min_t(int, CPUIDLE_STATE_MAX, max_cstate + 1), 1207 acpi_state_table.count); 1208 1209 for (cstate = 1; cstate < limit; cstate++) { 1210 struct acpi_processor_cx *cx = &acpi_state_table.states[cstate]; 1211 1212 if (cx->entry_method != ACPI_CSTATE_FFH) 1213 return false; 1214 } 1215 1216 return true; 1217} 1218 1219static bool __init intel_idle_acpi_cst_extract(void) 1220{ 1221 unsigned int cpu; 1222 1223 if (no_acpi) { 1224 pr_debug("Not allowed to use ACPI _CST\n"); 1225 return false; 1226 } 1227 1228 for_each_possible_cpu(cpu) { 1229 struct acpi_processor *pr = per_cpu(processors, cpu); 1230 1231 if (!pr) 1232 continue; 1233 1234 if (acpi_processor_evaluate_cst(pr->handle, cpu, &acpi_state_table)) 1235 continue; 1236 1237 acpi_state_table.count++; 1238 1239 if (!intel_idle_cst_usable()) 1240 continue; 1241 1242 if (!acpi_processor_claim_cst_control()) 1243 break; 1244 1245 return true; 1246 } 1247 1248 acpi_state_table.count = 0; 1249 pr_debug("ACPI _CST not found or not usable\n"); 1250 return false; 1251} 1252 1253static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) 1254{ 1255 int cstate, limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1256 1257 /* 1258 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1259 * the interesting states are ACPI_CSTATE_FFH. 
1260 */ 1261 for (cstate = 1; cstate < limit; cstate++) { 1262 struct acpi_processor_cx *cx; 1263 struct cpuidle_state *state; 1264 1265 if (intel_idle_max_cstate_reached(cstate - 1)) 1266 break; 1267 1268 cx = &acpi_state_table.states[cstate]; 1269 1270 state = &drv->states[drv->state_count++]; 1271 1272 snprintf(state->name, CPUIDLE_NAME_LEN, "C%d_ACPI", cstate); 1273 strlcpy(state->desc, cx->desc, CPUIDLE_DESC_LEN); 1274 state->exit_latency = cx->latency; 1275 /* 1276 * For C1-type C-states use the same number for both the exit 1277 * latency and target residency, because that is the case for 1278 * C1 in the majority of the static C-states tables above. 1279 * For the other types of C-states, however, set the target 1280 * residency to 3 times the exit latency which should lead to 1281 * a reasonable balance between energy-efficiency and 1282 * performance in the majority of interesting cases. 1283 */ 1284 state->target_residency = cx->latency; 1285 if (cx->type > ACPI_STATE_C1) 1286 state->target_residency *= 3; 1287 1288 state->flags = MWAIT2flg(cx->address); 1289 if (cx->type > ACPI_STATE_C2) 1290 state->flags |= CPUIDLE_FLAG_TLB_FLUSHED; 1291 1292 if (disabled_states_mask & BIT(cstate)) 1293 state->flags |= CPUIDLE_FLAG_OFF; 1294 1295 if (intel_idle_state_needs_timer_stop(state)) 1296 state->flags |= CPUIDLE_FLAG_TIMER_STOP; 1297 1298 state->enter = intel_idle; 1299 state->enter_s2idle = intel_idle_s2idle; 1300 } 1301} 1302 1303static bool __init intel_idle_off_by_default(u32 mwait_hint) 1304{ 1305 int cstate, limit; 1306 1307 /* 1308 * If there are no _CST C-states, do not disable any C-states by 1309 * default. 1310 */ 1311 if (!acpi_state_table.count) 1312 return false; 1313 1314 limit = min_t(int, CPUIDLE_STATE_MAX, acpi_state_table.count); 1315 /* 1316 * If limit > 0, intel_idle_cst_usable() has returned 'true', so all of 1317 * the interesting states are ACPI_CSTATE_FFH. 
1318 */ 1319 for (cstate = 1; cstate < limit; cstate++) { 1320 if (acpi_state_table.states[cstate].address == mwait_hint) 1321 return false; 1322 } 1323 return true; 1324} 1325#else /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1326#define force_use_acpi (false) 1327 1328static inline bool intel_idle_acpi_cst_extract(void) { return false; } 1329static inline void intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) { } 1330static inline bool intel_idle_off_by_default(u32 mwait_hint) { return false; } 1331#endif /* !CONFIG_ACPI_PROCESSOR_CSTATE */ 1332 1333/** 1334 * ivt_idle_state_table_update - Tune the idle states table for Ivy Town. 1335 * 1336 * Tune IVT multi-socket targets. 1337 * Assumption: num_sockets == (max_package_num + 1). 1338 */ 1339static void __init ivt_idle_state_table_update(void) 1340{ 1341 /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */ 1342 int cpu, package_num, num_sockets = 1; 1343 1344 for_each_online_cpu(cpu) { 1345 package_num = topology_physical_package_id(cpu); 1346 if (package_num + 1 > num_sockets) { 1347 num_sockets = package_num + 1; 1348 1349 if (num_sockets > 4) { 1350 cpuidle_state_table = ivt_cstates_8s; 1351 return; 1352 } 1353 } 1354 } 1355 1356 if (num_sockets > 2) 1357 cpuidle_state_table = ivt_cstates_4s; 1358 1359 /* else, 1 and 2 socket systems use default ivt_cstates */ 1360} 1361 1362/** 1363 * irtl_2_usec - IRTL to microseconds conversion. 1364 * @irtl: IRTL MSR value. 1365 * 1366 * Translate the IRTL (Interrupt Response Time Limit) MSR value to microseconds. 
1367 */ 1368static unsigned long long __init irtl_2_usec(unsigned long long irtl) 1369{ 1370 static const unsigned int irtl_ns_units[] __initconst = { 1371 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 1372 }; 1373 unsigned long long ns; 1374 1375 if (!irtl) 1376 return 0; 1377 1378 ns = irtl_ns_units[(irtl >> 10) & 0x7]; 1379 1380 return div_u64((irtl & 0x3FF) * ns, NSEC_PER_USEC); 1381} 1382 1383/** 1384 * bxt_idle_state_table_update - Fix up the Broxton idle states table. 1385 * 1386 * On BXT, trust the IRTL (Interrupt Response Time Limit) MSR to show the 1387 * definitive maximum latency and use the same value for target_residency. 1388 */ 1389static void __init bxt_idle_state_table_update(void) 1390{ 1391 unsigned long long msr; 1392 unsigned int usec; 1393 1394 rdmsrl(MSR_PKGC6_IRTL, msr); 1395 usec = irtl_2_usec(msr); 1396 if (usec) { 1397 bxt_cstates[2].exit_latency = usec; 1398 bxt_cstates[2].target_residency = usec; 1399 } 1400 1401 rdmsrl(MSR_PKGC7_IRTL, msr); 1402 usec = irtl_2_usec(msr); 1403 if (usec) { 1404 bxt_cstates[3].exit_latency = usec; 1405 bxt_cstates[3].target_residency = usec; 1406 } 1407 1408 rdmsrl(MSR_PKGC8_IRTL, msr); 1409 usec = irtl_2_usec(msr); 1410 if (usec) { 1411 bxt_cstates[4].exit_latency = usec; 1412 bxt_cstates[4].target_residency = usec; 1413 } 1414 1415 rdmsrl(MSR_PKGC9_IRTL, msr); 1416 usec = irtl_2_usec(msr); 1417 if (usec) { 1418 bxt_cstates[5].exit_latency = usec; 1419 bxt_cstates[5].target_residency = usec; 1420 } 1421 1422 rdmsrl(MSR_PKGC10_IRTL, msr); 1423 usec = irtl_2_usec(msr); 1424 if (usec) { 1425 bxt_cstates[6].exit_latency = usec; 1426 bxt_cstates[6].target_residency = usec; 1427 } 1428 1429} 1430 1431/** 1432 * sklh_idle_state_table_update - Fix up the Sky Lake idle states table. 1433 * 1434 * On SKL-H (model 0x5e) skip C8 and C9 if C10 is enabled and SGX disabled. 
 */
static void __init sklh_idle_state_table_update(void)
{
	unsigned long long msr;
	unsigned int	eax, ebx, ecx, edx;


	/* if PC10 disabled via cmdline intel_idle.max_cstate=7 or shallower */
	if (max_cstate <= 7)
		return;

	/* if PC10 not present in CPUID.MWAIT.EDX */
	if ((mwait_substates & (0xF << 28)) == 0)
		return;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr);

	/* PC10 is not enabled in PKG C-state limit */
	if ((msr & 0xF) != 8)
		return;

	ecx = 0;
	cpuid(7, &eax, &ebx, &ecx, &edx);

	/* if SGX is present (CPUID.(EAX=7,ECX=0):EBX bit 2) */
	if (ebx & (1 << 2)) {

		rdmsrl(MSR_IA32_FEAT_CTL, msr);

		/* if SGX is enabled (MSR_IA32_FEAT_CTL bit 18) */
		if (msr & (1 << 18))
			return;
	}

	skl_cstates[5].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C8-SKL */
	skl_cstates[6].flags |= CPUIDLE_FLAG_UNUSABLE;	/* C9-SKL */
}

/*
 * Check a state's MWAIT hint against the sub-state counts in
 * CPUID.MWAIT.EDX (cached in mwait_substates, one nibble per C-state) and
 * mark the TSC unstable when it halts in that state.
 */
static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
{
	unsigned int mwait_cstate = MWAIT_HINT2CSTATE(mwait_hint) + 1;
	unsigned int num_substates = (mwait_substates >> mwait_cstate * 4) &
					MWAIT_SUBSTATE_MASK;

	/* Ignore the C-state if there are NO sub-states in CPUID for it.
 */
	if (num_substates == 0)
		return false;

	if (mwait_cstate > 2 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		mark_tsc_unstable("TSC halts in idle states deeper than C2");

	return true;
}

/*
 * Build the idle states list from the static table selected for this CPU
 * model, applying model-specific fixups first.
 */
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
	int cstate;

	/* Model-specific adjustments of the static tables before copying. */
	switch (boot_cpu_data.x86_model) {
	case INTEL_FAM6_IVYBRIDGE_X:
		ivt_idle_state_table_update();
		break;
	case INTEL_FAM6_ATOM_GOLDMONT:
	case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
		bxt_idle_state_table_update();
		break;
	case INTEL_FAM6_SKYLAKE:
		sklh_idle_state_table_update();
		break;
	}

	for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
		unsigned int mwait_hint;

		if (intel_idle_max_cstate_reached(cstate))
			break;

		/* An entry with no callbacks terminates the static table. */
		if (!cpuidle_state_table[cstate].enter &&
		    !cpuidle_state_table[cstate].enter_s2idle)
			break;

		/* If marked as unusable, skip this state. */
		if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_UNUSABLE) {
			pr_debug("state %s is disabled\n",
				 cpuidle_state_table[cstate].name);
			continue;
		}

		mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
		if (!intel_idle_verify_cstate(mwait_hint))
			continue;

		/* Structure copy.
		 */
		drv->states[drv->state_count] = cpuidle_state_table[cstate];

		/* Use the IBRS-aware entry method when kernel IBRS is active. */
		if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
		    cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
			drv->states[drv->state_count].enter = intel_idle_ibrs;
		}

		/*
		 * states_off= is matched against the driver state index
		 * (state_count), not the static table index (cstate).
		 */
		if ((disabled_states_mask & BIT(drv->state_count)) ||
		    ((icpu->use_acpi || force_use_acpi) &&
		     intel_idle_off_by_default(mwait_hint) &&
		     !(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_ALWAYS_ENABLE)))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_OFF;

		if (intel_idle_state_needs_timer_stop(&drv->states[drv->state_count]))
			drv->states[drv->state_count].flags |= CPUIDLE_FLAG_TIMER_STOP;

		drv->state_count++;
	}

	/* BYT/CHT quirk: clear the hardware demotion policy MSRs. */
	if (icpu->byt_auto_demotion_disable_flag) {
		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
	}
}

/**
 * intel_idle_cpuidle_driver_init - Create the list of available idle states.
 * @drv: cpuidle driver structure to initialize.
 */
static void __init intel_idle_cpuidle_driver_init(struct cpuidle_driver *drv)
{
	cpuidle_poll_state_init(drv);

	if (disabled_states_mask & BIT(0))
		drv->states[0].flags |= CPUIDLE_FLAG_OFF;

	drv->state_count = 1;

	/* icpu is non-NULL only for models listed in intel_idle_ids[]. */
	if (icpu)
		intel_idle_init_cstates_icpu(drv);
	else
		intel_idle_init_cstates_acpi(drv);
}

/* Clear the auto-demotion enable bits selected by the idle_cpu descriptor. */
static void auto_demotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
	msr_bits &= ~auto_demotion_disable_flags;
	wrmsrl(MSR_PKG_CST_CONFIG_CONTROL, msr_bits);
}

/*
 * Clear bit 1 of MSR_IA32_POWER_CTL, presumably the C1E promotion enable
 * bit (matches the disable_promotion_to_c1e descriptor flag) -- see the
 * Intel SDM MSR reference to confirm.
 */
static void c1e_promotion_disable(void)
{
	unsigned long long msr_bits;

	rdmsrl(MSR_IA32_POWER_CTL, msr_bits);
	msr_bits &= ~0x2;
	wrmsrl(MSR_IA32_POWER_CTL, msr_bits);
}

/**
 * intel_idle_cpu_init - Register the target CPU with the cpuidle core.
 * @cpu: CPU to initialize.
 *
 * Register a cpuidle device object for @cpu and update its MSRs in accordance
 * with the processor model flags.
 */
static int intel_idle_cpu_init(unsigned int cpu)
{
	struct cpuidle_device *dev;

	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	dev->cpu = cpu;

	if (cpuidle_register_device(dev)) {
		pr_debug("cpuidle_register_device %d failed!\n", cpu);
		return -EIO;
	}

	if (auto_demotion_disable_flags)
		auto_demotion_disable();

	if (disable_promotion_to_c1e)
		c1e_promotion_disable();

	return 0;
}

/*
 * CPU hotplug "online" callback: enable tick broadcast when the local APIC
 * timer is unreliable (no ARAT) and register the cpuidle device if this CPU
 * has not been initialized yet.
 */
static int intel_idle_cpu_online(unsigned int cpu)
{
	struct cpuidle_device *dev;

	if (!boot_cpu_has(X86_FEATURE_ARAT))
		tick_broadcast_enable();

	/*
	 * Some systems can hotplug a cpu at runtime after
	 * the kernel has booted, we have to initialize the
	 * driver in this case
	 */
	dev = per_cpu_ptr(intel_idle_cpuidle_devices, cpu);
	if (!dev->registered)
		return intel_idle_cpu_init(cpu);

	return 0;
}

/**
 * intel_idle_cpuidle_devices_uninit - Unregister all cpuidle devices.
 */
static void __init intel_idle_cpuidle_devices_uninit(void)
{
	int i;

	for_each_online_cpu(i)
		cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}

/*
 * Driver entry point: probe the CPU (match table or generic MWAIT + _CST),
 * build the idle states list and register the driver and per-CPU devices.
 * Returns 0 on success or a negative error code.
 */
static int __init intel_idle_init(void)
{
	const struct x86_cpu_id *id;
	unsigned int eax, ebx, ecx;
	int retval;

	/* Do not load intel_idle at all for now if idle= is passed */
	if (boot_option_idle_override != IDLE_NO_OVERRIDE)
		return -ENODEV;

	/* intel_idle.max_cstate=0 disables the driver entirely. */
	if (max_cstate == 0) {
		pr_debug("disabled\n");
		return -EPERM;
	}

	id = x86_match_cpu(intel_idle_ids);
	if (id) {
		if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
			pr_debug("Please enable MWAIT in BIOS SETUP\n");
			return -ENODEV;
		}
	} else {
		/* Unknown model: fall back to generic MWAIT matching. */
		id = x86_match_cpu(intel_mwait_ids);
		if (!id)
			return -ENODEV;
	}

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return -ENODEV;

	/* Cache the per-C-state sub-state counts from CPUID.MWAIT.EDX. */
	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK) ||
	    !mwait_substates)
		return -ENODEV;

	pr_debug("MWAIT substates: 0x%x\n", mwait_substates);

	icpu = (const struct idle_cpu *)id->driver_data;
	if (icpu) {
		cpuidle_state_table = icpu->state_table;
		auto_demotion_disable_flags = icpu->auto_demotion_disable_flags;
		disable_promotion_to_c1e = icpu->disable_promotion_to_c1e;
		if (icpu->use_acpi || force_use_acpi)
			intel_idle_acpi_cst_extract();
	} else if (!intel_idle_acpi_cst_extract()) {
		/* Generic MWAIT match is only usable with ACPI _CST data. */
		return -ENODEV;
	}

	pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
		 boot_cpu_data.x86_model);

	intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
	if (!intel_idle_cpuidle_devices)
		return -ENOMEM;

	intel_idle_cpuidle_driver_init(&intel_idle_driver);

	retval = cpuidle_register_driver(&intel_idle_driver);
	if (retval) {
		struct cpuidle_driver *drv = cpuidle_get_driver();
		printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
		       drv ? drv->name : "none");
		goto init_driver_fail;
	}

	retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
				   intel_idle_cpu_online, NULL);
	if (retval < 0)
		goto hp_setup_fail;

	pr_debug("Local APIC timer is reliable in %s\n",
		 boot_cpu_has(X86_FEATURE_ARAT) ? "all C-states" : "C1");

	return 0;

hp_setup_fail:
	/* Roll back device registrations done by the online callback. */
	intel_idle_cpuidle_devices_uninit();
	cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
	free_percpu(intel_idle_cpuidle_devices);
	return retval;

}
device_initcall(intel_idle_init);

/*
 * We are not really modular, but we used to support that.  Meaning we also
 * support "intel_idle.max_cstate=..." at boot and also a read-only export of
 * it at /sys/module/intel_idle/parameters/max_cstate -- so using module_param
 * is the easiest way (currently) to continue doing that.
 */
module_param(max_cstate, int, 0444);
/*
 * The positions of the bits that are set in this number are the indices of the
 * idle states to be disabled by default (as reflected by the names of the
 * corresponding idle state directories in sysfs, "state0", "state1" ...
 * "state<i>" ..., where <i> is the index of the given state).
 */
module_param_named(states_off, disabled_states_mask, uint, 0444);
MODULE_PARM_DESC(states_off, "Mask of disabled idle states");