// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Intel SMP support routines.
 *
 *	(c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
 *	(c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com>
 *	(c) 2002,2003 Andi Kleen, SuSE Labs.
 *
 *	i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
 */

#include <linux/init.h>

#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/gfp.h>
#include <linux/kexec.h>

#include <asm/mtrr.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/idtentry.h>
#include <asm/nmi.h>
#include <asm/mce.h>
#include <asm/trace/irq_vectors.h>
#include <asm/kexec.h>
#include <asm/reboot.h>

/*
 *	Some notes on x86 processor bugs affecting SMP operation:
 *
 *	Pentium, Pentium Pro, II, III (and all CPUs) have bugs.
 *	The Linux implications for SMP are handled as follows:
 *
 *	Pentium III / [Xeon]
 *		None of the E1AP-E3AP errata are visible to the user.
 *
 *	E1AP.	see PII A1AP
 *	E2AP.	see PII A2AP
 *	E3AP.	see PII A3AP
 *
 *	Pentium II / [Xeon]
 *		None of the A1AP-A3AP errata are visible to the user.
 *
 *	A1AP.	see PPro 1AP
 *	A2AP.	see PPro 2AP
 *	A3AP.	see PPro 7AP
 *
 *	Pentium Pro
 *		None of 1AP-9AP errata are visible to the normal user,
 *	except occasional delivery of 'spurious interrupt' as trap #15.
 *	This is very rare and a non-problem.
 *
 *	1AP.	Linux maps APIC as non-cacheable
 *	2AP.	worked around in hardware
 *	3AP.	fixed in C0 and above steppings microcode update.
 *		Linux does not use excessive STARTUP_IPIs.
 *	4AP.	worked around in hardware
 *	5AP.	symmetric IO mode (normal Linux operation) not affected.
 *		'noapic' mode has vector 0xf filled out properly.
 *	6AP.	'noapic' mode might be affected - fixed in later steppings
 *	7AP.	We do not assume writes to the LVT deasserting IRQs
 *	8AP.	We do not enable low power mode (deep sleep) during MP bootup
 *	9AP.	We do not use mixed mode
 *
 *	Pentium
 *		There is a marginal case where REP MOVS on 100MHz SMP
 *	machines with B stepping processors can fail. XXX should provide
 *	an L1cache=Writethrough or L1cache=off option.
 *
 *		B stepping CPUs may hang. There are hardware work arounds
 *	for this. We warn about it in case your board doesn't have the work
 *	arounds. Basically that's so I can tell anyone with a B stepping
 *	CPU and SMP problems "tough".
 *
 *	Specific items [From Pentium Processor Specification Update]
 *
 *	1AP.	Linux doesn't use remote read
 *	2AP.	Linux doesn't trust APIC errors
 *	3AP.	We work around this
 *	4AP.	Linux never generated 3 interrupts of the same priority
 *		to cause a lost local interrupt.
 *	5AP.	Remote read is never used
 *	6AP.	not affected - worked around in hardware
 *	7AP.	not affected - worked around in hardware
 *	8AP.	worked around in hardware - we get explicit CS errors if not
 *	9AP.	only 'noapic' mode affected. Might generate spurious
 *		interrupts, we log only the first one and count the
 *		rest silently.
 *	10AP.	not affected - worked around in hardware
 *	11AP.	Linux reads the APIC between writes to avoid this, as per
 *		the documentation. Make sure you preserve this as it affects
 *		the C stepping chips too.
10462306a36Sopenharmony_ci * 12AP. not affected - worked around in hardware 10562306a36Sopenharmony_ci * 13AP. not affected - worked around in hardware 10662306a36Sopenharmony_ci * 14AP. we always deassert INIT during bootup 10762306a36Sopenharmony_ci * 15AP. not affected - worked around in hardware 10862306a36Sopenharmony_ci * 16AP. not affected - worked around in hardware 10962306a36Sopenharmony_ci * 17AP. not affected - worked around in hardware 11062306a36Sopenharmony_ci * 18AP. not affected - worked around in hardware 11162306a36Sopenharmony_ci * 19AP. not affected - worked around in BIOS 11262306a36Sopenharmony_ci * 11362306a36Sopenharmony_ci * If this sounds worrying believe me these bugs are either ___RARE___, 11462306a36Sopenharmony_ci * or are signal timing bugs worked around in hardware and there's 11562306a36Sopenharmony_ci * about nothing of note with C stepping upwards. 11662306a36Sopenharmony_ci */ 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_cistatic atomic_t stopping_cpu = ATOMIC_INIT(-1); 11962306a36Sopenharmony_cistatic bool smp_no_nmi_ipi = false; 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_cistatic int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs) 12262306a36Sopenharmony_ci{ 12362306a36Sopenharmony_ci /* We are registered on stopping cpu too, avoid spurious NMI */ 12462306a36Sopenharmony_ci if (raw_smp_processor_id() == atomic_read(&stopping_cpu)) 12562306a36Sopenharmony_ci return NMI_HANDLED; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci cpu_emergency_disable_virtualization(); 12862306a36Sopenharmony_ci stop_this_cpu(NULL); 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci return NMI_HANDLED; 13162306a36Sopenharmony_ci} 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci/* 13462306a36Sopenharmony_ci * this function calls the 'stop' function on all other CPUs in the system. 
13562306a36Sopenharmony_ci */ 13662306a36Sopenharmony_ciDEFINE_IDTENTRY_SYSVEC(sysvec_reboot) 13762306a36Sopenharmony_ci{ 13862306a36Sopenharmony_ci apic_eoi(); 13962306a36Sopenharmony_ci cpu_emergency_disable_virtualization(); 14062306a36Sopenharmony_ci stop_this_cpu(NULL); 14162306a36Sopenharmony_ci} 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_cistatic int register_stop_handler(void) 14462306a36Sopenharmony_ci{ 14562306a36Sopenharmony_ci return register_nmi_handler(NMI_LOCAL, smp_stop_nmi_callback, 14662306a36Sopenharmony_ci NMI_FLAG_FIRST, "smp_stop"); 14762306a36Sopenharmony_ci} 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_cistatic void native_stop_other_cpus(int wait) 15062306a36Sopenharmony_ci{ 15162306a36Sopenharmony_ci unsigned int cpu = smp_processor_id(); 15262306a36Sopenharmony_ci unsigned long flags, timeout; 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci if (reboot_force) 15562306a36Sopenharmony_ci return; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci /* Only proceed if this is the first CPU to reach this code */ 15862306a36Sopenharmony_ci if (atomic_cmpxchg(&stopping_cpu, -1, cpu) != -1) 15962306a36Sopenharmony_ci return; 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci /* For kexec, ensure that offline CPUs are out of MWAIT and in HLT */ 16262306a36Sopenharmony_ci if (kexec_in_progress) 16362306a36Sopenharmony_ci smp_kick_mwait_play_dead(); 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci /* 16662306a36Sopenharmony_ci * 1) Send an IPI on the reboot vector to all other CPUs. 16762306a36Sopenharmony_ci * 16862306a36Sopenharmony_ci * The other CPUs should react on it after leaving critical 16962306a36Sopenharmony_ci * sections and re-enabling interrupts. They might still hold 17062306a36Sopenharmony_ci * locks, but there is nothing which can be done about that. 
17162306a36Sopenharmony_ci * 17262306a36Sopenharmony_ci * 2) Wait for all other CPUs to report that they reached the 17362306a36Sopenharmony_ci * HLT loop in stop_this_cpu() 17462306a36Sopenharmony_ci * 17562306a36Sopenharmony_ci * 3) If #2 timed out send an NMI to the CPUs which did not 17662306a36Sopenharmony_ci * yet report 17762306a36Sopenharmony_ci * 17862306a36Sopenharmony_ci * 4) Wait for all other CPUs to report that they reached the 17962306a36Sopenharmony_ci * HLT loop in stop_this_cpu() 18062306a36Sopenharmony_ci * 18162306a36Sopenharmony_ci * #3 can obviously race against a CPU reaching the HLT loop late. 18262306a36Sopenharmony_ci * That CPU will have reported already and the "have all CPUs 18362306a36Sopenharmony_ci * reached HLT" condition will be true despite the fact that the 18462306a36Sopenharmony_ci * other CPU is still handling the NMI. Again, there is no 18562306a36Sopenharmony_ci * protection against that as "disabled" APICs still respond to 18662306a36Sopenharmony_ci * NMIs. 18762306a36Sopenharmony_ci */ 18862306a36Sopenharmony_ci cpumask_copy(&cpus_stop_mask, cpu_online_mask); 18962306a36Sopenharmony_ci cpumask_clear_cpu(cpu, &cpus_stop_mask); 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci if (!cpumask_empty(&cpus_stop_mask)) { 19262306a36Sopenharmony_ci apic_send_IPI_allbutself(REBOOT_VECTOR); 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci /* 19562306a36Sopenharmony_ci * Don't wait longer than a second for IPI completion. The 19662306a36Sopenharmony_ci * wait request is not checked here because that would 19762306a36Sopenharmony_ci * prevent an NMI shutdown attempt in case that not all 19862306a36Sopenharmony_ci * CPUs reach shutdown state. 
19962306a36Sopenharmony_ci */ 20062306a36Sopenharmony_ci timeout = USEC_PER_SEC; 20162306a36Sopenharmony_ci while (!cpumask_empty(&cpus_stop_mask) && timeout--) 20262306a36Sopenharmony_ci udelay(1); 20362306a36Sopenharmony_ci } 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci /* if the REBOOT_VECTOR didn't work, try with the NMI */ 20662306a36Sopenharmony_ci if (!cpumask_empty(&cpus_stop_mask)) { 20762306a36Sopenharmony_ci /* 20862306a36Sopenharmony_ci * If NMI IPI is enabled, try to register the stop handler 20962306a36Sopenharmony_ci * and send the IPI. In any case try to wait for the other 21062306a36Sopenharmony_ci * CPUs to stop. 21162306a36Sopenharmony_ci */ 21262306a36Sopenharmony_ci if (!smp_no_nmi_ipi && !register_stop_handler()) { 21362306a36Sopenharmony_ci pr_emerg("Shutting down cpus with NMI\n"); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci for_each_cpu(cpu, &cpus_stop_mask) 21662306a36Sopenharmony_ci __apic_send_IPI(cpu, NMI_VECTOR); 21762306a36Sopenharmony_ci } 21862306a36Sopenharmony_ci /* 21962306a36Sopenharmony_ci * Don't wait longer than 10 ms if the caller didn't 22062306a36Sopenharmony_ci * request it. If wait is true, the machine hangs here if 22162306a36Sopenharmony_ci * one or more CPUs do not reach shutdown state. 22262306a36Sopenharmony_ci */ 22362306a36Sopenharmony_ci timeout = USEC_PER_MSEC * 10; 22462306a36Sopenharmony_ci while (!cpumask_empty(&cpus_stop_mask) && (wait || timeout--)) 22562306a36Sopenharmony_ci udelay(1); 22662306a36Sopenharmony_ci } 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci local_irq_save(flags); 22962306a36Sopenharmony_ci disable_local_APIC(); 23062306a36Sopenharmony_ci mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); 23162306a36Sopenharmony_ci local_irq_restore(flags); 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci /* 23462306a36Sopenharmony_ci * Ensure that the cpus_stop_mask cache lines are invalidated on 23562306a36Sopenharmony_ci * the other CPUs. See comment vs. SME in stop_this_cpu(). 
23662306a36Sopenharmony_ci */ 23762306a36Sopenharmony_ci cpumask_clear(&cpus_stop_mask); 23862306a36Sopenharmony_ci} 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci/* 24162306a36Sopenharmony_ci * Reschedule call back. KVM uses this interrupt to force a cpu out of 24262306a36Sopenharmony_ci * guest mode. 24362306a36Sopenharmony_ci */ 24462306a36Sopenharmony_ciDEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_reschedule_ipi) 24562306a36Sopenharmony_ci{ 24662306a36Sopenharmony_ci apic_eoi(); 24762306a36Sopenharmony_ci trace_reschedule_entry(RESCHEDULE_VECTOR); 24862306a36Sopenharmony_ci inc_irq_stat(irq_resched_count); 24962306a36Sopenharmony_ci scheduler_ipi(); 25062306a36Sopenharmony_ci trace_reschedule_exit(RESCHEDULE_VECTOR); 25162306a36Sopenharmony_ci} 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ciDEFINE_IDTENTRY_SYSVEC(sysvec_call_function) 25462306a36Sopenharmony_ci{ 25562306a36Sopenharmony_ci apic_eoi(); 25662306a36Sopenharmony_ci trace_call_function_entry(CALL_FUNCTION_VECTOR); 25762306a36Sopenharmony_ci inc_irq_stat(irq_call_count); 25862306a36Sopenharmony_ci generic_smp_call_function_interrupt(); 25962306a36Sopenharmony_ci trace_call_function_exit(CALL_FUNCTION_VECTOR); 26062306a36Sopenharmony_ci} 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ciDEFINE_IDTENTRY_SYSVEC(sysvec_call_function_single) 26362306a36Sopenharmony_ci{ 26462306a36Sopenharmony_ci apic_eoi(); 26562306a36Sopenharmony_ci trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); 26662306a36Sopenharmony_ci inc_irq_stat(irq_call_count); 26762306a36Sopenharmony_ci generic_smp_call_function_single_interrupt(); 26862306a36Sopenharmony_ci trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR); 26962306a36Sopenharmony_ci} 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_cistatic int __init nonmi_ipi_setup(char *str) 27262306a36Sopenharmony_ci{ 27362306a36Sopenharmony_ci smp_no_nmi_ipi = true; 27462306a36Sopenharmony_ci return 1; 27562306a36Sopenharmony_ci} 
27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci__setup("nonmi_ipi", nonmi_ipi_setup); 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_cistruct smp_ops smp_ops = { 28062306a36Sopenharmony_ci .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, 28162306a36Sopenharmony_ci .smp_prepare_cpus = native_smp_prepare_cpus, 28262306a36Sopenharmony_ci .smp_cpus_done = native_smp_cpus_done, 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci .stop_other_cpus = native_stop_other_cpus, 28562306a36Sopenharmony_ci#if defined(CONFIG_KEXEC_CORE) 28662306a36Sopenharmony_ci .crash_stop_other_cpus = kdump_nmi_shootdown_cpus, 28762306a36Sopenharmony_ci#endif 28862306a36Sopenharmony_ci .smp_send_reschedule = native_smp_send_reschedule, 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci .kick_ap_alive = native_kick_ap, 29162306a36Sopenharmony_ci .cpu_disable = native_cpu_disable, 29262306a36Sopenharmony_ci .play_dead = native_play_dead, 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci .send_call_func_ipi = native_send_call_func_ipi, 29562306a36Sopenharmony_ci .send_call_func_single_ipi = native_send_call_func_single_ipi, 29662306a36Sopenharmony_ci}; 29762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(smp_ops); 298