162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci#include <linux/cpu.h> 462306a36Sopenharmony_ci#include <linux/cpumask.h> 562306a36Sopenharmony_ci#include <linux/kernel.h> 662306a36Sopenharmony_ci#include <linux/nmi.h> 762306a36Sopenharmony_ci#include <linux/percpu-defs.h> 862306a36Sopenharmony_ci 962306a36Sopenharmony_cistatic cpumask_t __read_mostly watchdog_cpus; 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_cistatic unsigned int watchdog_next_cpu(unsigned int cpu) 1262306a36Sopenharmony_ci{ 1362306a36Sopenharmony_ci unsigned int next_cpu; 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci next_cpu = cpumask_next(cpu, &watchdog_cpus); 1662306a36Sopenharmony_ci if (next_cpu >= nr_cpu_ids) 1762306a36Sopenharmony_ci next_cpu = cpumask_first(&watchdog_cpus); 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci if (next_cpu == cpu) 2062306a36Sopenharmony_ci return nr_cpu_ids; 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ci return next_cpu; 2362306a36Sopenharmony_ci} 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ciint __init watchdog_hardlockup_probe(void) 2662306a36Sopenharmony_ci{ 2762306a36Sopenharmony_ci return 0; 2862306a36Sopenharmony_ci} 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_civoid watchdog_hardlockup_enable(unsigned int cpu) 3162306a36Sopenharmony_ci{ 3262306a36Sopenharmony_ci unsigned int next_cpu; 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci /* 3562306a36Sopenharmony_ci * The new CPU will be marked online before the hrtimer interrupt 3662306a36Sopenharmony_ci * gets a chance to run on it. If another CPU tests for a 3762306a36Sopenharmony_ci * hardlockup on the new CPU before it has run its the hrtimer 3862306a36Sopenharmony_ci * interrupt, it will get a false positive. Touch the watchdog on 3962306a36Sopenharmony_ci * the new CPU to delay the check for at least 3 sampling periods 4062306a36Sopenharmony_ci * to guarantee one hrtimer has run on the new CPU. 4162306a36Sopenharmony_ci */ 4262306a36Sopenharmony_ci watchdog_hardlockup_touch_cpu(cpu); 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci /* 4562306a36Sopenharmony_ci * We are going to check the next CPU. Our watchdog_hrtimer 4662306a36Sopenharmony_ci * need not be zero if the CPU has already been online earlier. 4762306a36Sopenharmony_ci * Touch the watchdog on the next CPU to avoid false positive 4862306a36Sopenharmony_ci * if we try to check it in less then 3 interrupts. 4962306a36Sopenharmony_ci */ 5062306a36Sopenharmony_ci next_cpu = watchdog_next_cpu(cpu); 5162306a36Sopenharmony_ci if (next_cpu < nr_cpu_ids) 5262306a36Sopenharmony_ci watchdog_hardlockup_touch_cpu(next_cpu); 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci /* 5562306a36Sopenharmony_ci * Makes sure that watchdog is touched on this CPU before 5662306a36Sopenharmony_ci * other CPUs could see it in watchdog_cpus. The counter 5762306a36Sopenharmony_ci * part is in watchdog_buddy_check_hardlockup(). 5862306a36Sopenharmony_ci */ 5962306a36Sopenharmony_ci smp_wmb(); 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci cpumask_set_cpu(cpu, &watchdog_cpus); 6262306a36Sopenharmony_ci} 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_civoid watchdog_hardlockup_disable(unsigned int cpu) 6562306a36Sopenharmony_ci{ 6662306a36Sopenharmony_ci unsigned int next_cpu = watchdog_next_cpu(cpu); 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci /* 6962306a36Sopenharmony_ci * Offlining this CPU will cause the CPU before this one to start 7062306a36Sopenharmony_ci * checking the one after this one. If this CPU just finished checking 7162306a36Sopenharmony_ci * the next CPU and updating hrtimer_interrupts_saved, and then the 7262306a36Sopenharmony_ci * previous CPU checks it within one sample period, it will trigger a 7362306a36Sopenharmony_ci * false positive. Touch the watchdog on the next CPU to prevent it. 7462306a36Sopenharmony_ci */ 7562306a36Sopenharmony_ci if (next_cpu < nr_cpu_ids) 7662306a36Sopenharmony_ci watchdog_hardlockup_touch_cpu(next_cpu); 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci /* 7962306a36Sopenharmony_ci * Makes sure that watchdog is touched on the next CPU before 8062306a36Sopenharmony_ci * this CPU disappear in watchdog_cpus. The counter part is in 8162306a36Sopenharmony_ci * watchdog_buddy_check_hardlockup(). 8262306a36Sopenharmony_ci */ 8362306a36Sopenharmony_ci smp_wmb(); 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci cpumask_clear_cpu(cpu, &watchdog_cpus); 8662306a36Sopenharmony_ci} 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_civoid watchdog_buddy_check_hardlockup(int hrtimer_interrupts) 8962306a36Sopenharmony_ci{ 9062306a36Sopenharmony_ci unsigned int next_cpu; 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci /* 9362306a36Sopenharmony_ci * Test for hardlockups every 3 samples. The sample period is 9462306a36Sopenharmony_ci * watchdog_thresh * 2 / 5, so 3 samples gets us back to slightly over 9562306a36Sopenharmony_ci * watchdog_thresh (over by 20%). 9662306a36Sopenharmony_ci */ 9762306a36Sopenharmony_ci if (hrtimer_interrupts % 3 != 0) 9862306a36Sopenharmony_ci return; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci /* check for a hardlockup on the next CPU */ 10162306a36Sopenharmony_ci next_cpu = watchdog_next_cpu(smp_processor_id()); 10262306a36Sopenharmony_ci if (next_cpu >= nr_cpu_ids) 10362306a36Sopenharmony_ci return; 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci /* 10662306a36Sopenharmony_ci * Make sure that the watchdog was touched on next CPU when 10762306a36Sopenharmony_ci * watchdog_next_cpu() returned another one because of 10862306a36Sopenharmony_ci * a change in watchdog_hardlockup_enable()/disable(). 10962306a36Sopenharmony_ci */ 11062306a36Sopenharmony_ci smp_rmb(); 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci watchdog_hardlockup_check(next_cpu, NULL); 11362306a36Sopenharmony_ci} 114