1/* CPU control.
2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
3 *
4 * This code is licenced under the GPL.
5 */
6#include <linux/sched/mm.h>
7#include <linux/proc_fs.h>
8#include <linux/smp.h>
9#include <linux/init.h>
10#include <linux/notifier.h>
11#include <linux/sched/signal.h>
12#include <linux/sched/hotplug.h>
13#include <linux/sched/isolation.h>
14#include <linux/sched/task.h>
15#include <linux/sched/smt.h>
16#include <linux/unistd.h>
17#include <linux/cpu.h>
18#include <linux/oom.h>
19#include <linux/rcupdate.h>
20#include <linux/export.h>
21#include <linux/bug.h>
22#include <linux/kthread.h>
23#include <linux/stop_machine.h>
24#include <linux/mutex.h>
25#include <linux/gfp.h>
26#include <linux/suspend.h>
27#include <linux/lockdep.h>
28#include <linux/tick.h>
29#include <linux/irq.h>
30#include <linux/nmi.h>
31#include <linux/smpboot.h>
32#include <linux/relay.h>
33#include <linux/slab.h>
34#include <linux/scs.h>
35#include <linux/percpu-rwsem.h>
36#include <linux/cpuset.h>
37#include <linux/random.h>
38
39#include <trace/events/power.h>
40#define CREATE_TRACE_POINTS
41#include <trace/events/cpuhp.h>
42
43#undef CREATE_TRACE_POINTS
44
45#include "smpboot.h"
46
47#define CPU_PAGE_SIZE_OFF_TWO 2
48
49/**
50 * cpuhp_cpu_state - Per cpu hotplug state storage
51 * @state:    The current cpu state
52 * @target:    The target state
53 * @thread:    Pointer to the hotplug thread
54 * @should_run:    Thread should execute
55 * @rollback:    Perform a rollback
56 * @single:    Single callback invocation
57 * @bringup:    Single callback bringup or teardown selector
58 * @cb_state:    The state for a single callback (install/uninstall)
59 * @result:    Result of the operation
60 * @done_up:    Signal completion to the issuer of the task for cpu-up
61 * @done_down:    Signal completion to the issuer of the task for cpu-down
62 */
63struct cpuhp_cpu_state {
64    enum cpuhp_state state;
65    enum cpuhp_state target;
66    enum cpuhp_state fail;
67#ifdef CONFIG_SMP
68    struct task_struct *thread;
69    bool should_run;
70    bool rollback;
71    bool single;
72    bool bringup;
73    struct hlist_node *node;
74    struct hlist_node *last;
75    enum cpuhp_state cb_state;
76    int result;
77    struct completion done_up;
78    struct completion done_down;
79#endif
80};
81
82static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
83    .fail = CPUHP_INVALID,
84};
85
86#ifdef CONFIG_SMP
87cpumask_t cpus_booted_once_mask;
88#endif
89
90#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
91static struct lockdep_map cpuhp_state_up_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
92static struct lockdep_map cpuhp_state_down_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);
93
94static inline void cpuhp_lock_acquire(bool bringup)
95{
96    lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
97}
98
99static inline void cpuhp_lock_release(bool bringup)
100{
101    lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
102}
103#else
104
105static inline void cpuhp_lock_acquire(bool bringup)
106{
107}
108static inline void cpuhp_lock_release(bool bringup)
109{
110}
111
112#endif
113
114/**
115 * cpuhp_step - Hotplug state machine step
116 * @name:    Name of the step
117 * @startup:    Startup function of the step
118 * @teardown:    Teardown function of the step
119 * @cant_stop:    Bringup/teardown can't be stopped at this step
120 */
121struct cpuhp_step {
122    const char *name;
123    union {
124        int (*single)(unsigned int cpu);
125        int (*multi)(unsigned int cpu, struct hlist_node *node);
126    } startup;
127    union {
128        int (*single)(unsigned int cpu);
129        int (*multi)(unsigned int cpu, struct hlist_node *node);
130    } teardown;
131    struct hlist_head list;
132    bool cant_stop;
133    bool multi_instance;
134};
135
136static DEFINE_MUTEX(cpuhp_state_mutex);
137static struct cpuhp_step cpuhp_hp_states[];
138
139static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
140{
141    return cpuhp_hp_states + state;
142}
143
144/**
145 * cpuhp_invoke_callback _ Invoke the callbacks for a given state
146 * @cpu:    The cpu for which the callback should be invoked
147 * @state:    The state to do callbacks for
148 * @bringup:    True if the bringup callback should be invoked
149 * @node:    For multi-instance, do a single entry callback for install/remove
150 * @lastp:    For multi-instance rollback, remember how far we got
151 *
152 * Called from cpu hotplug and from the state register machinery.
153 */
154static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node,
155                                 struct hlist_node **lastp)
156{
157    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
158    struct cpuhp_step *step = cpuhp_get_step(state);
159    int (*cbm)(unsigned int cpu, struct hlist_node *node);
160    int (*cb)(unsigned int cpu);
161    int ret, cnt;
162
163    if (st->fail == state) {
164        st->fail = CPUHP_INVALID;
165
166        if (!(bringup ? step->startup.single : step->teardown.single)) {
167            return 0;
168        }
169
170        return -EAGAIN;
171    }
172
173    if (!step->multi_instance) {
174        WARN_ON_ONCE(lastp && *lastp);
175        cb = bringup ? step->startup.single : step->teardown.single;
176        if (!cb) {
177            return 0;
178        }
179        trace_cpuhp_enter(cpu, st->target, state, cb);
180        ret = cb(cpu);
181        trace_cpuhp_exit(cpu, st->state, state, ret);
182        return ret;
183    }
184    cbm = bringup ? step->startup.multi : step->teardown.multi;
185    if (!cbm) {
186        return 0;
187    }
188
189    /* Single invocation for instance add/remove */
190    if (node) {
191        WARN_ON_ONCE(lastp && *lastp);
192        trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
193        ret = cbm(cpu, node);
194        trace_cpuhp_exit(cpu, st->state, state, ret);
195        return ret;
196    }
197
198    /* State transition. Invoke on all instances */
199    cnt = 0;
200    hlist_for_each(node, &step->list)
201    {
202        if (lastp && node == *lastp) {
203            break;
204        }
205
206        trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
207        ret = cbm(cpu, node);
208        trace_cpuhp_exit(cpu, st->state, state, ret);
209        if (ret) {
210            if (!lastp) {
211                goto err;
212            }
213
214            *lastp = node;
215            return ret;
216        }
217        cnt++;
218    }
219    if (lastp) {
220        *lastp = NULL;
221    }
222    return 0;
223err:
224    /* Rollback the instances if one failed */
225    cbm = !bringup ? step->startup.multi : step->teardown.multi;
226    if (!cbm) {
227        return ret;
228    }
229
230    hlist_for_each(node, &step->list)
231    {
232        if (!cnt--) {
233            break;
234        }
235
236        trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
237        ret = cbm(cpu, node);
238        trace_cpuhp_exit(cpu, st->state, state, ret);
239        /*
240         * Rollback must not fail,
241         */
242        WARN_ON_ONCE(ret);
243    }
244    return ret;
245}
246
247#ifdef CONFIG_SMP
248static bool cpuhp_is_ap_state(enum cpuhp_state state)
249{
250    /*
251     * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
252     * purposes as that state is handled explicitly in cpu_down.
253     */
254    return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
255}
256
257static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
258{
259    struct completion *done = bringup ? &st->done_up : &st->done_down;
260    wait_for_completion(done);
261}
262
263static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
264{
265    struct completion *done = bringup ? &st->done_up : &st->done_down;
266    complete(done);
267}
268
269/*
270 * The former STARTING/DYING states, ran with IRQs disabled and must not fail.
271 */
272static bool cpuhp_is_atomic_state(enum cpuhp_state state)
273{
274    return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
275}
276
277/* Serializes the updates to cpu_online_mask, cpu_present_mask */
278static DEFINE_MUTEX(cpu_add_remove_lock);
279bool cpuhp_tasks_frozen;
280EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
281
282/*
283 * The following two APIs (cpu_maps_update_begin/done) must be used when
284 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
285 */
286void cpu_maps_update_begin(void)
287{
288    mutex_lock(&cpu_add_remove_lock);
289}
290EXPORT_SYMBOL_GPL(cpu_maps_update_begin);
291
292void cpu_maps_update_done(void)
293{
294    mutex_unlock(&cpu_add_remove_lock);
295}
296EXPORT_SYMBOL_GPL(cpu_maps_update_done);
297
298/*
299 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
300 * Should always be manipulated under cpu_add_remove_lock
301 */
302static int cpu_hotplug_disabled;
303
304#ifdef CONFIG_HOTPLUG_CPU
305
306DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
307
308void cpus_read_lock(void)
309{
310    percpu_down_read(&cpu_hotplug_lock);
311}
312EXPORT_SYMBOL_GPL(cpus_read_lock);
313
314int cpus_read_trylock(void)
315{
316    return percpu_down_read_trylock(&cpu_hotplug_lock);
317}
318EXPORT_SYMBOL_GPL(cpus_read_trylock);
319
320void cpus_read_unlock(void)
321{
322    percpu_up_read(&cpu_hotplug_lock);
323}
324EXPORT_SYMBOL_GPL(cpus_read_unlock);
325
326void cpus_write_lock(void)
327{
328    percpu_down_write(&cpu_hotplug_lock);
329}
330
331void cpus_write_unlock(void)
332{
333    percpu_up_write(&cpu_hotplug_lock);
334}
335
336void lockdep_assert_cpus_held(void)
337{
338    /*
339     * We can't have hotplug operations before userspace starts running,
340     * and some init codepaths will knowingly not take the hotplug lock.
341     * This is all valid, so mute lockdep until it makes sense to report
342     * unheld locks.
343     */
344    if (system_state < SYSTEM_RUNNING) {
345        return;
346    }
347
348    percpu_rwsem_assert_held(&cpu_hotplug_lock);
349}
350
351static void lockdep_acquire_cpus_lock(void)
352{
353    rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_);
354}
355
356static void lockdep_release_cpus_lock(void)
357{
358    rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_);
359}
360
361/*
362 * Wait for currently running CPU hotplug operations to complete (if any) and
363 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
364 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
365 * hotplug path before performing hotplug operations. So acquiring that lock
366 * guarantees mutual exclusion from any currently running hotplug operations.
367 */
368void cpu_hotplug_disable(void)
369{
370    cpu_maps_update_begin();
371    cpu_hotplug_disabled++;
372    cpu_maps_update_done();
373}
374EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
375
376static void _cpu_hotplug_enable(void)
377{
378    if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n")) {
379        return;
380    }
381    cpu_hotplug_disabled--;
382}
383
384void cpu_hotplug_enable(void)
385{
386    cpu_maps_update_begin();
387    _cpu_hotplug_enable();
388    cpu_maps_update_done();
389}
390EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
391
392#else
393
394static void lockdep_acquire_cpus_lock(void)
395{
396}
397
398static void lockdep_release_cpus_lock(void)
399{
400}
401
402#endif /* CONFIG_HOTPLUG_CPU */
403
404/*
405 * Architectures that need SMT-specific errata handling during SMT hotplug
406 * should override this.
407 */
408void __weak arch_smt_update(void)
409{
410}
411
412#ifdef CONFIG_HOTPLUG_SMT
413enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
414
415void __init cpu_smt_disable(bool force)
416{
417    if (!cpu_smt_possible()) {
418        return;
419    }
420
421    if (force) {
422        pr_info("SMT: Force disabled\n");
423        cpu_smt_control = CPU_SMT_FORCE_DISABLED;
424    } else {
425        pr_info("SMT: disabled\n");
426        cpu_smt_control = CPU_SMT_DISABLED;
427    }
428}
429
430/*
431 * The decision whether SMT is supported can only be done after the full
432 * CPU identification. Called from architecture code.
433 */
434void __init cpu_smt_check_topology(void)
435{
436    if (!topology_smt_supported()) {
437        cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
438    }
439}
440
441static int __init smt_cmdline_disable(char *str)
442{
443    cpu_smt_disable(str && !strcmp(str, "force"));
444    return 0;
445}
446early_param("nosmt", smt_cmdline_disable);
447
448static inline bool cpu_smt_allowed(unsigned int cpu)
449{
450    if (cpu_smt_control == CPU_SMT_ENABLED) {
451        return true;
452    }
453
454    if (topology_is_primary_thread(cpu)) {
455        return true;
456    }
457
458    /*
459     * On x86 it's required to boot all logical CPUs at least once so
460     * that the init code can get a chance to set CR4.MCE on each
461     * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any
462     * core will shutdown the machine.
463     */
464    return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
465}
466
467/* Returns true if SMT is not supported of forcefully (irreversibly) disabled */
468bool cpu_smt_possible(void)
469{
470    return cpu_smt_control != CPU_SMT_FORCE_DISABLED && cpu_smt_control != CPU_SMT_NOT_SUPPORTED;
471}
472EXPORT_SYMBOL_GPL(cpu_smt_possible);
473#else
474static inline bool cpu_smt_allowed(unsigned int cpu)
475{
476    return true;
477}
478#endif
479
480static inline enum cpuhp_state cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
481{
482    enum cpuhp_state prev_state = st->state;
483
484    st->rollback = false;
485    st->last = NULL;
486
487    st->target = target;
488    st->single = false;
489    st->bringup = st->state < target;
490
491    return prev_state;
492}
493
494static inline void cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
495{
496    st->rollback = true;
497
498    /*
499     * If we have st->last we need to undo partial multi_instance of this
500     * state first. Otherwise start undo at the previous state.
501     */
502    if (!st->last) {
503        if (st->bringup) {
504            st->state--;
505        } else {
506            st->state++;
507        }
508    }
509
510    st->target = prev_state;
511    st->bringup = !st->bringup;
512}
513
514/* Regular hotplug invocation of the AP hotplug thread */
515static void _cpuhp_kick_ap(struct cpuhp_cpu_state *st)
516{
517    if (!st->single && st->state == st->target) {
518        return;
519    }
520
521    st->result = 0;
522    /*
523     * Make sure the above stores are visible before should_run becomes
524     * true. Paired with the mb() above in cpuhp_thread_fun()
525     */
526    smp_mb();
527    st->should_run = true;
528    wake_up_process(st->thread);
529    wait_for_ap_thread(st, st->bringup);
530}
531
532static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
533{
534    enum cpuhp_state prev_state;
535    int ret;
536
537    prev_state = cpuhp_set_state(st, target);
538    _cpuhp_kick_ap(st);
539    if ((ret = st->result)) {
540        cpuhp_reset_state(st, prev_state);
541        _cpuhp_kick_ap(st);
542    }
543
544    return ret;
545}
546
547static int bringup_wait_for_ap(unsigned int cpu)
548{
549    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
550
551    /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
552    wait_for_ap_thread(st, true);
553    if (WARN_ON_ONCE((!cpu_online(cpu)))) {
554        return -ECANCELED;
555    }
556
557    /* Unpark the hotplug thread of the target cpu */
558    kthread_unpark(st->thread);
559
560    /*
561     * SMT soft disabling on X86 requires to bring the CPU out of the
562     * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
563     * CPU marked itself as booted_once in notify_cpu_starting() so the
564     * cpu_smt_allowed() check will now return false if this is not the
565     * primary sibling.
566     */
567    if (!cpu_smt_allowed(cpu)) {
568        return -ECANCELED;
569    }
570
571    if (st->target <= CPUHP_AP_ONLINE_IDLE) {
572        return 0;
573    }
574
575    return cpuhp_kick_ap(st, st->target);
576}
577
578static int bringup_cpu(unsigned int cpu)
579{
580    struct task_struct *idle = idle_thread_get(cpu);
581    int ret;
582
583    /*
584     * Reset stale stack state from the last time this CPU was online.
585     */
586    scs_task_reset(idle);
587    kasan_unpoison_task_stack(idle);
588
589    /*
590     * Some architectures have to walk the irq descriptors to
591     * setup the vector space for the cpu which comes online.
592     * Prevent irq alloc/free across the bringup.
593     */
594    irq_lock_sparse();
595
596    /* Arch-specific enabling code. */
597    ret = __cpu_up(cpu, idle);
598    irq_unlock_sparse();
599    if (ret) {
600        return ret;
601    }
602    return bringup_wait_for_ap(cpu);
603}
604
605static int finish_cpu(unsigned int cpu)
606{
607    struct task_struct *idle = idle_thread_get(cpu);
608    struct mm_struct *mm = idle->active_mm;
609
610    /*
611     * idle_task_exit() will have switched to &init_mm, now
612     * clean up any remaining active_mm state.
613     */
614    if (mm != &init_mm) {
615        idle->active_mm = &init_mm;
616    }
617    mmdrop(mm);
618    return 0;
619}
620
621/*
622 * Hotplug state machine related functions
623 */
624
625static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
626{
627    for (st->state--; st->state > st->target; st->state--) {
628        cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
629    }
630}
631
632static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
633{
634    if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
635        return true;
636    }
637    /*
638     * When CPU hotplug is disabled, then taking the CPU down is not
639     * possible because takedown_cpu() and the architecture and
640     * subsystem specific mechanisms are not available. So the CPU
641     * which would be completely unplugged again needs to stay around
642     * in the current state.
643     */
644    return st->state <= CPUHP_BRINGUP_CPU;
645}
646
647static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
648{
649    enum cpuhp_state prev_state = st->state;
650    int ret = 0;
651
652    while (st->state < target) {
653        st->state++;
654        ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
655        if (ret) {
656            if (can_rollback_cpu(st)) {
657                st->target = prev_state;
658                undo_cpu_up(cpu, st);
659            }
660            break;
661        }
662    }
663    return ret;
664}
665
666/*
667 * The cpu hotplug threads manage the bringup and teardown of the cpus
668 */
669static void cpuhp_create(unsigned int cpu)
670{
671    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
672
673    init_completion(&st->done_up);
674    init_completion(&st->done_down);
675}
676
677static int cpuhp_should_run(unsigned int cpu)
678{
679    struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
680
681    return st->should_run;
682}
683
684/*
685 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
686 * callbacks when a state gets [un]installed at runtime.
687 *
688 * Each invocation of this function by the smpboot thread does a single AP
689 * state callback.
690 *
691 * It has 3 modes of operation:
692 *  - single: runs st->cb_state
693 *  - up:     runs ++st->state, while st->state < st->target
694 *  - down:   runs st->state--, while st->state > st->target
695 *
696 * When complete or on error, should_run is cleared and the completion is fired.
697 */
698static void cpuhp_thread_fun(unsigned int cpu)
699{
700    struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
701    bool bringup = st->bringup;
702    enum cpuhp_state state;
703
704    if (WARN_ON_ONCE(!st->should_run)) {
705        return;
706    }
707
708    /*
709     * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
710     * that if we see ->should_run we also see the rest of the state.
711     */
712    smp_mb();
713
714    /*
715     * The BP holds the hotplug lock, but we're now running on the AP,
716     * ensure that anybody asserting the lock is held, will actually find
717     * it so.
718     */
719    lockdep_acquire_cpus_lock();
720    cpuhp_lock_acquire(bringup);
721
722    if (st->single) {
723        state = st->cb_state;
724        st->should_run = false;
725    } else {
726        if (bringup) {
727            st->state++;
728            state = st->state;
729            st->should_run = (st->state < st->target);
730            WARN_ON_ONCE(st->state > st->target);
731        } else {
732            state = st->state;
733            st->state--;
734            st->should_run = (st->state > st->target);
735            WARN_ON_ONCE(st->state < st->target);
736        }
737    }
738
739    WARN_ON_ONCE(!cpuhp_is_ap_state(state));
740
741    if (cpuhp_is_atomic_state(state)) {
742        local_irq_disable();
743        st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
744        local_irq_enable();
745
746        /*
747         * STARTING/DYING must not fail!
748         */
749        WARN_ON_ONCE(st->result);
750    } else {
751        st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
752    }
753
754    if (st->result) {
755        /*
756         * If we fail on a rollback, we're up a creek without no
757         * paddle, no way forward, no way back. We loose, thanks for
758         * playing.
759         */
760        WARN_ON_ONCE(st->rollback);
761        st->should_run = false;
762    }
763
764    cpuhp_lock_release(bringup);
765    lockdep_release_cpus_lock();
766
767    if (!st->should_run) {
768        complete_ap_thread(st, bringup);
769    }
770}
771
772/* Invoke a single callback on a remote cpu */
773static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node)
774{
775    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
776    int ret;
777
778    if (!cpu_online(cpu)) {
779        return 0;
780    }
781
782    cpuhp_lock_acquire(false);
783    cpuhp_lock_release(false);
784
785    cpuhp_lock_acquire(true);
786    cpuhp_lock_release(true);
787
788    /*
789     * If we are up and running, use the hotplug thread. For early calls
790     * we invoke the thread function directly.
791     */
792    if (!st->thread) {
793        return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
794    }
795
796    st->rollback = false;
797    st->last = NULL;
798
799    st->node = node;
800    st->bringup = bringup;
801    st->cb_state = state;
802    st->single = true;
803
804    _cpuhp_kick_ap(st);
805
806    /*
807     * If we failed and did a partial, do a rollback.
808     */
809    if ((ret = st->result) && st->last) {
810        st->rollback = true;
811        st->bringup = !bringup;
812
813        _cpuhp_kick_ap(st);
814    }
815
816    /*
817     * Clean up the leftovers so the next hotplug operation wont use stale
818     * data.
819     */
820    st->node = st->last = NULL;
821    return ret;
822}
823
824static int cpuhp_kick_ap_work(unsigned int cpu)
825{
826    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
827    enum cpuhp_state prev_state = st->state;
828    int ret;
829
830    cpuhp_lock_acquire(false);
831    cpuhp_lock_release(false);
832
833    cpuhp_lock_acquire(true);
834    cpuhp_lock_release(true);
835
836    trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
837    ret = cpuhp_kick_ap(st, st->target);
838    trace_cpuhp_exit(cpu, st->state, prev_state, ret);
839
840    return ret;
841}
842
843static struct smp_hotplug_thread cpuhp_threads = {
844    .store = &cpuhp_state.thread,
845    .create = &cpuhp_create,
846    .thread_should_run = cpuhp_should_run,
847    .thread_fn = cpuhp_thread_fun,
848    .thread_comm = "cpuhp/%u",
849    .selfparking = true,
850};
851
852void __init cpuhp_threads_init(void)
853{
854    BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
855    kthread_unpark(this_cpu_read(cpuhp_state.thread));
856}
857
858/*
859 *
860 * Serialize hotplug trainwrecks outside of the cpu_hotplug_lock
861 * protected region.
862 *
863 * The operation is still serialized against concurrent CPU hotplug via
864 * cpu_add_remove_lock, i.e. CPU map protection.  But it is _not_
865 * serialized against other hotplug related activity like adding or
866 * removing of state callbacks and state instances, which invoke either the
867 * startup or the teardown callback of the affected state.
868 *
869 * This is required for subsystems which are unfixable vs. CPU hotplug and
870 * evade lock inversion problems by scheduling work which has to be
871 * completed _before_ cpu_up()/_cpu_down() returns.
872 *
873 * Don't even think about adding anything to this for any new code or even
874 * drivers. It's only purpose is to keep existing lock order trainwrecks
875 * working.
876 *
877 * For cpu_down() there might be valid reasons to finish cleanups which are
878 * not required to be done under cpu_hotplug_lock, but that's a different
879 * story and would be not invoked via this.
880 */
881static void cpu_up_down_serialize_trainwrecks(bool tasks_frozen)
882{
883    /*
884     * cpusets delegate hotplug operations to a worker to "solve" the
885     * lock order problems. Wait for the worker, but only if tasks are
886     * _not_ frozen (suspend, hibernate) as that would wait forever.
887     *
888     * The wait is required because otherwise the hotplug operation
889     * returns with inconsistent state, which could even be observed in
890     * user space when a new CPU is brought up. The CPU plug uevent
891     * would be delivered and user space reacting on it would fail to
892     * move tasks to the newly plugged CPU up to the point where the
893     * work has finished because up to that point the newly plugged CPU
894     * is not assignable in cpusets/cgroups. On unplug that's not
895     * necessarily a visible issue, but it is still inconsistent state,
896     * which is the real problem which needs to be "fixed". This can't
897     * prevent the transient state between scheduling the work and
898     * returning from waiting for it.
899     */
900    if (!tasks_frozen) {
901        cpuset_wait_for_hotplug();
902    }
903}
904
905#ifdef CONFIG_HOTPLUG_CPU
906#ifndef arch_clear_mm_cpumask_cpu
907#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
908#endif
909
910/**
911 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
912 * @cpu: a CPU id
913 *
914 * This function walks all processes, finds a valid mm struct for each one and
915 * then clears a corresponding bit in mm's cpumask.  While this all sounds
916 * trivial, there are various non-obvious corner cases, which this function
917 * tries to solve in a safe manner.
918 *
919 * Also note that the function uses a somewhat relaxed locking scheme, so it may
920 * be called only for an already offlined CPU.
921 */
922void clear_tasks_mm_cpumask(int cpu)
923{
924    struct task_struct *p;
925
926    /*
927     * This function is called after the cpu is taken down and marked
928     * offline, so its not like new tasks will ever get this cpu set in
929     * their mm mask. -- Peter Zijlstra
930     * Thus, we may use rcu_read_lock() here, instead of grabbing
931     * full-fledged tasklist_lock.
932     */
933    WARN_ON(cpu_online(cpu));
934    rcu_read_lock();
935    for_each_process(p)
936    {
937        struct task_struct *t;
938
939        /*
940         * Main thread might exit, but other threads may still have
941         * a valid mm. Find one.
942         */
943        t = find_lock_task_mm(p);
944        if (!t) {
945            continue;
946        }
947        arch_clear_mm_cpumask_cpu(cpu, t->mm);
948        task_unlock(t);
949    }
950    rcu_read_unlock();
951}
952
953/* Take this CPU down. */
954static int take_cpu_down(void *_param)
955{
956    struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
957    enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
958    int err, cpu = smp_processor_id();
959    int ret;
960
961    /* Ensure this CPU doesn't handle any more interrupts. */
962    err = __cpu_disable();
963    if (err < 0) {
964        return err;
965    }
966
967    /*
968     * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
969     * do this step again.
970     */
971    WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
972    st->state--;
973    /* Invoke the former CPU_DYING callbacks */
974    for (; st->state > target; st->state--) {
975        ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
976        /*
977         * DYING must not fail!
978         */
979        WARN_ON_ONCE(ret);
980    }
981
982    /* Give up timekeeping duties */
983    tick_handover_do_timer();
984    /* Remove CPU from timer broadcasting */
985    tick_offline_cpu(cpu);
986    /* Park the stopper thread */
987    stop_machine_park(cpu);
988    return 0;
989}
990
991static int takedown_cpu(unsigned int cpu)
992{
993    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
994    int err;
995
996    /* Park the smpboot threads */
997    kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
998
999    /*
1000     * Prevent irq alloc/free while the dying cpu reorganizes the
1001     * interrupt affinities.
1002     */
1003    irq_lock_sparse();
1004
1005    /*
1006     * So now all preempt/rcu users must observe !cpu_active().
1007     */
1008    err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
1009    if (err) {
1010        /* CPU refused to die */
1011        irq_unlock_sparse();
1012        /* Unpark the hotplug thread so we can rollback there */
1013        kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
1014        return err;
1015    }
1016    BUG_ON(cpu_online(cpu));
1017
1018    /*
1019     * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
1020     * all runnable tasks from the CPU, there's only the idle task left now
1021     * that the migration thread is done doing the stop_machine thing.
1022     *
1023     * Wait for the stop thread to go away.
1024     */
1025    wait_for_ap_thread(st, false);
1026    BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);
1027
1028    /* Interrupts are moved away from the dying cpu, reenable alloc/free */
1029    irq_unlock_sparse();
1030
1031    hotplug_cpu__broadcast_tick_pull(cpu);
1032    /* This actually kills the CPU. */
1033    __cpu_die(cpu);
1034
1035    tick_cleanup_dead_cpu(cpu);
1036    rcutree_migrate_callbacks(cpu);
1037    return 0;
1038}
1039
1040static void cpuhp_complete_idle_dead(void *arg)
1041{
1042    struct cpuhp_cpu_state *st = arg;
1043
1044    complete_ap_thread(st, false);
1045}
1046
1047void cpuhp_report_idle_dead(void)
1048{
1049    struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1050
1051    BUG_ON(st->state != CPUHP_AP_OFFLINE);
1052    rcu_report_dead(smp_processor_id());
1053    st->state = CPUHP_AP_IDLE_DEAD;
1054    /*
1055     * We cannot call complete after rcu_report_dead() so we delegate it
1056     * to an online cpu.
1057     */
1058    smp_call_function_single(cpumask_first(cpu_online_mask), cpuhp_complete_idle_dead, st, 0);
1059}
1060
1061static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
1062{
1063    for (st->state++; st->state < st->target; st->state++) {
1064        cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1065    }
1066}
1067
1068static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target)
1069{
1070    enum cpuhp_state prev_state = st->state;
1071    int ret = 0;
1072
1073    for (; st->state > target; st->state--) {
1074        ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
1075        if (ret) {
1076            st->target = prev_state;
1077            if (st->state < prev_state) {
1078                undo_cpu_down(cpu, st);
1079            }
1080            break;
1081        }
1082    }
1083    return ret;
1084}
1085
1086/* Requires cpu_add_remove_lock to be held */
1087static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1088{
1089    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1090    int prev_state, ret = 0;
1091
1092    if (num_active_cpus() == 1 && cpu_active(cpu)) {
1093        return -EBUSY;
1094    }
1095
1096    if (!cpu_present(cpu)) {
1097        return -EINVAL;
1098    }
1099
1100#ifdef CONFIG_CPU_ISOLATION_OPT
1101    if (!tasks_frozen && !cpu_isolated(cpu) && num_online_uniso_cpus() == 1) {
1102        return -EBUSY;
1103    }
1104#endif
1105
1106    cpus_write_lock();
1107
1108    cpuhp_tasks_frozen = tasks_frozen;
1109
1110    prev_state = cpuhp_set_state(st, target);
1111    /*
1112     * If the current CPU state is in the range of the AP hotplug thread,
1113     * then we need to kick the thread.
1114     */
1115    if (st->state > CPUHP_TEARDOWN_CPU) {
1116        st->target = max((int)target, CPUHP_TEARDOWN_CPU);
1117        ret = cpuhp_kick_ap_work(cpu);
1118        /*
1119         * The AP side has done the error rollback already. Just
1120         * return the error code..
1121         */
1122        if (ret) {
1123            goto out;
1124        }
1125
1126        /*
1127         * We might have stopped still in the range of the AP hotplug
1128         * thread. Nothing to do anymore.
1129         */
1130        if (st->state > CPUHP_TEARDOWN_CPU) {
1131            goto out;
1132        }
1133
1134        st->target = target;
1135    }
1136    /*
1137     * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
1138     * to do the further cleanups.
1139     */
1140    ret = cpuhp_down_callbacks(cpu, st, target);
1141    if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
1142        cpuhp_reset_state(st, prev_state);
1143        _cpuhp_kick_ap(st);
1144    }
1145
1146out:
1147    cpus_write_unlock();
1148    /*
1149     * Do post unplug cleanup. This is still protected against
1150     * concurrent CPU hotplug via cpu_add_remove_lock.
1151     */
1152    lockup_detector_cleanup();
1153    arch_smt_update();
1154    cpu_up_down_serialize_trainwrecks(tasks_frozen);
1155    return ret;
1156}
1157
1158static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
1159{
1160    if (cpu_hotplug_disabled) {
1161        return -EBUSY;
1162    }
1163    return _cpu_down(cpu, 0, target);
1164}
1165
1166static int cpu_down(unsigned int cpu, enum cpuhp_state target)
1167{
1168    int err;
1169
1170    cpu_maps_update_begin();
1171    err = cpu_down_maps_locked(cpu, target);
1172    cpu_maps_update_done();
1173    return err;
1174}
1175
1176/**
1177 * cpu_device_down - Bring down a cpu device
1178 * @dev: Pointer to the cpu device to offline
1179 *
1180 * This function is meant to be used by device core cpu subsystem only.
1181 *
1182 * Other subsystems should use remove_cpu() instead.
1183 */
1184int cpu_device_down(struct device *dev)
1185{
1186    return cpu_down(dev->id, CPUHP_OFFLINE);
1187}
1188
1189int remove_cpu(unsigned int cpu)
1190{
1191    int ret;
1192
1193    lock_device_hotplug();
1194    ret = device_offline(get_cpu_device(cpu));
1195    unlock_device_hotplug();
1196
1197    return ret;
1198}
1199EXPORT_SYMBOL_GPL(remove_cpu);
1200
1201void smp_shutdown_nonboot_cpus(unsigned int primary_cpu)
1202{
1203    unsigned int cpu;
1204    int error;
1205
1206    cpu_maps_update_begin();
1207
1208    /*
1209     * Make certain the cpu I'm about to reboot on is online.
1210     *
1211     * This is inline to what migrate_to_reboot_cpu() already do.
1212     */
1213    if (!cpu_online(primary_cpu)) {
1214        primary_cpu = cpumask_first(cpu_online_mask);
1215    }
1216
1217    for_each_online_cpu(cpu)
1218    {
1219        if (cpu == primary_cpu) {
1220            continue;
1221        }
1222
1223        error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
1224        if (error) {
1225            pr_err("Failed to offline CPU%d - error=%d", cpu, error);
1226            break;
1227        }
1228    }
1229
1230    /*
1231     * Ensure all but the reboot CPU are offline.
1232     */
1233    BUG_ON(num_online_cpus() > 1);
1234
1235    /*
1236     * Make sure the CPUs won't be enabled by someone else after this
1237     * point. Kexec will reboot to a new kernel shortly resetting
1238     * everything along the way.
1239     */
1240    cpu_hotplug_disabled++;
1241
1242    cpu_maps_update_done();
1243}
1244
1245#else
1246#define takedown_cpu NULL
1247#endif /* CONFIG_HOTPLUG_CPU */
1248
1249/**
1250 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
1251 * @cpu: cpu that just started
1252 *
1253 * It must be called by the arch code on the new cpu, before the new cpu
1254 * enables interrupts and before the "boot" cpu returns from __cpu_up().
1255 */
1256void notify_cpu_starting(unsigned int cpu)
1257{
1258    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1259    enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
1260    int ret;
1261
1262    rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
1263    cpumask_set_cpu(cpu, &cpus_booted_once_mask);
1264    while (st->state < target) {
1265        st->state++;
1266        ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
1267        /*
1268         * STARTING must not fail!
1269         */
1270        WARN_ON_ONCE(ret);
1271    }
1272}
1273
1274/*
1275 * Called from the idle task. Wake up the controlling task which brings the
1276 * hotplug thread of the upcoming CPU up and then delegates the rest of the
1277 * online bringup to the hotplug thread.
1278 */
1279void cpuhp_online_idle(enum cpuhp_state state)
1280{
1281    struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
1282
1283    /* Happens for the boot cpu */
1284    if (state != CPUHP_AP_ONLINE_IDLE) {
1285        return;
1286    }
1287
1288    /*
1289     * Unpart the stopper thread before we start the idle loop (and start
1290     * scheduling); this ensures the stopper task is always available.
1291     */
1292    stop_machine_unpark(smp_processor_id());
1293
1294    st->state = CPUHP_AP_ONLINE_IDLE;
1295    complete_ap_thread(st, true);
1296}
1297
1298/* Requires cpu_add_remove_lock to be held */
1299static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
1300{
1301    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1302    struct task_struct *idle;
1303    int ret = 0;
1304
1305    cpus_write_lock();
1306
1307    if (!cpu_present(cpu)) {
1308        ret = -EINVAL;
1309        goto out;
1310    }
1311
1312    /*
1313     * The caller of cpu_up() might have raced with another
1314     * caller. Nothing to do.
1315     */
1316    if (st->state >= target) {
1317        goto out;
1318    }
1319
1320    if (st->state == CPUHP_OFFLINE) {
1321        /* Let it fail before we try to bring the cpu up */
1322        idle = idle_thread_get(cpu);
1323        if (IS_ERR(idle)) {
1324            ret = PTR_ERR(idle);
1325            goto out;
1326        }
1327    }
1328
1329    cpuhp_tasks_frozen = tasks_frozen;
1330
1331    cpuhp_set_state(st, target);
1332    /*
1333     * If the current CPU state is in the range of the AP hotplug thread,
1334     * then we need to kick the thread once more.
1335     */
1336    if (st->state > CPUHP_BRINGUP_CPU) {
1337        ret = cpuhp_kick_ap_work(cpu);
1338        /*
1339         * The AP side has done the error rollback already. Just
1340         * return the error code..
1341         */
1342        if (ret) {
1343            goto out;
1344        }
1345    }
1346
1347    /*
1348     * Try to reach the target state. We max out on the BP at
1349     * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
1350     * responsible for bringing it up to the target state.
1351     */
1352    target = min((int)target, CPUHP_BRINGUP_CPU);
1353    ret = cpuhp_up_callbacks(cpu, st, target);
1354out:
1355    cpus_write_unlock();
1356    arch_smt_update();
1357    cpu_up_down_serialize_trainwrecks(tasks_frozen);
1358    return ret;
1359}
1360
1361static int cpu_up(unsigned int cpu, enum cpuhp_state target)
1362{
1363    int err = 0;
1364
1365    if (!cpu_possible(cpu)) {
1366        pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", cpu);
1367#if defined(CONFIG_IA64)
1368        pr_err("please check additional_cpus= boot parameter\n");
1369#endif
1370        return -EINVAL;
1371    }
1372
1373    err = try_online_node(cpu_to_node(cpu));
1374    if (err) {
1375        return err;
1376    }
1377
1378    cpu_maps_update_begin();
1379
1380    if (cpu_hotplug_disabled) {
1381        err = -EBUSY;
1382        goto out;
1383    }
1384    if (!cpu_smt_allowed(cpu)) {
1385        err = -EPERM;
1386        goto out;
1387    }
1388
1389    err = _cpu_up(cpu, 0, target);
1390out:
1391    cpu_maps_update_done();
1392    return err;
1393}
1394
1395/**
1396 * cpu_device_up - Bring up a cpu device
1397 * @dev: Pointer to the cpu device to online
1398 *
1399 * This function is meant to be used by device core cpu subsystem only.
1400 *
1401 * Other subsystems should use add_cpu() instead.
1402 */
1403int cpu_device_up(struct device *dev)
1404{
1405    return cpu_up(dev->id, CPUHP_ONLINE);
1406}
1407
1408int add_cpu(unsigned int cpu)
1409{
1410    int ret;
1411
1412    lock_device_hotplug();
1413    ret = device_online(get_cpu_device(cpu));
1414    unlock_device_hotplug();
1415
1416    return ret;
1417}
1418EXPORT_SYMBOL_GPL(add_cpu);
1419
1420/**
1421 * bringup_hibernate_cpu - Bring up the CPU that we hibernated on
1422 * @sleep_cpu: The cpu we hibernated on and should be brought up.
1423 *
1424 * On some architectures like arm64, we can hibernate on any CPU, but on
1425 * wake up the CPU we hibernated on might be offline as a side effect of
1426 * using maxcpus= for example.
1427 */
1428int bringup_hibernate_cpu(unsigned int sleep_cpu)
1429{
1430    int ret;
1431
1432    if (!cpu_online(sleep_cpu)) {
1433        pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n");
1434        ret = cpu_up(sleep_cpu, CPUHP_ONLINE);
1435        if (ret) {
1436            pr_err("Failed to bring hibernate-CPU up!\n");
1437            return ret;
1438        }
1439    }
1440    return 0;
1441}
1442
1443void bringup_nonboot_cpus(unsigned int setup_max_cpus)
1444{
1445    unsigned int cpu;
1446
1447    for_each_present_cpu(cpu)
1448    {
1449        if (num_online_cpus() >= setup_max_cpus) {
1450            break;
1451        }
1452        if (!cpu_online(cpu)) {
1453            cpu_up(cpu, CPUHP_ONLINE);
1454        }
1455    }
1456}
1457
1458#ifdef CONFIG_PM_SLEEP_SMP
1459static cpumask_var_t frozen_cpus;
1460
1461int freeze_secondary_cpus(int primary)
1462{
1463    int cpu, error = 0;
1464
1465    cpu_maps_update_begin();
1466    if (primary == -1) {
1467        primary = cpumask_first(cpu_online_mask);
1468        if (!housekeeping_cpu(primary, HK_FLAG_TIMER)) {
1469            primary = housekeeping_any_cpu(HK_FLAG_TIMER);
1470        }
1471    } else {
1472        if (!cpu_online(primary)) {
1473            primary = cpumask_first(cpu_online_mask);
1474        }
1475    }
1476
1477    /*
1478     * We take down all of the non-boot CPUs in one shot to avoid races
1479     * with the userspace trying to use the CPU hotplug at the same time
1480     */
1481    cpumask_clear(frozen_cpus);
1482
1483    pr_info("Disabling non-boot CPUs ...\n");
1484    for_each_online_cpu(cpu)
1485    {
1486        if (cpu == primary) {
1487            continue;
1488        }
1489
1490        if (pm_wakeup_pending()) {
1491            pr_info("Wakeup pending. Abort CPU freeze\n");
1492            error = -EBUSY;
1493            break;
1494        }
1495
1496        trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
1497        error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
1498        trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
1499        if (!error) {
1500            cpumask_set_cpu(cpu, frozen_cpus);
1501        } else {
1502            pr_err("Error taking CPU%d down: %d\n", cpu, error);
1503            break;
1504        }
1505    }
1506
1507    if (!error) {
1508        BUG_ON(num_online_cpus() > 1);
1509    } else {
1510        pr_err("Non-boot CPUs are not disabled\n");
1511    }
1512
1513    /*
1514     * Make sure the CPUs won't be enabled by someone else. We need to do
1515     * this even in case of failure as all freeze_secondary_cpus() users are
1516     * supposed to do thaw_secondary_cpus() on the failure path.
1517     */
1518    cpu_hotplug_disabled++;
1519
1520    cpu_maps_update_done();
1521    return error;
1522}
1523
1524void __weak arch_thaw_secondary_cpus_begin(void)
1525{
1526}
1527
1528void __weak arch_thaw_secondary_cpus_end(void)
1529{
1530}
1531
1532void thaw_secondary_cpus(void)
1533{
1534    int cpu, error;
1535
1536    /* Allow everyone to use the CPU hotplug again */
1537    cpu_maps_update_begin();
1538    _cpu_hotplug_enable();
1539    if (cpumask_empty(frozen_cpus)) {
1540        goto out;
1541    }
1542
1543    pr_info("Enabling non-boot CPUs ...\n");
1544
1545    arch_thaw_secondary_cpus_begin();
1546
1547    for_each_cpu(cpu, frozen_cpus)
1548    {
1549        trace_suspend_resume(TPS("CPU_ON"), cpu, true);
1550        error = _cpu_up(cpu, 1, CPUHP_ONLINE);
1551        trace_suspend_resume(TPS("CPU_ON"), cpu, false);
1552        if (!error) {
1553            pr_info("CPU%d is up\n", cpu);
1554            continue;
1555        }
1556        pr_warn("Error taking CPU%d up: %d\n", cpu, error);
1557    }
1558
1559    arch_thaw_secondary_cpus_end();
1560
1561    cpumask_clear(frozen_cpus);
1562out:
1563    cpu_maps_update_done();
1564}
1565
1566static int __init alloc_frozen_cpus(void)
1567{
1568    if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL | __GFP_ZERO)) {
1569        return -ENOMEM;
1570    }
1571    return 0;
1572}
1573core_initcall(alloc_frozen_cpus);
1574
1575/*
1576 * When callbacks for CPU hotplug notifications are being executed, we must
1577 * ensure that the state of the system with respect to the tasks being frozen
1578 * or not, as reported by the notification, remains unchanged *throughout the
1579 * duration* of the execution of the callbacks.
1580 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
1581 *
1582 * This synchronization is implemented by mutually excluding regular CPU
1583 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
1584 * Hibernate notifications.
1585 */
1586static int cpu_hotplug_pm_callback(struct notifier_block *nb, unsigned long action, void *ptr)
1587{
1588    switch (action) {
1589        case PM_SUSPEND_PREPARE:
1590        case PM_HIBERNATION_PREPARE:
1591            cpu_hotplug_disable();
1592            break;
1593
1594        case PM_POST_SUSPEND:
1595        case PM_POST_HIBERNATION:
1596            cpu_hotplug_enable();
1597            break;
1598
1599        default:
1600            return NOTIFY_DONE;
1601    }
1602
1603    return NOTIFY_OK;
1604}
1605
1606static int __init cpu_hotplug_pm_sync_init(void)
1607{
1608    /*
1609     * cpu_hotplug_pm_callback has higher priority than x86
1610     * bsp_pm_callback which depends on cpu_hotplug_pm_callback
1611     * to disable cpu hotplug to avoid cpu hotplug race.
1612     */
1613    pm_notifier(cpu_hotplug_pm_callback, 0);
1614    return 0;
1615}
1616core_initcall(cpu_hotplug_pm_sync_init);
1617
1618#endif /* CONFIG_PM_SLEEP_SMP */
1619
1620int __boot_cpu_id;
1621
1622#endif /* CONFIG_SMP */
1623
1624/* Boot processor state steps */
1625static struct cpuhp_step cpuhp_hp_states[] = {
1626    [CPUHP_OFFLINE] =
1627        {
1628            .name = "offline",
1629            .startup.single = NULL,
1630            .teardown.single = NULL,
1631        },
1632#ifdef CONFIG_SMP
1633    [CPUHP_CREATE_THREADS] =
1634        {
1635            .name = "threads:prepare",
1636            .startup.single = smpboot_create_threads,
1637            .teardown.single = NULL,
1638            .cant_stop = true,
1639        },
1640    [CPUHP_PERF_PREPARE] =
1641        {
1642            .name = "perf:prepare",
1643            .startup.single = perf_event_init_cpu,
1644            .teardown.single = perf_event_exit_cpu,
1645        },
1646    [CPUHP_RANDOM_PREPARE] = {
1647        .name			= "random:prepare",
1648        .startup.single		= random_prepare_cpu,
1649        .teardown.single	= NULL,
1650    },
1651
1652    [CPUHP_WORKQUEUE_PREP] =
1653        {
1654            .name = "workqueue:prepare",
1655            .startup.single = workqueue_prepare_cpu,
1656            .teardown.single = NULL,
1657        },
1658    [CPUHP_HRTIMERS_PREPARE] =
1659        {
1660            .name = "hrtimers:prepare",
1661            .startup.single = hrtimers_prepare_cpu,
1662            .teardown.single = hrtimers_dead_cpu,
1663        },
1664    [CPUHP_SMPCFD_PREPARE] =
1665        {
1666            .name = "smpcfd:prepare",
1667            .startup.single = smpcfd_prepare_cpu,
1668            .teardown.single = smpcfd_dead_cpu,
1669        },
1670    [CPUHP_RELAY_PREPARE] =
1671        {
1672            .name = "relay:prepare",
1673            .startup.single = relay_prepare_cpu,
1674            .teardown.single = NULL,
1675        },
1676    [CPUHP_SLAB_PREPARE] =
1677        {
1678            .name = "slab:prepare",
1679            .startup.single = slab_prepare_cpu,
1680            .teardown.single = slab_dead_cpu,
1681        },
1682    [CPUHP_RCUTREE_PREP] =
1683        {
1684            .name = "RCU/tree:prepare",
1685            .startup.single = rcutree_prepare_cpu,
1686            .teardown.single = rcutree_dead_cpu,
1687        },
1688    /*
1689     * On the tear-down path, timers_dead_cpu() must be invoked
1690     * before blk_mq_queue_reinit_notify() from notify_dead(),
1691     * otherwise a RCU stall occurs.
1692     */
1693    [CPUHP_TIMERS_PREPARE] =
1694        {
1695            .name = "timers:prepare",
1696            .startup.single = timers_prepare_cpu,
1697            .teardown.single = timers_dead_cpu,
1698        },
1699    /* Kicks the plugged cpu into life */
1700    [CPUHP_BRINGUP_CPU] =
1701        {
1702            .name = "cpu:bringup",
1703            .startup.single = bringup_cpu,
1704            .teardown.single = finish_cpu,
1705            .cant_stop = true,
1706        },
1707    /* Final state before CPU kills itself */
1708    [CPUHP_AP_IDLE_DEAD] =
1709        {
1710            .name = "idle:dead",
1711        },
1712    /*
1713     * Last state before CPU enters the idle loop to die. Transient state
1714     * for synchronization.
1715     */
1716    [CPUHP_AP_OFFLINE] =
1717        {
1718            .name = "ap:offline",
1719            .cant_stop = true,
1720        },
1721    /* First state is scheduler control. Interrupts are disabled */
1722    [CPUHP_AP_SCHED_STARTING] =
1723        {
1724            .name = "sched:starting",
1725            .startup.single = sched_cpu_starting,
1726            .teardown.single = sched_cpu_dying,
1727        },
1728    [CPUHP_AP_RCUTREE_DYING] =
1729        {
1730            .name = "RCU/tree:dying",
1731            .startup.single = NULL,
1732            .teardown.single = rcutree_dying_cpu,
1733        },
1734    [CPUHP_AP_SMPCFD_DYING] =
1735        {
1736            .name = "smpcfd:dying",
1737            .startup.single = NULL,
1738            .teardown.single = smpcfd_dying_cpu,
1739        },
1740    /* Entry state on starting. Interrupts enabled from here on. Transient
1741     * state for synchronsization */
1742    [CPUHP_AP_ONLINE] =
1743        {
1744            .name = "ap:online",
1745        },
1746    /*
1747     * Handled on controll processor until the plugged processor manages
1748     * this itself.
1749     */
1750    [CPUHP_TEARDOWN_CPU] =
1751        {
1752            .name = "cpu:teardown",
1753            .startup.single = NULL,
1754            .teardown.single = takedown_cpu,
1755            .cant_stop = true,
1756        },
1757    /* Handle smpboot threads park/unpark */
1758    [CPUHP_AP_SMPBOOT_THREADS] =
1759        {
1760            .name = "smpboot/threads:online",
1761            .startup.single = smpboot_unpark_threads,
1762            .teardown.single = smpboot_park_threads,
1763        },
1764    [CPUHP_AP_IRQ_AFFINITY_ONLINE] =
1765        {
1766            .name = "irq/affinity:online",
1767            .startup.single = irq_affinity_online_cpu,
1768            .teardown.single = NULL,
1769        },
1770    [CPUHP_AP_PERF_ONLINE] =
1771        {
1772            .name = "perf:online",
1773            .startup.single = perf_event_init_cpu,
1774            .teardown.single = perf_event_exit_cpu,
1775        },
1776    [CPUHP_AP_WATCHDOG_ONLINE] =
1777        {
1778            .name = "lockup_detector:online",
1779            .startup.single = lockup_detector_online_cpu,
1780            .teardown.single = lockup_detector_offline_cpu,
1781        },
1782    [CPUHP_AP_WORKQUEUE_ONLINE] =
1783        {
1784            .name = "workqueue:online",
1785            .startup.single = workqueue_online_cpu,
1786            .teardown.single = workqueue_offline_cpu,
1787        },
1788    [CPUHP_AP_RANDOM_ONLINE] = {
1789        .name			= "random:online",
1790        .startup.single		= random_online_cpu,
1791        .teardown.single	= NULL,
1792    },
1793    [CPUHP_AP_RCUTREE_ONLINE] =
1794        {
1795            .name = "RCU/tree:online",
1796            .startup.single = rcutree_online_cpu,
1797            .teardown.single = rcutree_offline_cpu,
1798        },
1799#endif
1800/*
1801 * The dynamically registered state space is here
1802 */
1803
1804#ifdef CONFIG_SMP
1805    /* Last state is scheduler control setting the cpu active */
1806    [CPUHP_AP_ACTIVE] =
1807        {
1808            .name = "sched:active",
1809            .startup.single = sched_cpu_activate,
1810            .teardown.single = sched_cpu_deactivate,
1811        },
1812#endif
1813
1814    /* CPU is fully up and running. */
1815    [CPUHP_ONLINE] =
1816        {
1817            .name = "online",
1818            .startup.single = NULL,
1819            .teardown.single = NULL,
1820        },
1821};
1822
1823/* Sanity check for callbacks */
1824static int cpuhp_cb_check(enum cpuhp_state state)
1825{
1826    if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE) {
1827        return -EINVAL;
1828    }
1829    return 0;
1830}
1831
1832/*
1833 * Returns a free for dynamic slot assignment of the Online state. The states
1834 * are protected by the cpuhp_slot_states mutex and an empty slot is identified
1835 * by having no name assigned.
1836 */
1837static int cpuhp_reserve_state(enum cpuhp_state state)
1838{
1839    enum cpuhp_state i, end;
1840    struct cpuhp_step *step;
1841
1842    switch (state) {
1843        case CPUHP_AP_ONLINE_DYN:
1844            step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
1845            end = CPUHP_AP_ONLINE_DYN_END;
1846            break;
1847        case CPUHP_BP_PREPARE_DYN:
1848            step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
1849            end = CPUHP_BP_PREPARE_DYN_END;
1850            break;
1851        default:
1852            return -EINVAL;
1853    }
1854
1855    for (i = state; i <= end; i++, step++) {
1856        if (!step->name) {
1857            return i;
1858        }
1859    }
1860    WARN(1, "No more dynamic states available for CPU hotplug\n");
1861    return -ENOSPC;
1862}
1863
1864static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, int (*startup)(unsigned int cpu),
1865                                 int (*teardown)(unsigned int cpu), bool multi_instance)
1866{
1867    /* (Un)Install the callbacks for further cpu hotplug operations */
1868    struct cpuhp_step *sp;
1869    int ret = 0;
1870
1871    /*
1872     * If name is NULL, then the state gets removed.
1873     *
1874     * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
1875     * the first allocation from these dynamic ranges, so the removal
1876     * would trigger a new allocation and clear the wrong (already
1877     * empty) state, leaving the callbacks of the to be cleared state
1878     * dangling, which causes wreckage on the next hotplug operation.
1879     */
1880    if (name && (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN)) {
1881        ret = cpuhp_reserve_state(state);
1882        if (ret < 0) {
1883            return ret;
1884        }
1885        state = ret;
1886    }
1887    sp = cpuhp_get_step(state);
1888    if (name && sp->name) {
1889        return -EBUSY;
1890    }
1891
1892    sp->startup.single = startup;
1893    sp->teardown.single = teardown;
1894    sp->name = name;
1895    sp->multi_instance = multi_instance;
1896    INIT_HLIST_HEAD(&sp->list);
1897    return ret;
1898}
1899
1900static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
1901{
1902    return cpuhp_get_step(state)->teardown.single;
1903}
1904
1905/*
1906 * Call the startup/teardown function for a step either on the AP or
1907 * on the current CPU.
1908 */
1909static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node)
1910{
1911    struct cpuhp_step *sp = cpuhp_get_step(state);
1912    int ret;
1913
1914    /*
1915     * If there's nothing to do, we done.
1916     * Relies on the union for multi_instance.
1917     */
1918    if ((bringup && !sp->startup.single) || (!bringup && !sp->teardown.single)) {
1919        return 0;
1920    }
1921    /*
1922     * The non AP bound callbacks can fail on bringup. On teardown
1923     * e.g. module removal we crash for now.
1924     */
1925#ifdef CONFIG_SMP
1926    if (cpuhp_is_ap_state(state)) {
1927        ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
1928    } else {
1929        ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1930    }
1931#else
1932    ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
1933#endif
1934    BUG_ON(ret && !bringup);
1935    return ret;
1936}
1937
1938/*
1939 * Called from __cpuhp_setup_state on a recoverable failure.
1940 *
1941 * Note: The teardown callbacks for rollback are not allowed to fail!
1942 */
1943static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state, struct hlist_node *node)
1944{
1945    int cpu;
1946
1947    /* Roll back the already executed steps on the other cpus */
1948    for_each_present_cpu(cpu)
1949    {
1950        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1951        int cpustate = st->state;
1952
1953        if (cpu >= failedcpu) {
1954            break;
1955        }
1956
1957        /* Did we invoke the startup call on that cpu ? */
1958        if (cpustate >= state) {
1959            cpuhp_issue_call(cpu, state, false, node);
1960        }
1961    }
1962}
1963
1964int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state, struct hlist_node *node, bool invoke)
1965{
1966    struct cpuhp_step *sp;
1967    int cpu;
1968    int ret;
1969
1970    lockdep_assert_cpus_held();
1971
1972    sp = cpuhp_get_step(state);
1973    if (sp->multi_instance == false) {
1974        return -EINVAL;
1975    }
1976
1977    mutex_lock(&cpuhp_state_mutex);
1978
1979    if (!invoke || !sp->startup.multi) {
1980        goto add_node;
1981    }
1982
1983    /*
1984     * Try to call the startup callback for each present cpu
1985     * depending on the hotplug state of the cpu.
1986     */
1987    for_each_present_cpu(cpu)
1988    {
1989        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
1990        int cpustate = st->state;
1991
1992        if (cpustate < state) {
1993            continue;
1994        }
1995
1996        ret = cpuhp_issue_call(cpu, state, true, node);
1997        if (ret) {
1998            if (sp->teardown.multi) {
1999                cpuhp_rollback_install(cpu, state, node);
2000            }
2001            goto unlock;
2002        }
2003    }
2004add_node:
2005    ret = 0;
2006    hlist_add_head(node, &sp->list);
2007unlock:
2008    mutex_unlock(&cpuhp_state_mutex);
2009    return ret;
2010}
2011
2012int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke)
2013{
2014    int ret;
2015
2016    cpus_read_lock();
2017    ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
2018    cpus_read_unlock();
2019    return ret;
2020}
2021EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
2022
2023/**
2024 * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
2025 * @state:        The state to setup
2026 * @invoke:        If true, the startup function is invoked for cpus where
2027 *            cpu state >= @state
2028 * @startup:        startup callback function
2029 * @teardown:        teardown callback function
2030 * @multi_instance:    State is set up for multiple instances which get
2031 *            added afterwards.
2032 *
2033 * The caller needs to hold cpus read locked while calling this function.
2034 * Returns:
2035 *   On success:
2036 *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
2037 *      0 for all other states
2038 *   On failure: proper (negative) error code
2039 */
2040int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name, bool invoke,
2041                                   int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu),
2042                                   bool multi_instance)
2043{
2044    int cpu, ret = 0;
2045    bool dynstate;
2046
2047    lockdep_assert_cpus_held();
2048
2049    if (cpuhp_cb_check(state) || !name) {
2050        return -EINVAL;
2051    }
2052
2053    mutex_lock(&cpuhp_state_mutex);
2054
2055    ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance);
2056
2057    dynstate = state == CPUHP_AP_ONLINE_DYN;
2058    if (ret > 0 && dynstate) {
2059        state = ret;
2060        ret = 0;
2061    }
2062
2063    if (ret || !invoke || !startup) {
2064        goto out;
2065    }
2066
2067    /*
2068     * Try to call the startup callback for each present cpu
2069     * depending on the hotplug state of the cpu.
2070     */
2071    for_each_present_cpu(cpu)
2072    {
2073        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2074        int cpustate = st->state;
2075
2076        if (cpustate < state) {
2077            continue;
2078        }
2079
2080        ret = cpuhp_issue_call(cpu, state, true, NULL);
2081        if (ret) {
2082            if (teardown) {
2083                cpuhp_rollback_install(cpu, state, NULL);
2084            }
2085            cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2086            goto out;
2087        }
2088    }
2089out:
2090    mutex_unlock(&cpuhp_state_mutex);
2091    /*
2092     * If the requested state is CPUHP_AP_ONLINE_DYN, return the
2093     * dynamically allocated state in case of success.
2094     */
2095    if (!ret && dynstate) {
2096        return state;
2097    }
2098    return ret;
2099}
2100EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
2101
2102int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke, int (*startup)(unsigned int cpu),
2103                        int (*teardown)(unsigned int cpu), bool multi_instance)
2104{
2105    int ret;
2106
2107    cpus_read_lock();
2108    ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup, teardown, multi_instance);
2109    cpus_read_unlock();
2110    return ret;
2111}
2112EXPORT_SYMBOL(__cpuhp_setup_state);
2113
2114int __cpuhp_state_remove_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke)
2115{
2116    struct cpuhp_step *sp = cpuhp_get_step(state);
2117    int cpu;
2118
2119    BUG_ON(cpuhp_cb_check(state));
2120
2121    if (!sp->multi_instance) {
2122        return -EINVAL;
2123    }
2124
2125    cpus_read_lock();
2126    mutex_lock(&cpuhp_state_mutex);
2127
2128    if (!invoke || !cpuhp_get_teardown_cb(state)) {
2129        goto remove;
2130    }
2131    /*
2132     * Call the teardown callback for each present cpu depending
2133     * on the hotplug state of the cpu. This function is not
2134     * allowed to fail currently!
2135     */
2136    for_each_present_cpu(cpu)
2137    {
2138        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2139        int cpustate = st->state;
2140
2141        if (cpustate >= state) {
2142            cpuhp_issue_call(cpu, state, false, node);
2143        }
2144    }
2145
2146remove:
2147    hlist_del(node);
2148    mutex_unlock(&cpuhp_state_mutex);
2149    cpus_read_unlock();
2150
2151    return 0;
2152}
2153EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
2154
2155/**
2156 * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
2157 * @state:    The state to remove
2158 * @invoke:    If true, the teardown function is invoked for cpus where
2159 *        cpu state >= @state
2160 *
2161 * The caller needs to hold cpus read locked while calling this function.
2162 * The teardown callback is currently not allowed to fail. Think
2163 * about module removal!
2164 */
2165void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
2166{
2167    struct cpuhp_step *sp = cpuhp_get_step(state);
2168    int cpu;
2169
2170    BUG_ON(cpuhp_cb_check(state));
2171
2172    lockdep_assert_cpus_held();
2173
2174    mutex_lock(&cpuhp_state_mutex);
2175    if (sp->multi_instance) {
2176        WARN(!hlist_empty(&sp->list), "Error: Removing state %d which has instances left.\n", state);
2177        goto remove;
2178    }
2179
2180    if (!invoke || !cpuhp_get_teardown_cb(state)) {
2181        goto remove;
2182    }
2183
2184    /*
2185     * Call the teardown callback for each present cpu depending
2186     * on the hotplug state of the cpu. This function is not
2187     * allowed to fail currently!
2188     */
2189    for_each_present_cpu(cpu)
2190    {
2191        struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
2192        int cpustate = st->state;
2193
2194        if (cpustate >= state) {
2195            cpuhp_issue_call(cpu, state, false, NULL);
2196        }
2197    }
2198remove:
2199    cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
2200    mutex_unlock(&cpuhp_state_mutex);
2201}
2202EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
2203
2204void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
2205{
2206    cpus_read_lock();
2207    __cpuhp_remove_state_cpuslocked(state, invoke);
2208    cpus_read_unlock();
2209}
2210EXPORT_SYMBOL(__cpuhp_remove_state);
2211
2212#ifdef CONFIG_HOTPLUG_SMT
2213static void cpuhp_offline_cpu_device(unsigned int cpu)
2214{
2215    struct device *dev = get_cpu_device(cpu);
2216
2217    dev->offline = true;
2218    /* Tell user space about the state change */
2219    kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
2220}
2221
2222static void cpuhp_online_cpu_device(unsigned int cpu)
2223{
2224    struct device *dev = get_cpu_device(cpu);
2225
2226    dev->offline = false;
2227    /* Tell user space about the state change */
2228    kobject_uevent(&dev->kobj, KOBJ_ONLINE);
2229}
2230
2231int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
2232{
2233    int cpu, ret = 0;
2234
2235    cpu_maps_update_begin();
2236    for_each_online_cpu(cpu)
2237    {
2238        if (topology_is_primary_thread(cpu)) {
2239            continue;
2240        }
2241        ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
2242        if (ret) {
2243            break;
2244        }
2245        /*
2246         * As this needs to hold the cpu maps lock it's impossible
2247         * to call device_offline() because that ends up calling
2248         * cpu_down() which takes cpu maps lock. cpu maps lock
2249         * needs to be held as this might race against in kernel
2250         * abusers of the hotplug machinery (thermal management).
2251         *
2252         * So nothing would update device:offline state. That would
2253         * leave the sysfs entry stale and prevent onlining after
2254         * smt control has been changed to 'off' again. This is
2255         * called under the sysfs hotplug lock, so it is properly
2256         * serialized against the regular offline usage.
2257         */
2258        cpuhp_offline_cpu_device(cpu);
2259    }
2260    if (!ret) {
2261        cpu_smt_control = ctrlval;
2262    }
2263    cpu_maps_update_done();
2264    return ret;
2265}
2266
2267int cpuhp_smt_enable(void)
2268{
2269    int cpu, ret = 0;
2270
2271    cpu_maps_update_begin();
2272    cpu_smt_control = CPU_SMT_ENABLED;
2273    for_each_present_cpu(cpu)
2274    {
2275        /* Skip online CPUs and CPUs on offline nodes */
2276        if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) {
2277            continue;
2278        }
2279        ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
2280        if (ret) {
2281            break;
2282        }
2283        /* See comment in cpuhp_smt_disable() */
2284        cpuhp_online_cpu_device(cpu);
2285    }
2286    cpu_maps_update_done();
2287    return ret;
2288}
2289#endif
2290
2291#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
2292static ssize_t show_cpuhp_state(struct device *dev, struct device_attribute *attr, char *buf)
2293{
2294    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2295
2296    return sprintf(buf, "%d\n", st->state);
2297}
2298static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);
2299
2300static ssize_t write_cpuhp_target(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
2301{
2302    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2303    struct cpuhp_step *sp;
2304    int target, ret;
2305
2306    ret = kstrtoint(buf, 10, &target);
2307    if (ret) {
2308        return ret;
2309    }
2310
2311#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
2312    if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE) {
2313        return -EINVAL;
2314    }
2315#else
2316    if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE) {
2317        return -EINVAL;
2318    }
2319#endif
2320
2321    ret = lock_device_hotplug_sysfs();
2322    if (ret) {
2323        return ret;
2324    }
2325
2326    mutex_lock(&cpuhp_state_mutex);
2327    sp = cpuhp_get_step(target);
2328    ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
2329    mutex_unlock(&cpuhp_state_mutex);
2330    if (ret) {
2331        goto out;
2332    }
2333
2334    if (st->state < target) {
2335        ret = cpu_up(dev->id, target);
2336    } else {
2337        ret = cpu_down(dev->id, target);
2338    }
2339out:
2340    unlock_device_hotplug();
2341    return ret ? ret : count;
2342}
2343
2344static ssize_t show_cpuhp_target(struct device *dev, struct device_attribute *attr, char *buf)
2345{
2346    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2347
2348    return sprintf(buf, "%d\n", st->target);
2349}
2350static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
2351
2352static ssize_t write_cpuhp_fail(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
2353{
2354    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2355    struct cpuhp_step *sp;
2356    int fail, ret;
2357
2358    ret = kstrtoint(buf, 10, &fail);
2359    if (ret) {
2360        return ret;
2361    }
2362
2363    if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) {
2364        return -EINVAL;
2365    }
2366
2367    /*
2368     * Cannot fail STARTING/DYING callbacks.
2369     */
2370    if (cpuhp_is_atomic_state(fail)) {
2371        return -EINVAL;
2372    }
2373
2374    /*
2375     * Cannot fail anything that doesn't have callbacks.
2376     */
2377    mutex_lock(&cpuhp_state_mutex);
2378    sp = cpuhp_get_step(fail);
2379    if (!sp->startup.single && !sp->teardown.single) {
2380        ret = -EINVAL;
2381    }
2382    mutex_unlock(&cpuhp_state_mutex);
2383    if (ret) {
2384        return ret;
2385    }
2386
2387    st->fail = fail;
2388
2389    return count;
2390}
2391
2392static ssize_t show_cpuhp_fail(struct device *dev, struct device_attribute *attr, char *buf)
2393{
2394    struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
2395
2396    return sprintf(buf, "%d\n", st->fail);
2397}
2398
2399static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
2400
2401static struct attribute *cpuhp_cpu_attrs[] = {&dev_attr_state.attr, &dev_attr_target.attr, &dev_attr_fail.attr, NULL};
2402
2403static const struct attribute_group cpuhp_cpu_attr_group = {.attrs = cpuhp_cpu_attrs, .name = "hotplug", NULL};
2404
2405static ssize_t show_cpuhp_states(struct device *dev, struct device_attribute *attr, char *buf)
2406{
2407    ssize_t cur, res = 0;
2408    int i;
2409
2410    mutex_lock(&cpuhp_state_mutex);
2411    for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
2412        struct cpuhp_step *sp = cpuhp_get_step(i);
2413
2414        if (sp->name) {
2415            cur = sprintf(buf, "%3d: %s\n", i, sp->name);
2416            buf += cur;
2417            res += cur;
2418        }
2419    }
2420    mutex_unlock(&cpuhp_state_mutex);
2421    return res;
2422}
2423static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);
2424
2425static struct attribute *cpuhp_cpu_root_attrs[] = {&dev_attr_states.attr, NULL};
2426
2427static const struct attribute_group cpuhp_cpu_root_attr_group = {
2428    .attrs = cpuhp_cpu_root_attrs, .name = "hotplug", NULL};
2429
2430#ifdef CONFIG_HOTPLUG_SMT
2431
2432static ssize_t _store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
2433{
2434    int ctrlval, ret;
2435
2436    if (sysfs_streq(buf, "on")) {
2437        ctrlval = CPU_SMT_ENABLED;
2438    } else if (sysfs_streq(buf, "off")) {
2439        ctrlval = CPU_SMT_DISABLED;
2440    } else if (sysfs_streq(buf, "forceoff")) {
2441        ctrlval = CPU_SMT_FORCE_DISABLED;
2442    } else {
2443        return -EINVAL;
2444    }
2445
2446    if (cpu_smt_control == CPU_SMT_FORCE_DISABLED) {
2447        return -EPERM;
2448    }
2449
2450    if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) {
2451        return -ENODEV;
2452    }
2453
2454    ret = lock_device_hotplug_sysfs();
2455    if (ret) {
2456        return ret;
2457    }
2458
2459    if (ctrlval != cpu_smt_control) {
2460        switch (ctrlval) {
2461            case CPU_SMT_ENABLED:
2462                ret = cpuhp_smt_enable();
2463                break;
2464            case CPU_SMT_DISABLED:
2465            case CPU_SMT_FORCE_DISABLED:
2466                ret = cpuhp_smt_disable(ctrlval);
2467                break;
2468        }
2469    }
2470
2471    unlock_device_hotplug();
2472    return ret ? ret : count;
2473}
2474
2475#else  /* !CONFIG_HOTPLUG_SMT */
2476static ssize_t _store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
2477{
2478    return -ENODEV;
2479}
2480#endif /* CONFIG_HOTPLUG_SMT */
2481
2482static const char *smt_states[] = {
2483    [CPU_SMT_ENABLED] = "on",
2484    [CPU_SMT_DISABLED] = "off",
2485    [CPU_SMT_FORCE_DISABLED] = "forceoff",
2486    [CPU_SMT_NOT_SUPPORTED] = "notsupported",
2487    [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented",
2488};
2489
2490static ssize_t show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
2491{
2492    const char *state = smt_states[cpu_smt_control];
2493
2494    return snprintf(buf, PAGE_SIZE - CPU_PAGE_SIZE_OFF_TWO, "%s\n", state);
2495}
2496
2497static ssize_t store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
2498{
2499    return _store_smt_control(dev, attr, buf, count);
2500}
2501static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
2502
2503static ssize_t show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
2504{
2505    return snprintf(buf, PAGE_SIZE - CPU_PAGE_SIZE_OFF_TWO, "%d\n", sched_smt_active());
2506}
2507static DEVICE_ATTR(active, 0444, show_smt_active, NULL);
2508
2509static struct attribute *cpuhp_smt_attrs[] = {&dev_attr_control.attr, &dev_attr_active.attr, NULL};
2510
2511static const struct attribute_group cpuhp_smt_attr_group = {.attrs = cpuhp_smt_attrs, .name = "smt", NULL};
2512
2513static int __init cpu_smt_sysfs_init(void)
2514{
2515    return sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_smt_attr_group);
2516}
2517
2518static int __init cpuhp_sysfs_init(void)
2519{
2520    int cpu, ret;
2521
2522    ret = cpu_smt_sysfs_init();
2523    if (ret) {
2524        return ret;
2525    }
2526
2527    ret = sysfs_create_group(&cpu_subsys.dev_root->kobj, &cpuhp_cpu_root_attr_group);
2528    if (ret) {
2529        return ret;
2530    }
2531
2532    for_each_possible_cpu(cpu)
2533    {
2534        struct device *dev = get_cpu_device(cpu);
2535
2536        if (!dev) {
2537            continue;
2538        }
2539        ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
2540        if (ret) {
2541            return ret;
2542        }
2543    }
2544    return 0;
2545}
2546device_initcall(cpuhp_sysfs_init);
2547#endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */
2548
2549/*
2550 * cpu_bit_bitmap[] is a special, "compressed" data structure that
2551 * represents all NR_CPUS bits binary values of 1<<nr.
2552 *
2553 * It is used by cpumask_of() to get a constant address to a CPU
2554 * mask value that has a single bit set only.
2555 */
2556
2557/* cpu_bit_bitmap[0] is empty - so we can back into it */
2558#define MASK_DECLARE_1(x) [(x) + 1][0] = (1UL << (x))
2559#define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1((x) + 1)
2560#define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2((x) + 2)
2561#define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4((x) + 4)
2562
2563const unsigned long cpu_bit_bitmap[BITS_PER_LONG + 1][BITS_TO_LONGS(NR_CPUS)] = {
2564
2565    MASK_DECLARE_8(0),  MASK_DECLARE_8(8),  MASK_DECLARE_8(16), MASK_DECLARE_8(24),
2566#if BITS_PER_LONG > 32
2567    MASK_DECLARE_8(32), MASK_DECLARE_8(40), MASK_DECLARE_8(48), MASK_DECLARE_8(56),
2568#endif
2569};
2570EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
2571
2572const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
2573EXPORT_SYMBOL(cpu_all_bits);
2574
2575#ifdef CONFIG_INIT_ALL_POSSIBLE
2576struct cpumask __cpu_possible_mask __read_mostly = {CPU_BITS_ALL};
2577#else
2578struct cpumask __cpu_possible_mask __read_mostly;
2579#endif
2580EXPORT_SYMBOL(__cpu_possible_mask);
2581
2582struct cpumask __cpu_online_mask __read_mostly;
2583EXPORT_SYMBOL(__cpu_online_mask);
2584
2585struct cpumask __cpu_present_mask __read_mostly;
2586EXPORT_SYMBOL(__cpu_present_mask);
2587
2588struct cpumask __cpu_active_mask __read_mostly;
2589EXPORT_SYMBOL(__cpu_active_mask);
2590
2591#ifdef CONFIG_CPU_ISOLATION_OPT
2592struct cpumask __cpu_isolated_mask __read_mostly;
2593EXPORT_SYMBOL(__cpu_isolated_mask);
2594#endif
2595
2596atomic_t __num_online_cpus __read_mostly;
2597EXPORT_SYMBOL(__num_online_cpus);
2598
2599void init_cpu_present(const struct cpumask *src)
2600{
2601    cpumask_copy(&__cpu_present_mask, src);
2602}
2603
2604void init_cpu_possible(const struct cpumask *src)
2605{
2606    cpumask_copy(&__cpu_possible_mask, src);
2607}
2608
2609void init_cpu_online(const struct cpumask *src)
2610{
2611    cpumask_copy(&__cpu_online_mask, src);
2612}
2613
2614#ifdef CONFIG_CPU_ISOLATION_OPT
2615void init_cpu_isolated(const struct cpumask *src)
2616{
2617    cpumask_copy(&__cpu_isolated_mask, src);
2618}
2619#endif
2620
2621void set_cpu_online(unsigned int cpu, bool online)
2622{
2623    /*
2624     * atomic_inc/dec() is required to handle the horrid abuse of this
2625     * function by the reboot and kexec code which invoke it from
2626     * IPI/NMI broadcasts when shutting down CPUs. Invocation from
2627     * regular CPU hotplug is properly serialized.
2628     *
2629     * Note, that the fact that __num_online_cpus is of type atomic_t
2630     * does not protect readers which are not serialized against
2631     * concurrent hotplug operations.
2632     */
2633    if (online) {
2634        if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask)) {
2635            atomic_inc(&__num_online_cpus);
2636        }
2637    } else {
2638        if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask)) {
2639            atomic_dec(&__num_online_cpus);
2640        }
2641    }
2642}
2643
2644/*
2645 * Activate the first processor.
2646 */
2647void __init boot_cpu_init(void)
2648{
2649    int cpu = smp_processor_id();
2650
2651    /* Mark the boot cpu "present", "online" etc for SMP and UP case */
2652    set_cpu_online(cpu, true);
2653    set_cpu_active(cpu, true);
2654    set_cpu_present(cpu, true);
2655    set_cpu_possible(cpu, true);
2656
2657#ifdef CONFIG_SMP
2658    __boot_cpu_id = cpu;
2659#endif
2660}
2661
2662/*
2663 * Must be called _AFTER_ setting up the per_cpu areas
2664 */
2665void __init boot_cpu_hotplug_init(void)
2666{
2667#ifdef CONFIG_SMP
2668    cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask);
2669#endif
2670    this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
2671}
2672
2673/*
2674 * These are used for a global "mitigations=" cmdline option for toggling
2675 * optional CPU mitigations.
2676 */
2677enum cpu_mitigations {
2678    CPU_MITIGATIONS_OFF,
2679    CPU_MITIGATIONS_AUTO,
2680    CPU_MITIGATIONS_AUTO_NOSMT,
2681};
2682
2683static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO;
2684
2685static int __init mitigations_parse_cmdline(char *arg)
2686{
2687    if (!strcmp(arg, "off")) {
2688        cpu_mitigations = CPU_MITIGATIONS_OFF;
2689    } else if (!strcmp(arg, "auto")) {
2690        cpu_mitigations = CPU_MITIGATIONS_AUTO;
2691    } else if (!strcmp(arg, "auto,nosmt")) {
2692        cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
2693    } else {
2694        pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", arg);
2695    }
2696
2697    return 0;
2698}
2699early_param("mitigations", mitigations_parse_cmdline);
2700
2701/* mitigations=off */
2702bool cpu_mitigations_off(void)
2703{
2704    return cpu_mitigations == CPU_MITIGATIONS_OFF;
2705}
2706EXPORT_SYMBOL_GPL(cpu_mitigations_off);
2707
2708/* mitigations=auto,nosmt */
2709bool cpu_mitigations_auto_nosmt(void)
2710{
2711    return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
2712}
2713EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
2714