// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic waiting primitives.
 *
 * (C) 2004 Nadia Yvette Chambers, Oracle
 */
#include "sched.h"

void __init_waitqueue_head(struct wait_queue_head *wq_head, const char *name, struct lock_class_key *key)
{
    spin_lock_init(&wq_head->lock);
    lockdep_set_class_and_name(&wq_head->lock, key, name);
    INIT_LIST_HEAD(&wq_head->head);
}
EXPORT_SYMBOL(__init_waitqueue_head);

void add_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
    unsigned long flags;

    wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&wq_head->lock, flags);
    __add_wait_queue(wq_head, wq_entry);
    spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue);

void add_wait_queue_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
    unsigned long flags;

    wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&wq_head->lock, flags);
    __add_wait_queue_entry_tail(wq_head, wq_entry);
    spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(add_wait_queue_exclusive);

void remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
    unsigned long flags;

    spin_lock_irqsave(&wq_head->lock, flags);
    __remove_wait_queue(wq_head, wq_entry);
    spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(remove_wait_queue);
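
/*
 * Usage sketch (illustrative; `condition' and `some_wq_head' are
 * placeholders): a waiter with a custom wake callback registers itself with
 * add_wait_queue() and must pair that with remove_wait_queue() before the
 * on-stack entry goes out of scope.  This is the same pattern spelled out in
 * the wait_woken() comment further down.
 *
 *     DEFINE_WAIT_FUNC(wait, woken_wake_function);
 *
 *     add_wait_queue(&some_wq_head, &wait);
 *     while (!condition)
 *         wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
 *     remove_wait_queue(&some_wq_head, &wait);
 */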

/*
 * Scan threshold to break wait queue walk.
 * This allows a waker to take a break from holding the
 * wait queue lock during the wait queue walk.
 */
#define WAITQUEUE_WALK_BREAK_CNT 64

/*
 * The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
 * wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
 * number) then we wake all the non-exclusive tasks and one exclusive task.
 *
 * There are circumstances in which we can try to wake a task which has already
 * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
 * zero in this (rare) case, and we handle it by continuing to scan the queue.
 */
static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive, int wake_flags,
                            void *key, wait_queue_entry_t *bookmark)
{
    wait_queue_entry_t *curr, *next;
    int cnt = 0;

    lockdep_assert_held(&wq_head->lock);

    if (bookmark && (bookmark->flags & WQ_FLAG_BOOKMARK)) {
        curr = list_next_entry(bookmark, entry);

        list_del(&bookmark->entry);
        bookmark->flags = 0;
    } else {
        curr = list_first_entry(&wq_head->head, wait_queue_entry_t, entry);
    }

    if (&curr->entry == &wq_head->head) {
        return nr_exclusive;
    }

    list_for_each_entry_safe_from(curr, next, &wq_head->head, entry) {
        unsigned flags = curr->flags;
        int ret;

        if (flags & WQ_FLAG_BOOKMARK) {
            continue;
        }

        ret = curr->func(curr, mode, wake_flags, key);
        if (ret < 0) {
            break;
        }
        if (ret && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) {
            break;
        }

        if (bookmark && (++cnt > WAITQUEUE_WALK_BREAK_CNT) && (&next->entry != &wq_head->head)) {
            bookmark->flags = WQ_FLAG_BOOKMARK;
            list_add_tail(&bookmark->entry, &next->entry);
            break;
        }
    }

    return nr_exclusive;
}

static void __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive, int wake_flags,
                                  void *key)
{
    unsigned long flags;
    wait_queue_entry_t bookmark;

    bookmark.flags = 0;
    bookmark.private = NULL;
    bookmark.func = NULL;
    INIT_LIST_HEAD(&bookmark.entry);

    do {
        spin_lock_irqsave(&wq_head->lock, flags);
        nr_exclusive = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key, &bookmark);
        spin_unlock_irqrestore(&wq_head->lock, flags);
    } while (bookmark.flags & WQ_FLAG_BOOKMARK);
}

/**
 * __wake_up - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @nr_exclusive: how many wake-one or wake-many threads to wake up
 * @key: is directly passed to the wakeup function
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive, void *key)
{
    __wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);
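
/*
 * Most callers reach __wake_up() through the convenience macros in
 * <linux/wait.h> (sketch of their expansions):
 *
 *     wake_up(&wq_head);               // __wake_up(&wq_head, TASK_NORMAL, 1, NULL)
 *     wake_up_interruptible(&wq_head); // __wake_up(&wq_head, TASK_INTERRUPTIBLE, 1, NULL)
 *     wake_up_all(&wq_head);           // __wake_up(&wq_head, TASK_NORMAL, 0, NULL)
 *
 * A typical producer sets the condition first and then wakes waiters
 * (`dev->data_ready' and `dev->read_wq' are illustrative fields):
 *
 *     dev->data_ready = true;
 *     wake_up_interruptible(&dev->read_wq);
 */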

/*
 * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
 */
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr)
{
    __wake_up_common(wq_head, mode, nr, 0, NULL, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked);

void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
    __wake_up_common(wq_head, mode, 1, 0, key, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key);

void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, unsigned int mode, void *key,
                                   wait_queue_entry_t *bookmark)
{
    __wake_up_common(wq_head, mode, 1, 0, key, bookmark);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_key_bookmark);

/**
 * __wake_up_sync_key - wake up threads blocked on a waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
    if (unlikely(!wq_head)) {
        return;
    }

    __wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);

/**
 * __wake_up_locked_sync_key - wake up a thread blocked on a locked waitqueue.
 * @wq_head: the waitqueue
 * @mode: which threads
 * @key: opaque value to be passed to wakeup targets
 *
 * The sync wakeup differs in that the waker knows that it will schedule
 * away soon, so while the target thread will be woken up, it will not
 * be migrated to another CPU - ie. the two threads are 'synchronized'
 * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
 *
 * If this function wakes up a task, it executes a full memory barrier before
 * accessing the task state.
 */
void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key)
{
    __wake_up_common(wq_head, mode, 1, WF_SYNC, key, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_locked_sync_key);

/*
 * __wake_up_sync - see __wake_up_sync_key()
 */
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
{
    __wake_up_sync_key(wq_head, mode, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */

void __wake_up_pollfree(struct wait_queue_head *wq_head)
{
    __wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE));
    /* POLLFREE must have cleared the queue. */
    WARN_ON_ONCE(waitqueue_active(wq_head));
}
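
/*
 * Intended call path (sketch, assuming the wake_up_pollfree() wrapper in
 * <linux/wait.h>): callers about to free a still-registered waitqueue use
 * wake_up_pollfree(wq_head), which only drops into __wake_up_pollfree() when
 * waitqueue_active() reports waiters.  Every poll waiter must react to
 * POLLFREE by removing itself, which is what the WARN_ON_ONCE() above checks.
 */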

/*
 * Note: we use "set_current_state()" _after_ the wait-queue add,
 * because we need a memory barrier there on SMP, so that any
 * wake-function that tests for the wait-queue being active
 * will be guaranteed to see waitqueue addition _or_ subsequent
 * tests in this thread will see the wakeup having taken place.
 *
 * The spin_unlock() itself is semi-permeable and only protects
 * one way (it only protects stuff inside the critical region and
 * stops them from bleeding out - it would still allow subsequent
 * loads to move into the critical region).
 */
void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
    unsigned long flags;

    wq_entry->flags &= ~WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&wq_head->lock, flags);
    if (list_empty(&wq_entry->entry)) {
        __add_wait_queue(wq_head, wq_entry);
    }
    set_current_state(state);
    spin_unlock_irqrestore(&wq_head->lock, flags);
}
EXPORT_SYMBOL(prepare_to_wait);

/* Returns true if we are the first waiter in the queue, false otherwise. */
bool prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
    unsigned long flags;
    bool was_empty = false;

    wq_entry->flags |= WQ_FLAG_EXCLUSIVE;
    spin_lock_irqsave(&wq_head->lock, flags);
    if (list_empty(&wq_entry->entry)) {
        was_empty = list_empty(&wq_head->head);
        __add_wait_queue_entry_tail(wq_head, wq_entry);
    }
    set_current_state(state);
    spin_unlock_irqrestore(&wq_head->lock, flags);
    return was_empty;
}
EXPORT_SYMBOL(prepare_to_wait_exclusive);
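
/*
 * Canonical open-coded wait loop built on prepare_to_wait()/finish_wait()
 * (sketch; `condition' and `wq_head' stand in for the caller's own state,
 * DEFINE_WAIT() is the helper from <linux/wait.h>):
 *
 *     DEFINE_WAIT(wait);
 *
 *     for (;;) {
 *         prepare_to_wait(&wq_head, &wait, TASK_INTERRUPTIBLE);
 *         if (condition)
 *             break;
 *         if (signal_pending(current))
 *             break;
 *         schedule();
 *     }
 *     finish_wait(&wq_head, &wait);
 *
 * prepare_to_wait_exclusive() is used the same way but queues at the tail
 * with WQ_FLAG_EXCLUSIVE set, so wake-one wakeups stop after this entry.
 */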

void init_wait_entry(struct wait_queue_entry *wq_entry, int flags)
{
    wq_entry->flags = flags;
    wq_entry->private = current;
    wq_entry->func = autoremove_wake_function;
    INIT_LIST_HEAD(&wq_entry->entry);
}
EXPORT_SYMBOL(init_wait_entry);

long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state)
{
    unsigned long flags;
    long ret = 0;

    spin_lock_irqsave(&wq_head->lock, flags);
    if (signal_pending_state(state, current)) {
        /*
         * Exclusive waiter must not fail if it was selected by wakeup,
         * it should "consume" the condition we were waiting for.
         *
         * The caller will recheck the condition and return success if
         * we were already woken up, we can not miss the event because
         * wakeup locks/unlocks the same wq_head->lock.
         *
         * But we need to ensure that set-condition + wakeup after that
         * can't see us, it should wake up another exclusive waiter if
         * we fail.
         */
        list_del_init(&wq_entry->entry);
        ret = -ERESTARTSYS;
    } else {
        if (list_empty(&wq_entry->entry)) {
            if (wq_entry->flags & WQ_FLAG_EXCLUSIVE) {
                __add_wait_queue_entry_tail(wq_head, wq_entry);
            } else {
                __add_wait_queue(wq_head, wq_entry);
            }
        }
        set_current_state(state);
    }
    spin_unlock_irqrestore(&wq_head->lock, flags);

    return ret;
}
EXPORT_SYMBOL(prepare_to_wait_event);
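
/*
 * prepare_to_wait_event() is the helper behind the wait_event*() macro
 * family; the ___wait_event() expansion in <linux/wait.h> looks roughly
 * like this (simplified sketch):
 *
 *     init_wait_entry(&wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0);
 *     for (;;) {
 *         long intr = prepare_to_wait_event(&wq_head, &wq_entry, state);
 *
 *         if (condition)
 *             break;
 *         if (___wait_is_interruptible(state) && intr) {
 *             ret = intr;              // -ERESTARTSYS, entry already dequeued
 *             break;
 *         }
 *         schedule();
 *     }
 *     finish_wait(&wq_head, &wq_entry);
 */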

/*
 * Note! These two wait functions are entered with the
 * wait-queue lock held (and interrupts off in the _irq
 * case), so there is no race with testing the wakeup
 * condition in the caller before they add the wait
 * entry to the wake queue.
 */
int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
    if (likely(list_empty(&wait->entry))) {
        __add_wait_queue_entry_tail(wq, wait);
    }

    set_current_state(TASK_INTERRUPTIBLE);
    if (signal_pending(current)) {
        return -ERESTARTSYS;
    }

    spin_unlock(&wq->lock);
    schedule();
    spin_lock(&wq->lock);

    return 0;
}
EXPORT_SYMBOL(do_wait_intr);

int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
{
    if (likely(list_empty(&wait->entry))) {
        __add_wait_queue_entry_tail(wq, wait);
    }

    set_current_state(TASK_INTERRUPTIBLE);
    if (signal_pending(current)) {
        return -ERESTARTSYS;
    }

    spin_unlock_irq(&wq->lock);
    schedule();
    spin_lock_irq(&wq->lock);

    return 0;
}
EXPORT_SYMBOL(do_wait_intr_irq);
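
/*
 * Usage sketch (assuming the wait_event_interruptible_locked*() macros in
 * <linux/wait.h>, which are the intended callers): the caller already holds
 * wq->lock, tests the condition itself, and loops over do_wait_intr() or
 * do_wait_intr_irq():
 *
 *     DEFINE_WAIT(wait);
 *     int err = 0;
 *
 *     spin_lock(&wq.lock);
 *     do {
 *         err = do_wait_intr(&wq, &wait);
 *         if (err)
 *             break;
 *     } while (!condition);
 *     __remove_wait_queue(&wq, &wait);    // lock is held, so no finish_wait()
 *     __set_current_state(TASK_RUNNING);
 *     spin_unlock(&wq.lock);
 */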

/**
 * finish_wait - clean up after waiting in a queue
 * @wq_head: waitqueue waited on
 * @wq_entry: wait descriptor
 *
 * Sets current thread back to running state and removes
 * the wait descriptor from the given waitqueue if still
 * queued.
 */
void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
    unsigned long flags;

    __set_current_state(TASK_RUNNING);
    /*
     * We can check for list emptiness outside the lock
     * IFF:
     *  - we use the "careful" check that verifies both
     *    the next and prev pointers, so that there cannot
     *    be any half-pending updates in progress on other
     *    CPUs that we haven't seen yet (and that might
     *    still change the stack area).
     * and
     *  - all other users take the lock (ie we can only
     *    have _one_ other CPU that looks at or modifies
     *    the list).
     */
    if (!list_empty_careful(&wq_entry->entry)) {
        spin_lock_irqsave(&wq_head->lock, flags);
        list_del_init(&wq_entry->entry);
        spin_unlock_irqrestore(&wq_head->lock, flags);
    }
}
EXPORT_SYMBOL(finish_wait);

__sched int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned int mode, int sync, void *key)
{
    int ret = default_wake_function(wq_entry, mode, sync, key);
    if (ret) {
        list_del_init_careful(&wq_entry->entry);
    }

    return ret;
}
EXPORT_SYMBOL(autoremove_wake_function);
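
/*
 * DEFINE_WAIT(name) and init_wait() in <linux/wait.h> install
 * autoremove_wake_function as the default callback (sketch of the
 * resulting on-stack entry):
 *
 *     struct wait_queue_entry wait = {
 *         .private = current,
 *         .func    = autoremove_wake_function,
 *         .entry   = LIST_HEAD_INIT(wait.entry),
 *     };
 *
 * A woken waiter therefore drops off the queue by itself, and finish_wait()
 * only needs to take the lock in the not-yet-woken case.
 */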

static inline bool is_kthread_should_stop(void)
{
    return (current->flags & PF_KTHREAD) && kthread_should_stop();
}

/*
 * DEFINE_WAIT_FUNC(wait, woken_wake_func);
 *
 * add_wait_queue(&wq_head, &wait);
 * for (;;) {
 *     if (condition)
 *         break;
 *
 *     // in wait_woken()                       // in woken_wake_function()
 *
 *     p->state = mode;                         wq_entry->flags |= WQ_FLAG_WOKEN;
 *     smp_mb(); // A                           try_to_wake_up():
 *     if (!(wq_entry->flags & WQ_FLAG_WOKEN))     <full barrier>
 *         schedule()                              if (p->state & mode)
 *     p->state = TASK_RUNNING;                       p->state = TASK_RUNNING;
 *     wq_entry->flags &= ~WQ_FLAG_WOKEN;       ~~~~~~~~~~~~~~~~~~
 *     smp_mb(); // B                           condition = true;
 * }                                            smp_mb(); // C
 * remove_wait_queue(&wq_head, &wait);          wq_entry->flags |= WQ_FLAG_WOKEN;
 */
__sched long wait_woken(struct wait_queue_entry *wq_entry, unsigned int mode, long timeout)
{
    /*
     * The below executes an smp_mb(), which matches with the full barrier
     * executed by the try_to_wake_up() in woken_wake_function() such that
     * either we see the store to wq_entry->flags in woken_wake_function()
     * or woken_wake_function() sees our store to current->state.
     */
    set_current_state(mode); /* A */
    if (!(wq_entry->flags & WQ_FLAG_WOKEN) && !is_kthread_should_stop()) {
        timeout = schedule_timeout(timeout);
    }
    __set_current_state(TASK_RUNNING);

    /*
     * The below executes an smp_mb(), which matches with the smp_mb() (C)
     * in woken_wake_function() such that either we see the wait condition
     * being true or the store to wq_entry->flags in woken_wake_function()
     * follows ours in the coherence order.
     */
    smp_store_mb(wq_entry->flags, wq_entry->flags & ~WQ_FLAG_WOKEN); /* B */

    return timeout;
}
EXPORT_SYMBOL(wait_woken);

__sched int woken_wake_function(struct wait_queue_entry *wq_entry, unsigned int mode, int sync, void *key)
{
    /* Pairs with the smp_store_mb() in wait_woken(). */
    smp_mb(); /* C */
    wq_entry->flags |= WQ_FLAG_WOKEN;

    return default_wake_function(wq_entry, mode, sync, key);
}
EXPORT_SYMBOL(woken_wake_function);