18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Basic worker thread pool for io_uring 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2019 Jens Axboe 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci */ 88c2ecf20Sopenharmony_ci#include <linux/kernel.h> 98c2ecf20Sopenharmony_ci#include <linux/init.h> 108c2ecf20Sopenharmony_ci#include <linux/errno.h> 118c2ecf20Sopenharmony_ci#include <linux/sched/signal.h> 128c2ecf20Sopenharmony_ci#include <linux/percpu.h> 138c2ecf20Sopenharmony_ci#include <linux/slab.h> 148c2ecf20Sopenharmony_ci#include <linux/rculist_nulls.h> 158c2ecf20Sopenharmony_ci#include <linux/cpu.h> 168c2ecf20Sopenharmony_ci#include <linux/tracehook.h> 178c2ecf20Sopenharmony_ci#include <uapi/linux/io_uring.h> 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ci#include "io-wq.h" 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci#define WORKER_IDLE_TIMEOUT (5 * HZ) 228c2ecf20Sopenharmony_ci 238c2ecf20Sopenharmony_cienum { 248c2ecf20Sopenharmony_ci IO_WORKER_F_UP = 1, /* up and active */ 258c2ecf20Sopenharmony_ci IO_WORKER_F_RUNNING = 2, /* account as running */ 268c2ecf20Sopenharmony_ci IO_WORKER_F_FREE = 4, /* worker on free list */ 278c2ecf20Sopenharmony_ci IO_WORKER_F_BOUND = 8, /* is doing bounded work */ 288c2ecf20Sopenharmony_ci}; 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_cienum { 318c2ecf20Sopenharmony_ci IO_WQ_BIT_EXIT = 0, /* wq exiting */ 328c2ecf20Sopenharmony_ci}; 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_cienum { 358c2ecf20Sopenharmony_ci IO_ACCT_STALLED_BIT = 0, /* stalled on hash */ 368c2ecf20Sopenharmony_ci}; 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci/* 398c2ecf20Sopenharmony_ci * One for each thread in a wqe pool 408c2ecf20Sopenharmony_ci */ 418c2ecf20Sopenharmony_cistruct io_worker { 428c2ecf20Sopenharmony_ci refcount_t ref; 438c2ecf20Sopenharmony_ci unsigned flags; 448c2ecf20Sopenharmony_ci struct hlist_nulls_node nulls_node; 
458c2ecf20Sopenharmony_ci struct list_head all_list; 468c2ecf20Sopenharmony_ci struct task_struct *task; 478c2ecf20Sopenharmony_ci struct io_wqe *wqe; 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci struct io_wq_work *cur_work; 508c2ecf20Sopenharmony_ci spinlock_t lock; 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci struct completion ref_done; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci unsigned long create_state; 558c2ecf20Sopenharmony_ci struct callback_head create_work; 568c2ecf20Sopenharmony_ci int create_index; 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci union { 598c2ecf20Sopenharmony_ci struct rcu_head rcu; 608c2ecf20Sopenharmony_ci struct work_struct work; 618c2ecf20Sopenharmony_ci }; 628c2ecf20Sopenharmony_ci}; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci#if BITS_PER_LONG == 64 658c2ecf20Sopenharmony_ci#define IO_WQ_HASH_ORDER 6 668c2ecf20Sopenharmony_ci#else 678c2ecf20Sopenharmony_ci#define IO_WQ_HASH_ORDER 5 688c2ecf20Sopenharmony_ci#endif 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER) 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_cistruct io_wqe_acct { 738c2ecf20Sopenharmony_ci unsigned nr_workers; 748c2ecf20Sopenharmony_ci unsigned max_workers; 758c2ecf20Sopenharmony_ci int index; 768c2ecf20Sopenharmony_ci atomic_t nr_running; 778c2ecf20Sopenharmony_ci struct io_wq_work_list work_list; 788c2ecf20Sopenharmony_ci unsigned long flags; 798c2ecf20Sopenharmony_ci}; 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_cienum { 828c2ecf20Sopenharmony_ci IO_WQ_ACCT_BOUND, 838c2ecf20Sopenharmony_ci IO_WQ_ACCT_UNBOUND, 848c2ecf20Sopenharmony_ci IO_WQ_ACCT_NR, 858c2ecf20Sopenharmony_ci}; 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci/* 888c2ecf20Sopenharmony_ci * Per-node worker thread pool 898c2ecf20Sopenharmony_ci */ 908c2ecf20Sopenharmony_cistruct io_wqe { 918c2ecf20Sopenharmony_ci raw_spinlock_t lock; 928c2ecf20Sopenharmony_ci struct io_wqe_acct acct[2]; 
938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci int node; 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci struct hlist_nulls_head free_list; 978c2ecf20Sopenharmony_ci struct list_head all_list; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci struct wait_queue_entry wait; 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci struct io_wq *wq; 1028c2ecf20Sopenharmony_ci struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS]; 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci cpumask_var_t cpu_mask; 1058c2ecf20Sopenharmony_ci}; 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci/* 1088c2ecf20Sopenharmony_ci * Per io_wq state 1098c2ecf20Sopenharmony_ci */ 1108c2ecf20Sopenharmony_cistruct io_wq { 1118c2ecf20Sopenharmony_ci unsigned long state; 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci free_work_fn *free_work; 1148c2ecf20Sopenharmony_ci io_wq_work_fn *do_work; 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci struct io_wq_hash *hash; 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci atomic_t worker_refs; 1198c2ecf20Sopenharmony_ci struct completion worker_done; 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci struct hlist_node cpuhp_node; 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci struct task_struct *task; 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci struct io_wqe *wqes[]; 1268c2ecf20Sopenharmony_ci}; 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_cistatic enum cpuhp_state io_wq_online; 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_cistruct io_cb_cancel_data { 1318c2ecf20Sopenharmony_ci work_cancel_fn *fn; 1328c2ecf20Sopenharmony_ci void *data; 1338c2ecf20Sopenharmony_ci int nr_running; 1348c2ecf20Sopenharmony_ci int nr_pending; 1358c2ecf20Sopenharmony_ci bool cancel_all; 1368c2ecf20Sopenharmony_ci}; 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_cistatic bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); 1398c2ecf20Sopenharmony_cistatic void io_wqe_dec_running(struct io_worker 
*worker); 1408c2ecf20Sopenharmony_cistatic bool io_acct_cancel_pending_work(struct io_wqe *wqe, 1418c2ecf20Sopenharmony_ci struct io_wqe_acct *acct, 1428c2ecf20Sopenharmony_ci struct io_cb_cancel_data *match); 1438c2ecf20Sopenharmony_cistatic void create_worker_cb(struct callback_head *cb); 1448c2ecf20Sopenharmony_cistatic void io_wq_cancel_tw_create(struct io_wq *wq); 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_cistatic bool io_worker_get(struct io_worker *worker) 1478c2ecf20Sopenharmony_ci{ 1488c2ecf20Sopenharmony_ci return refcount_inc_not_zero(&worker->ref); 1498c2ecf20Sopenharmony_ci} 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_cistatic void io_worker_release(struct io_worker *worker) 1528c2ecf20Sopenharmony_ci{ 1538c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&worker->ref)) 1548c2ecf20Sopenharmony_ci complete(&worker->ref_done); 1558c2ecf20Sopenharmony_ci} 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_cistatic inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound) 1588c2ecf20Sopenharmony_ci{ 1598c2ecf20Sopenharmony_ci return &wqe->acct[bound ? 
IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND]; 1608c2ecf20Sopenharmony_ci} 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_cistatic inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe, 1638c2ecf20Sopenharmony_ci struct io_wq_work *work) 1648c2ecf20Sopenharmony_ci{ 1658c2ecf20Sopenharmony_ci return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND)); 1668c2ecf20Sopenharmony_ci} 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_cistatic inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker) 1698c2ecf20Sopenharmony_ci{ 1708c2ecf20Sopenharmony_ci return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND); 1718c2ecf20Sopenharmony_ci} 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_cistatic void io_worker_ref_put(struct io_wq *wq) 1748c2ecf20Sopenharmony_ci{ 1758c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&wq->worker_refs)) 1768c2ecf20Sopenharmony_ci complete(&wq->worker_done); 1778c2ecf20Sopenharmony_ci} 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_cibool io_wq_worker_stopped(void) 1808c2ecf20Sopenharmony_ci{ 1818c2ecf20Sopenharmony_ci struct io_worker *worker = current->pf_io_worker; 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!io_wq_current_is_worker())) 1848c2ecf20Sopenharmony_ci return true; 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_ci return test_bit(IO_WQ_BIT_EXIT, &worker->wqe->wq->state); 1878c2ecf20Sopenharmony_ci} 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_cistatic void io_worker_cancel_cb(struct io_worker *worker) 1908c2ecf20Sopenharmony_ci{ 1918c2ecf20Sopenharmony_ci struct io_wqe_acct *acct = io_wqe_get_acct(worker); 1928c2ecf20Sopenharmony_ci struct io_wqe *wqe = worker->wqe; 1938c2ecf20Sopenharmony_ci struct io_wq *wq = wqe->wq; 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci atomic_dec(&acct->nr_running); 1968c2ecf20Sopenharmony_ci raw_spin_lock(&worker->wqe->lock); 1978c2ecf20Sopenharmony_ci acct->nr_workers--; 1988c2ecf20Sopenharmony_ci 
raw_spin_unlock(&worker->wqe->lock); 1998c2ecf20Sopenharmony_ci io_worker_ref_put(wq); 2008c2ecf20Sopenharmony_ci clear_bit_unlock(0, &worker->create_state); 2018c2ecf20Sopenharmony_ci io_worker_release(worker); 2028c2ecf20Sopenharmony_ci} 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_cistatic bool io_task_worker_match(struct callback_head *cb, void *data) 2058c2ecf20Sopenharmony_ci{ 2068c2ecf20Sopenharmony_ci struct io_worker *worker; 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci if (cb->func != create_worker_cb) 2098c2ecf20Sopenharmony_ci return false; 2108c2ecf20Sopenharmony_ci worker = container_of(cb, struct io_worker, create_work); 2118c2ecf20Sopenharmony_ci return worker == data; 2128c2ecf20Sopenharmony_ci} 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_cistatic void io_worker_exit(struct io_worker *worker) 2158c2ecf20Sopenharmony_ci{ 2168c2ecf20Sopenharmony_ci struct io_wqe *wqe = worker->wqe; 2178c2ecf20Sopenharmony_ci struct io_wq *wq = wqe->wq; 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci while (1) { 2208c2ecf20Sopenharmony_ci struct callback_head *cb = task_work_cancel_match(wq->task, 2218c2ecf20Sopenharmony_ci io_task_worker_match, worker); 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci if (!cb) 2248c2ecf20Sopenharmony_ci break; 2258c2ecf20Sopenharmony_ci io_worker_cancel_cb(worker); 2268c2ecf20Sopenharmony_ci } 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&worker->ref)) 2298c2ecf20Sopenharmony_ci complete(&worker->ref_done); 2308c2ecf20Sopenharmony_ci wait_for_completion(&worker->ref_done); 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_ci raw_spin_lock(&wqe->lock); 2338c2ecf20Sopenharmony_ci if (worker->flags & IO_WORKER_F_FREE) 2348c2ecf20Sopenharmony_ci hlist_nulls_del_rcu(&worker->nulls_node); 2358c2ecf20Sopenharmony_ci list_del_rcu(&worker->all_list); 2368c2ecf20Sopenharmony_ci preempt_disable(); 2378c2ecf20Sopenharmony_ci io_wqe_dec_running(worker); 
2388c2ecf20Sopenharmony_ci worker->flags = 0; 2398c2ecf20Sopenharmony_ci current->flags &= ~PF_IO_WORKER; 2408c2ecf20Sopenharmony_ci preempt_enable(); 2418c2ecf20Sopenharmony_ci raw_spin_unlock(&wqe->lock); 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci kfree_rcu(worker, rcu); 2448c2ecf20Sopenharmony_ci io_worker_ref_put(wqe->wq); 2458c2ecf20Sopenharmony_ci do_exit(0); 2468c2ecf20Sopenharmony_ci} 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_cistatic inline bool io_acct_run_queue(struct io_wqe_acct *acct) 2498c2ecf20Sopenharmony_ci{ 2508c2ecf20Sopenharmony_ci if (!wq_list_empty(&acct->work_list) && 2518c2ecf20Sopenharmony_ci !test_bit(IO_ACCT_STALLED_BIT, &acct->flags)) 2528c2ecf20Sopenharmony_ci return true; 2538c2ecf20Sopenharmony_ci return false; 2548c2ecf20Sopenharmony_ci} 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci/* 2578c2ecf20Sopenharmony_ci * Check head of free list for an available worker. If one isn't available, 2588c2ecf20Sopenharmony_ci * caller must create one. 2598c2ecf20Sopenharmony_ci */ 2608c2ecf20Sopenharmony_cistatic bool io_wqe_activate_free_worker(struct io_wqe *wqe, 2618c2ecf20Sopenharmony_ci struct io_wqe_acct *acct) 2628c2ecf20Sopenharmony_ci __must_hold(RCU) 2638c2ecf20Sopenharmony_ci{ 2648c2ecf20Sopenharmony_ci struct hlist_nulls_node *n; 2658c2ecf20Sopenharmony_ci struct io_worker *worker; 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci /* 2688c2ecf20Sopenharmony_ci * Iterate free_list and see if we can find an idle worker to 2698c2ecf20Sopenharmony_ci * activate. If a given worker is on the free_list but in the process 2708c2ecf20Sopenharmony_ci * of exiting, keep trying. 
2718c2ecf20Sopenharmony_ci */ 2728c2ecf20Sopenharmony_ci hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { 2738c2ecf20Sopenharmony_ci if (!io_worker_get(worker)) 2748c2ecf20Sopenharmony_ci continue; 2758c2ecf20Sopenharmony_ci if (io_wqe_get_acct(worker) != acct) { 2768c2ecf20Sopenharmony_ci io_worker_release(worker); 2778c2ecf20Sopenharmony_ci continue; 2788c2ecf20Sopenharmony_ci } 2798c2ecf20Sopenharmony_ci if (wake_up_process(worker->task)) { 2808c2ecf20Sopenharmony_ci io_worker_release(worker); 2818c2ecf20Sopenharmony_ci return true; 2828c2ecf20Sopenharmony_ci } 2838c2ecf20Sopenharmony_ci io_worker_release(worker); 2848c2ecf20Sopenharmony_ci } 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci return false; 2878c2ecf20Sopenharmony_ci} 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci/* 2908c2ecf20Sopenharmony_ci * We need a worker. If we find a free one, we're good. If not, and we're 2918c2ecf20Sopenharmony_ci * below the max number of workers, create one. 2928c2ecf20Sopenharmony_ci */ 2938c2ecf20Sopenharmony_cistatic bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) 2948c2ecf20Sopenharmony_ci{ 2958c2ecf20Sopenharmony_ci /* 2968c2ecf20Sopenharmony_ci * Most likely an attempt to queue unbounded work on an io_wq that 2978c2ecf20Sopenharmony_ci * wasn't setup with any unbounded workers. 
2988c2ecf20Sopenharmony_ci */ 2998c2ecf20Sopenharmony_ci if (unlikely(!acct->max_workers)) 3008c2ecf20Sopenharmony_ci pr_warn_once("io-wq is not configured for unbound workers"); 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci raw_spin_lock(&wqe->lock); 3038c2ecf20Sopenharmony_ci if (acct->nr_workers >= acct->max_workers) { 3048c2ecf20Sopenharmony_ci raw_spin_unlock(&wqe->lock); 3058c2ecf20Sopenharmony_ci return true; 3068c2ecf20Sopenharmony_ci } 3078c2ecf20Sopenharmony_ci acct->nr_workers++; 3088c2ecf20Sopenharmony_ci raw_spin_unlock(&wqe->lock); 3098c2ecf20Sopenharmony_ci atomic_inc(&acct->nr_running); 3108c2ecf20Sopenharmony_ci atomic_inc(&wqe->wq->worker_refs); 3118c2ecf20Sopenharmony_ci return create_io_worker(wqe->wq, wqe, acct->index); 3128c2ecf20Sopenharmony_ci} 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_cistatic void io_wqe_inc_running(struct io_worker *worker) 3158c2ecf20Sopenharmony_ci{ 3168c2ecf20Sopenharmony_ci struct io_wqe_acct *acct = io_wqe_get_acct(worker); 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci atomic_inc(&acct->nr_running); 3198c2ecf20Sopenharmony_ci} 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_cistatic void create_worker_cb(struct callback_head *cb) 3228c2ecf20Sopenharmony_ci{ 3238c2ecf20Sopenharmony_ci struct io_worker *worker; 3248c2ecf20Sopenharmony_ci struct io_wq *wq; 3258c2ecf20Sopenharmony_ci struct io_wqe *wqe; 3268c2ecf20Sopenharmony_ci struct io_wqe_acct *acct; 3278c2ecf20Sopenharmony_ci bool do_create = false; 3288c2ecf20Sopenharmony_ci 3298c2ecf20Sopenharmony_ci worker = container_of(cb, struct io_worker, create_work); 3308c2ecf20Sopenharmony_ci wqe = worker->wqe; 3318c2ecf20Sopenharmony_ci wq = wqe->wq; 3328c2ecf20Sopenharmony_ci acct = &wqe->acct[worker->create_index]; 3338c2ecf20Sopenharmony_ci raw_spin_lock(&wqe->lock); 3348c2ecf20Sopenharmony_ci if (acct->nr_workers < acct->max_workers) { 3358c2ecf20Sopenharmony_ci acct->nr_workers++; 3368c2ecf20Sopenharmony_ci do_create = true; 
3378c2ecf20Sopenharmony_ci } 3388c2ecf20Sopenharmony_ci raw_spin_unlock(&wqe->lock); 3398c2ecf20Sopenharmony_ci if (do_create) { 3408c2ecf20Sopenharmony_ci create_io_worker(wq, wqe, worker->create_index); 3418c2ecf20Sopenharmony_ci } else { 3428c2ecf20Sopenharmony_ci atomic_dec(&acct->nr_running); 3438c2ecf20Sopenharmony_ci io_worker_ref_put(wq); 3448c2ecf20Sopenharmony_ci } 3458c2ecf20Sopenharmony_ci clear_bit_unlock(0, &worker->create_state); 3468c2ecf20Sopenharmony_ci io_worker_release(worker); 3478c2ecf20Sopenharmony_ci} 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_cistatic bool io_queue_worker_create(struct io_worker *worker, 3508c2ecf20Sopenharmony_ci struct io_wqe_acct *acct, 3518c2ecf20Sopenharmony_ci task_work_func_t func) 3528c2ecf20Sopenharmony_ci{ 3538c2ecf20Sopenharmony_ci struct io_wqe *wqe = worker->wqe; 3548c2ecf20Sopenharmony_ci struct io_wq *wq = wqe->wq; 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci /* raced with exit, just ignore create call */ 3578c2ecf20Sopenharmony_ci if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) 3588c2ecf20Sopenharmony_ci goto fail; 3598c2ecf20Sopenharmony_ci if (!io_worker_get(worker)) 3608c2ecf20Sopenharmony_ci goto fail; 3618c2ecf20Sopenharmony_ci /* 3628c2ecf20Sopenharmony_ci * create_state manages ownership of create_work/index. We should 3638c2ecf20Sopenharmony_ci * only need one entry per worker, as the worker going to sleep 3648c2ecf20Sopenharmony_ci * will trigger the condition, and waking will clear it once it 3658c2ecf20Sopenharmony_ci * runs the task_work. 
3668c2ecf20Sopenharmony_ci */ 3678c2ecf20Sopenharmony_ci if (test_bit(0, &worker->create_state) || 3688c2ecf20Sopenharmony_ci test_and_set_bit_lock(0, &worker->create_state)) 3698c2ecf20Sopenharmony_ci goto fail_release; 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_ci atomic_inc(&wq->worker_refs); 3728c2ecf20Sopenharmony_ci init_task_work(&worker->create_work, func); 3738c2ecf20Sopenharmony_ci worker->create_index = acct->index; 3748c2ecf20Sopenharmony_ci if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) { 3758c2ecf20Sopenharmony_ci /* 3768c2ecf20Sopenharmony_ci * EXIT may have been set after checking it above, check after 3778c2ecf20Sopenharmony_ci * adding the task_work and remove any creation item if it is 3788c2ecf20Sopenharmony_ci * now set. wq exit does that too, but we can have added this 3798c2ecf20Sopenharmony_ci * work item after we canceled in io_wq_exit_workers(). 3808c2ecf20Sopenharmony_ci */ 3818c2ecf20Sopenharmony_ci if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) 3828c2ecf20Sopenharmony_ci io_wq_cancel_tw_create(wq); 3838c2ecf20Sopenharmony_ci io_worker_ref_put(wq); 3848c2ecf20Sopenharmony_ci return true; 3858c2ecf20Sopenharmony_ci } 3868c2ecf20Sopenharmony_ci io_worker_ref_put(wq); 3878c2ecf20Sopenharmony_ci clear_bit_unlock(0, &worker->create_state); 3888c2ecf20Sopenharmony_cifail_release: 3898c2ecf20Sopenharmony_ci io_worker_release(worker); 3908c2ecf20Sopenharmony_cifail: 3918c2ecf20Sopenharmony_ci atomic_dec(&acct->nr_running); 3928c2ecf20Sopenharmony_ci io_worker_ref_put(wq); 3938c2ecf20Sopenharmony_ci return false; 3948c2ecf20Sopenharmony_ci} 3958c2ecf20Sopenharmony_ci 3968c2ecf20Sopenharmony_cistatic void io_wqe_dec_running(struct io_worker *worker) 3978c2ecf20Sopenharmony_ci __must_hold(wqe->lock) 3988c2ecf20Sopenharmony_ci{ 3998c2ecf20Sopenharmony_ci struct io_wqe_acct *acct = io_wqe_get_acct(worker); 4008c2ecf20Sopenharmony_ci struct io_wqe *wqe = worker->wqe; 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_ci if 
(!(worker->flags & IO_WORKER_F_UP)) 4038c2ecf20Sopenharmony_ci return; 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) { 4068c2ecf20Sopenharmony_ci atomic_inc(&acct->nr_running); 4078c2ecf20Sopenharmony_ci atomic_inc(&wqe->wq->worker_refs); 4088c2ecf20Sopenharmony_ci raw_spin_unlock(&wqe->lock); 4098c2ecf20Sopenharmony_ci io_queue_worker_create(worker, acct, create_worker_cb); 4108c2ecf20Sopenharmony_ci raw_spin_lock(&wqe->lock); 4118c2ecf20Sopenharmony_ci } 4128c2ecf20Sopenharmony_ci} 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci/* 4158c2ecf20Sopenharmony_ci * Worker will start processing some work. Move it to the busy list, if 4168c2ecf20Sopenharmony_ci * it's currently on the freelist 4178c2ecf20Sopenharmony_ci */ 4188c2ecf20Sopenharmony_cistatic void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, 4198c2ecf20Sopenharmony_ci struct io_wq_work *work) 4208c2ecf20Sopenharmony_ci __must_hold(wqe->lock) 4218c2ecf20Sopenharmony_ci{ 4228c2ecf20Sopenharmony_ci if (worker->flags & IO_WORKER_F_FREE) { 4238c2ecf20Sopenharmony_ci worker->flags &= ~IO_WORKER_F_FREE; 4248c2ecf20Sopenharmony_ci hlist_nulls_del_init_rcu(&worker->nulls_node); 4258c2ecf20Sopenharmony_ci } 4268c2ecf20Sopenharmony_ci} 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ci/* 4298c2ecf20Sopenharmony_ci * No work, worker going to sleep. Move to freelist, and unuse mm if we 4308c2ecf20Sopenharmony_ci * have one attached. Dropping the mm may potentially sleep, so we drop 4318c2ecf20Sopenharmony_ci * the lock in that case and return success. Since the caller has to 4328c2ecf20Sopenharmony_ci * retry the loop in that case (we changed task state), we don't regrab 4338c2ecf20Sopenharmony_ci * the lock if we return success. 
4348c2ecf20Sopenharmony_ci */ 4358c2ecf20Sopenharmony_cistatic void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) 4368c2ecf20Sopenharmony_ci __must_hold(wqe->lock) 4378c2ecf20Sopenharmony_ci{ 4388c2ecf20Sopenharmony_ci if (!(worker->flags & IO_WORKER_F_FREE)) { 4398c2ecf20Sopenharmony_ci worker->flags |= IO_WORKER_F_FREE; 4408c2ecf20Sopenharmony_ci hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); 4418c2ecf20Sopenharmony_ci } 4428c2ecf20Sopenharmony_ci} 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_cistatic inline unsigned int io_get_work_hash(struct io_wq_work *work) 4458c2ecf20Sopenharmony_ci{ 4468c2ecf20Sopenharmony_ci return work->flags >> IO_WQ_HASH_SHIFT; 4478c2ecf20Sopenharmony_ci} 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_cistatic bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) 4508c2ecf20Sopenharmony_ci{ 4518c2ecf20Sopenharmony_ci struct io_wq *wq = wqe->wq; 4528c2ecf20Sopenharmony_ci bool ret = false; 4538c2ecf20Sopenharmony_ci 4548c2ecf20Sopenharmony_ci spin_lock_irq(&wq->hash->wait.lock); 4558c2ecf20Sopenharmony_ci if (list_empty(&wqe->wait.entry)) { 4568c2ecf20Sopenharmony_ci __add_wait_queue(&wq->hash->wait, &wqe->wait); 4578c2ecf20Sopenharmony_ci if (!test_bit(hash, &wq->hash->map)) { 4588c2ecf20Sopenharmony_ci __set_current_state(TASK_RUNNING); 4598c2ecf20Sopenharmony_ci list_del_init(&wqe->wait.entry); 4608c2ecf20Sopenharmony_ci ret = true; 4618c2ecf20Sopenharmony_ci } 4628c2ecf20Sopenharmony_ci } 4638c2ecf20Sopenharmony_ci spin_unlock_irq(&wq->hash->wait.lock); 4648c2ecf20Sopenharmony_ci return ret; 4658c2ecf20Sopenharmony_ci} 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_cistatic struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, 4688c2ecf20Sopenharmony_ci struct io_worker *worker) 4698c2ecf20Sopenharmony_ci __must_hold(wqe->lock) 4708c2ecf20Sopenharmony_ci{ 4718c2ecf20Sopenharmony_ci struct io_wq_work_node *node, *prev; 4728c2ecf20Sopenharmony_ci struct io_wq_work *work, 
*tail; 4738c2ecf20Sopenharmony_ci unsigned int stall_hash = -1U; 4748c2ecf20Sopenharmony_ci struct io_wqe *wqe = worker->wqe; 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ci wq_list_for_each(node, prev, &acct->work_list) { 4778c2ecf20Sopenharmony_ci unsigned int hash; 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ci work = container_of(node, struct io_wq_work, list); 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_ci /* not hashed, can run anytime */ 4828c2ecf20Sopenharmony_ci if (!io_wq_is_hashed(work)) { 4838c2ecf20Sopenharmony_ci wq_list_del(&acct->work_list, node, prev); 4848c2ecf20Sopenharmony_ci return work; 4858c2ecf20Sopenharmony_ci } 4868c2ecf20Sopenharmony_ci 4878c2ecf20Sopenharmony_ci hash = io_get_work_hash(work); 4888c2ecf20Sopenharmony_ci /* all items with this hash lie in [work, tail] */ 4898c2ecf20Sopenharmony_ci tail = wqe->hash_tail[hash]; 4908c2ecf20Sopenharmony_ci 4918c2ecf20Sopenharmony_ci /* hashed, can run if not already running */ 4928c2ecf20Sopenharmony_ci if (!test_and_set_bit(hash, &wqe->wq->hash->map)) { 4938c2ecf20Sopenharmony_ci wqe->hash_tail[hash] = NULL; 4948c2ecf20Sopenharmony_ci wq_list_cut(&acct->work_list, &tail->list, prev); 4958c2ecf20Sopenharmony_ci return work; 4968c2ecf20Sopenharmony_ci } 4978c2ecf20Sopenharmony_ci if (stall_hash == -1U) 4988c2ecf20Sopenharmony_ci stall_hash = hash; 4998c2ecf20Sopenharmony_ci /* fast forward to a next hash, for-each will fix up @prev */ 5008c2ecf20Sopenharmony_ci node = &tail->list; 5018c2ecf20Sopenharmony_ci } 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci if (stall_hash != -1U) { 5048c2ecf20Sopenharmony_ci bool unstalled; 5058c2ecf20Sopenharmony_ci 5068c2ecf20Sopenharmony_ci /* 5078c2ecf20Sopenharmony_ci * Set this before dropping the lock to avoid racing with new 5088c2ecf20Sopenharmony_ci * work being added and clearing the stalled bit. 
5098c2ecf20Sopenharmony_ci */ 5108c2ecf20Sopenharmony_ci set_bit(IO_ACCT_STALLED_BIT, &acct->flags); 5118c2ecf20Sopenharmony_ci raw_spin_unlock(&wqe->lock); 5128c2ecf20Sopenharmony_ci unstalled = io_wait_on_hash(wqe, stall_hash); 5138c2ecf20Sopenharmony_ci raw_spin_lock(&wqe->lock); 5148c2ecf20Sopenharmony_ci if (unstalled) { 5158c2ecf20Sopenharmony_ci clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); 5168c2ecf20Sopenharmony_ci if (wq_has_sleeper(&wqe->wq->hash->wait)) 5178c2ecf20Sopenharmony_ci wake_up(&wqe->wq->hash->wait); 5188c2ecf20Sopenharmony_ci } 5198c2ecf20Sopenharmony_ci } 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci return NULL; 5228c2ecf20Sopenharmony_ci} 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_cistatic bool io_flush_signals(void) 5258c2ecf20Sopenharmony_ci{ 5268c2ecf20Sopenharmony_ci if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { 5278c2ecf20Sopenharmony_ci __set_current_state(TASK_RUNNING); 5288c2ecf20Sopenharmony_ci tracehook_notify_signal(); 5298c2ecf20Sopenharmony_ci return true; 5308c2ecf20Sopenharmony_ci } 5318c2ecf20Sopenharmony_ci return false; 5328c2ecf20Sopenharmony_ci} 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_cistatic void io_assign_current_work(struct io_worker *worker, 5358c2ecf20Sopenharmony_ci struct io_wq_work *work) 5368c2ecf20Sopenharmony_ci{ 5378c2ecf20Sopenharmony_ci if (work) { 5388c2ecf20Sopenharmony_ci io_flush_signals(); 5398c2ecf20Sopenharmony_ci cond_resched(); 5408c2ecf20Sopenharmony_ci } 5418c2ecf20Sopenharmony_ci 5428c2ecf20Sopenharmony_ci spin_lock(&worker->lock); 5438c2ecf20Sopenharmony_ci worker->cur_work = work; 5448c2ecf20Sopenharmony_ci spin_unlock(&worker->lock); 5458c2ecf20Sopenharmony_ci} 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_cistatic void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work); 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_cistatic void io_worker_handle_work(struct io_worker *worker) 5508c2ecf20Sopenharmony_ci __releases(wqe->lock) 
5518c2ecf20Sopenharmony_ci{ 5528c2ecf20Sopenharmony_ci struct io_wqe_acct *acct = io_wqe_get_acct(worker); 5538c2ecf20Sopenharmony_ci struct io_wqe *wqe = worker->wqe; 5548c2ecf20Sopenharmony_ci struct io_wq *wq = wqe->wq; 5558c2ecf20Sopenharmony_ci bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state); 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci do { 5588c2ecf20Sopenharmony_ci struct io_wq_work *work; 5598c2ecf20Sopenharmony_ciget_next: 5608c2ecf20Sopenharmony_ci /* 5618c2ecf20Sopenharmony_ci * If we got some work, mark us as busy. If we didn't, but 5628c2ecf20Sopenharmony_ci * the list isn't empty, it means we stalled on hashed work. 5638c2ecf20Sopenharmony_ci * Mark us stalled so we don't keep looking for work when we 5648c2ecf20Sopenharmony_ci * can't make progress, any work completion or insertion will 5658c2ecf20Sopenharmony_ci * clear the stalled flag. 5668c2ecf20Sopenharmony_ci */ 5678c2ecf20Sopenharmony_ci work = io_get_next_work(acct, worker); 5688c2ecf20Sopenharmony_ci if (work) 5698c2ecf20Sopenharmony_ci __io_worker_busy(wqe, worker, work); 5708c2ecf20Sopenharmony_ci 5718c2ecf20Sopenharmony_ci raw_spin_unlock(&wqe->lock); 5728c2ecf20Sopenharmony_ci if (!work) 5738c2ecf20Sopenharmony_ci break; 5748c2ecf20Sopenharmony_ci io_assign_current_work(worker, work); 5758c2ecf20Sopenharmony_ci __set_current_state(TASK_RUNNING); 5768c2ecf20Sopenharmony_ci 5778c2ecf20Sopenharmony_ci /* handle a whole dependent link */ 5788c2ecf20Sopenharmony_ci do { 5798c2ecf20Sopenharmony_ci struct io_wq_work *next_hashed, *linked; 5808c2ecf20Sopenharmony_ci unsigned int hash = io_get_work_hash(work); 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_ci next_hashed = wq_next_work(work); 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND)) 5858c2ecf20Sopenharmony_ci work->flags |= IO_WQ_WORK_CANCEL; 5868c2ecf20Sopenharmony_ci wq->do_work(work); 5878c2ecf20Sopenharmony_ci io_assign_current_work(worker, NULL); 
5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci linked = wq->free_work(work); 5908c2ecf20Sopenharmony_ci work = next_hashed; 5918c2ecf20Sopenharmony_ci if (!work && linked && !io_wq_is_hashed(linked)) { 5928c2ecf20Sopenharmony_ci work = linked; 5938c2ecf20Sopenharmony_ci linked = NULL; 5948c2ecf20Sopenharmony_ci } 5958c2ecf20Sopenharmony_ci io_assign_current_work(worker, work); 5968c2ecf20Sopenharmony_ci if (linked) 5978c2ecf20Sopenharmony_ci io_wqe_enqueue(wqe, linked); 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_ci if (hash != -1U && !next_hashed) { 6008c2ecf20Sopenharmony_ci /* serialize hash clear with wake_up() */ 6018c2ecf20Sopenharmony_ci spin_lock_irq(&wq->hash->wait.lock); 6028c2ecf20Sopenharmony_ci clear_bit(hash, &wq->hash->map); 6038c2ecf20Sopenharmony_ci clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); 6048c2ecf20Sopenharmony_ci spin_unlock_irq(&wq->hash->wait.lock); 6058c2ecf20Sopenharmony_ci if (wq_has_sleeper(&wq->hash->wait)) 6068c2ecf20Sopenharmony_ci wake_up(&wq->hash->wait); 6078c2ecf20Sopenharmony_ci raw_spin_lock(&wqe->lock); 6088c2ecf20Sopenharmony_ci /* skip unnecessary unlock-lock wqe->lock */ 6098c2ecf20Sopenharmony_ci if (!work) 6108c2ecf20Sopenharmony_ci goto get_next; 6118c2ecf20Sopenharmony_ci raw_spin_unlock(&wqe->lock); 6128c2ecf20Sopenharmony_ci } 6138c2ecf20Sopenharmony_ci } while (work); 6148c2ecf20Sopenharmony_ci 6158c2ecf20Sopenharmony_ci raw_spin_lock(&wqe->lock); 6168c2ecf20Sopenharmony_ci } while (1); 6178c2ecf20Sopenharmony_ci} 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_cistatic int io_wqe_worker(void *data) 6208c2ecf20Sopenharmony_ci{ 6218c2ecf20Sopenharmony_ci struct io_worker *worker = data; 6228c2ecf20Sopenharmony_ci struct io_wqe_acct *acct = io_wqe_get_acct(worker); 6238c2ecf20Sopenharmony_ci struct io_wqe *wqe = worker->wqe; 6248c2ecf20Sopenharmony_ci struct io_wq *wq = wqe->wq; 6258c2ecf20Sopenharmony_ci bool last_timeout = false; 6268c2ecf20Sopenharmony_ci char buf[TASK_COMM_LEN]; 
6278c2ecf20Sopenharmony_ci 6288c2ecf20Sopenharmony_ci worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); 6298c2ecf20Sopenharmony_ci 6308c2ecf20Sopenharmony_ci snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); 6318c2ecf20Sopenharmony_ci set_task_comm(current, buf); 6328c2ecf20Sopenharmony_ci 6338c2ecf20Sopenharmony_ci while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { 6348c2ecf20Sopenharmony_ci long ret; 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci set_current_state(TASK_INTERRUPTIBLE); 6378c2ecf20Sopenharmony_ciloop: 6388c2ecf20Sopenharmony_ci raw_spin_lock(&wqe->lock); 6398c2ecf20Sopenharmony_ci if (io_acct_run_queue(acct)) { 6408c2ecf20Sopenharmony_ci io_worker_handle_work(worker); 6418c2ecf20Sopenharmony_ci goto loop; 6428c2ecf20Sopenharmony_ci } 6438c2ecf20Sopenharmony_ci /* timed out, exit unless we're the last worker */ 6448c2ecf20Sopenharmony_ci if (last_timeout && acct->nr_workers > 1) { 6458c2ecf20Sopenharmony_ci acct->nr_workers--; 6468c2ecf20Sopenharmony_ci raw_spin_unlock(&wqe->lock); 6478c2ecf20Sopenharmony_ci __set_current_state(TASK_RUNNING); 6488c2ecf20Sopenharmony_ci break; 6498c2ecf20Sopenharmony_ci } 6508c2ecf20Sopenharmony_ci last_timeout = false; 6518c2ecf20Sopenharmony_ci __io_worker_idle(wqe, worker); 6528c2ecf20Sopenharmony_ci raw_spin_unlock(&wqe->lock); 6538c2ecf20Sopenharmony_ci if (io_flush_signals()) 6548c2ecf20Sopenharmony_ci continue; 6558c2ecf20Sopenharmony_ci ret = schedule_timeout(WORKER_IDLE_TIMEOUT); 6568c2ecf20Sopenharmony_ci if (signal_pending(current)) { 6578c2ecf20Sopenharmony_ci struct ksignal ksig; 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci if (!get_signal(&ksig)) 6608c2ecf20Sopenharmony_ci continue; 6618c2ecf20Sopenharmony_ci break; 6628c2ecf20Sopenharmony_ci } 6638c2ecf20Sopenharmony_ci last_timeout = !ret; 6648c2ecf20Sopenharmony_ci } 6658c2ecf20Sopenharmony_ci 6668c2ecf20Sopenharmony_ci if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) { 6678c2ecf20Sopenharmony_ci raw_spin_lock(&wqe->lock); 
6688c2ecf20Sopenharmony_ci		io_worker_handle_work(worker); 6698c2ecf20Sopenharmony_ci	} 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_ci	io_worker_exit(worker); 6728c2ecf20Sopenharmony_ci	return 0; 6738c2ecf20Sopenharmony_ci} 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci/* 6768c2ecf20Sopenharmony_ci * Called when a worker is scheduled in. Mark us as currently running. 6778c2ecf20Sopenharmony_ci */ 6788c2ecf20Sopenharmony_civoid io_wq_worker_running(struct task_struct *tsk) 6798c2ecf20Sopenharmony_ci{ 6808c2ecf20Sopenharmony_ci	struct io_worker *worker = tsk->pf_io_worker; 6818c2ecf20Sopenharmony_ci 6828c2ecf20Sopenharmony_ci	/* not an io-wq worker task */ if (!worker) 6838c2ecf20Sopenharmony_ci		return; 6848c2ecf20Sopenharmony_ci	if (!(worker->flags & IO_WORKER_F_UP)) 6858c2ecf20Sopenharmony_ci		return; 6868c2ecf20Sopenharmony_ci	if (worker->flags & IO_WORKER_F_RUNNING) 6878c2ecf20Sopenharmony_ci		return; 6888c2ecf20Sopenharmony_ci	worker->flags |= IO_WORKER_F_RUNNING; 6898c2ecf20Sopenharmony_ci	io_wqe_inc_running(worker); 6908c2ecf20Sopenharmony_ci} 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_ci/* 6938c2ecf20Sopenharmony_ci * Called when worker is going to sleep. If there are no workers currently 6948c2ecf20Sopenharmony_ci * running and we have work pending, wake up a free one or create a new one.
6958c2ecf20Sopenharmony_ci */ 6968c2ecf20Sopenharmony_civoid io_wq_worker_sleeping(struct task_struct *tsk) 6978c2ecf20Sopenharmony_ci{ 6988c2ecf20Sopenharmony_ci	struct io_worker *worker = tsk->pf_io_worker; 6998c2ecf20Sopenharmony_ci 7008c2ecf20Sopenharmony_ci	if (!worker) 7018c2ecf20Sopenharmony_ci		return; 7028c2ecf20Sopenharmony_ci	if (!(worker->flags & IO_WORKER_F_UP)) 7038c2ecf20Sopenharmony_ci		return; 7048c2ecf20Sopenharmony_ci	if (!(worker->flags & IO_WORKER_F_RUNNING)) 7058c2ecf20Sopenharmony_ci		return; 7068c2ecf20Sopenharmony_ci 7078c2ecf20Sopenharmony_ci	worker->flags &= ~IO_WORKER_F_RUNNING; 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ci	raw_spin_lock(&worker->wqe->lock); 7108c2ecf20Sopenharmony_ci	io_wqe_dec_running(worker); 7118c2ecf20Sopenharmony_ci	raw_spin_unlock(&worker->wqe->lock); 7128c2ecf20Sopenharmony_ci} 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_ci/* Attach the freshly created task to the worker, pin its CPU affinity to the wqe's mask, and publish it on the free/all lists before waking it. */ static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, 7158c2ecf20Sopenharmony_ci					struct task_struct *tsk) 7168c2ecf20Sopenharmony_ci{ 7178c2ecf20Sopenharmony_ci	tsk->pf_io_worker = worker; 7188c2ecf20Sopenharmony_ci	worker->task = tsk; 7198c2ecf20Sopenharmony_ci	set_cpus_allowed_ptr(tsk, wqe->cpu_mask); 7208c2ecf20Sopenharmony_ci	tsk->flags |= PF_NO_SETAFFINITY; 7218c2ecf20Sopenharmony_ci 7228c2ecf20Sopenharmony_ci	raw_spin_lock(&wqe->lock); 7238c2ecf20Sopenharmony_ci	hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); 7248c2ecf20Sopenharmony_ci	list_add_tail_rcu(&worker->all_list, &wqe->all_list); 7258c2ecf20Sopenharmony_ci	worker->flags |= IO_WORKER_F_FREE; 7268c2ecf20Sopenharmony_ci	raw_spin_unlock(&wqe->lock); 7278c2ecf20Sopenharmony_ci	wake_up_new_task(tsk); 7288c2ecf20Sopenharmony_ci} 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci/* match callback that accepts every work item — used for cancel-all */ static bool io_wq_work_match_all(struct io_wq_work *work, void *data) 7318c2ecf20Sopenharmony_ci{ 7328c2ecf20Sopenharmony_ci	return true; 7338c2ecf20Sopenharmony_ci} 7348c2ecf20Sopenharmony_ci
7358c2ecf20Sopenharmony_cistatic inline bool io_should_retry_thread(long err) 7368c2ecf20Sopenharmony_ci{ 7378c2ecf20Sopenharmony_ci	/* 7388c2ecf20Sopenharmony_ci	 * Prevent perpetual task_work retry, if the task (or its group) is 7398c2ecf20Sopenharmony_ci	 * exiting. 7408c2ecf20Sopenharmony_ci	 */ 7418c2ecf20Sopenharmony_ci	if (fatal_signal_pending(current)) 7428c2ecf20Sopenharmony_ci		return false; 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_ci	/* only transient/restartable errors warrant another attempt */ switch (err) { 7458c2ecf20Sopenharmony_ci	case -EAGAIN: 7468c2ecf20Sopenharmony_ci	case -ERESTARTSYS: 7478c2ecf20Sopenharmony_ci	case -ERESTARTNOINTR: 7488c2ecf20Sopenharmony_ci	case -ERESTARTNOHAND: 7498c2ecf20Sopenharmony_ci		return true; 7508c2ecf20Sopenharmony_ci	default: 7518c2ecf20Sopenharmony_ci		return false; 7528c2ecf20Sopenharmony_ci	} 7538c2ecf20Sopenharmony_ci} 7548c2ecf20Sopenharmony_ci 7558c2ecf20Sopenharmony_ci/* task_work continuation of deferred worker creation: retry create_io_thread(); on a non-retryable failure roll back accounting and, if this was the last worker for the acct, cancel its pending work. */ static void create_worker_cont(struct callback_head *cb) 7568c2ecf20Sopenharmony_ci{ 7578c2ecf20Sopenharmony_ci	struct io_worker *worker; 7588c2ecf20Sopenharmony_ci	struct task_struct *tsk; 7598c2ecf20Sopenharmony_ci	struct io_wqe *wqe; 7608c2ecf20Sopenharmony_ci 7618c2ecf20Sopenharmony_ci	worker = container_of(cb, struct io_worker, create_work); 7628c2ecf20Sopenharmony_ci	clear_bit_unlock(0, &worker->create_state); 7638c2ecf20Sopenharmony_ci	wqe = worker->wqe; 7648c2ecf20Sopenharmony_ci	tsk = create_io_thread(io_wqe_worker, worker, wqe->node); 7658c2ecf20Sopenharmony_ci	if (!IS_ERR(tsk)) { 7668c2ecf20Sopenharmony_ci		io_init_new_worker(wqe, worker, tsk); 7678c2ecf20Sopenharmony_ci		io_worker_release(worker); 7688c2ecf20Sopenharmony_ci		return; 7698c2ecf20Sopenharmony_ci	} else if (!io_should_retry_thread(PTR_ERR(tsk))) { 7708c2ecf20Sopenharmony_ci		struct io_wqe_acct *acct = io_wqe_get_acct(worker); 7718c2ecf20Sopenharmony_ci 7728c2ecf20Sopenharmony_ci		atomic_dec(&acct->nr_running); 7738c2ecf20Sopenharmony_ci		raw_spin_lock(&wqe->lock); 7748c2ecf20Sopenharmony_ci		acct->nr_workers--;
7758c2ecf20Sopenharmony_ci		if (!acct->nr_workers) { 7768c2ecf20Sopenharmony_ci			struct io_cb_cancel_data match = { 7778c2ecf20Sopenharmony_ci				.fn		= io_wq_work_match_all, 7788c2ecf20Sopenharmony_ci				.cancel_all	= true, 7798c2ecf20Sopenharmony_ci			}; 7808c2ecf20Sopenharmony_ci 7818c2ecf20Sopenharmony_ci			while (io_acct_cancel_pending_work(wqe, acct, &match)) 7828c2ecf20Sopenharmony_ci				raw_spin_lock(&wqe->lock); 7838c2ecf20Sopenharmony_ci		} 7848c2ecf20Sopenharmony_ci		raw_spin_unlock(&wqe->lock); 7858c2ecf20Sopenharmony_ci		io_worker_ref_put(wqe->wq); 7868c2ecf20Sopenharmony_ci		kfree(worker); 7878c2ecf20Sopenharmony_ci		return; 7888c2ecf20Sopenharmony_ci	} 7898c2ecf20Sopenharmony_ci 7908c2ecf20Sopenharmony_ci	/* re-create attempts grab a new worker ref, drop the existing one */ 7918c2ecf20Sopenharmony_ci	io_worker_release(worker); 7928c2ecf20Sopenharmony_ci	schedule_work(&worker->work); 7938c2ecf20Sopenharmony_ci} 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_ci/* Workqueue path for re-attempting worker creation outside of task context; frees the worker if queueing the create task_work fails. */ static void io_workqueue_create(struct work_struct *work) 7968c2ecf20Sopenharmony_ci{ 7978c2ecf20Sopenharmony_ci	struct io_worker *worker = container_of(work, struct io_worker, work); 7988c2ecf20Sopenharmony_ci	struct io_wqe_acct *acct = io_wqe_get_acct(worker); 7998c2ecf20Sopenharmony_ci 8008c2ecf20Sopenharmony_ci	if (!io_queue_worker_create(worker, acct, create_worker_cont)) 8018c2ecf20Sopenharmony_ci		kfree(worker); 8028c2ecf20Sopenharmony_ci} 8038c2ecf20Sopenharmony_ci 8048c2ecf20Sopenharmony_ci/* Allocate, initialise and launch a new worker for acct slot 'index'; on non-retryable failure undo the caller's accounting. */ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) 8058c2ecf20Sopenharmony_ci{ 8068c2ecf20Sopenharmony_ci	struct io_wqe_acct *acct = &wqe->acct[index]; 8078c2ecf20Sopenharmony_ci	struct io_worker *worker; 8088c2ecf20Sopenharmony_ci	struct task_struct *tsk; 8098c2ecf20Sopenharmony_ci 8108c2ecf20Sopenharmony_ci	__set_current_state(TASK_RUNNING); 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_ci	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node); 8138c2ecf20Sopenharmony_ci	if
(!worker) { 8148c2ecf20Sopenharmony_cifail: 8158c2ecf20Sopenharmony_ci		atomic_dec(&acct->nr_running); 8168c2ecf20Sopenharmony_ci		raw_spin_lock(&wqe->lock); 8178c2ecf20Sopenharmony_ci		acct->nr_workers--; 8188c2ecf20Sopenharmony_ci		raw_spin_unlock(&wqe->lock); 8198c2ecf20Sopenharmony_ci		io_worker_ref_put(wq); 8208c2ecf20Sopenharmony_ci		return false; 8218c2ecf20Sopenharmony_ci	} 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_ci	/* NOTE(review): initial ref; presumably dropped when the worker exits — confirm against io_worker_exit */ refcount_set(&worker->ref, 1); 8248c2ecf20Sopenharmony_ci	worker->wqe = wqe; 8258c2ecf20Sopenharmony_ci	spin_lock_init(&worker->lock); 8268c2ecf20Sopenharmony_ci	init_completion(&worker->ref_done); 8278c2ecf20Sopenharmony_ci 8288c2ecf20Sopenharmony_ci	if (index == IO_WQ_ACCT_BOUND) 8298c2ecf20Sopenharmony_ci		worker->flags |= IO_WORKER_F_BOUND; 8308c2ecf20Sopenharmony_ci 8318c2ecf20Sopenharmony_ci	tsk = create_io_thread(io_wqe_worker, worker, wqe->node); 8328c2ecf20Sopenharmony_ci	if (!IS_ERR(tsk)) { 8338c2ecf20Sopenharmony_ci		io_init_new_worker(wqe, worker, tsk); 8348c2ecf20Sopenharmony_ci	} else if (!io_should_retry_thread(PTR_ERR(tsk))) { 8358c2ecf20Sopenharmony_ci		kfree(worker); 8368c2ecf20Sopenharmony_ci		goto fail; 8378c2ecf20Sopenharmony_ci	} else { 8388c2ecf20Sopenharmony_ci		INIT_WORK(&worker->work, io_workqueue_create); 8398c2ecf20Sopenharmony_ci		schedule_work(&worker->work); 8408c2ecf20Sopenharmony_ci	} 8418c2ecf20Sopenharmony_ci 8428c2ecf20Sopenharmony_ci	return true; 8438c2ecf20Sopenharmony_ci} 8448c2ecf20Sopenharmony_ci 8458c2ecf20Sopenharmony_ci/* 8468c2ecf20Sopenharmony_ci * Iterate the passed in list and call the specific function for each 8478c2ecf20Sopenharmony_ci * worker that isn't exiting 8488c2ecf20Sopenharmony_ci */ 8498c2ecf20Sopenharmony_cistatic bool io_wq_for_each_worker(struct io_wqe *wqe, 8508c2ecf20Sopenharmony_ci				  bool (*func)(struct io_worker *, void *), 8518c2ecf20Sopenharmony_ci				  void *data) 8528c2ecf20Sopenharmony_ci{ 8538c2ecf20Sopenharmony_ci	struct io_worker *worker; 8548c2ecf20Sopenharmony_ci	bool ret =
false; 8558c2ecf20Sopenharmony_ci 8568c2ecf20Sopenharmony_ci	list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { 8578c2ecf20Sopenharmony_ci		if (io_worker_get(worker)) { 8588c2ecf20Sopenharmony_ci			/* no task if node is/was offline */ 8598c2ecf20Sopenharmony_ci			if (worker->task) 8608c2ecf20Sopenharmony_ci				ret = func(worker, data); 8618c2ecf20Sopenharmony_ci			io_worker_release(worker); 8628c2ecf20Sopenharmony_ci			if (ret) 8638c2ecf20Sopenharmony_ci				break; 8648c2ecf20Sopenharmony_ci		} 8658c2ecf20Sopenharmony_ci	} 8668c2ecf20Sopenharmony_ci 8678c2ecf20Sopenharmony_ci	return ret; 8688c2ecf20Sopenharmony_ci} 8698c2ecf20Sopenharmony_ci 8708c2ecf20Sopenharmony_ci/* Wake a worker and flag it to re-check its signal/exit state. */ static bool io_wq_worker_wake(struct io_worker *worker, void *data) 8718c2ecf20Sopenharmony_ci{ 8728c2ecf20Sopenharmony_ci	set_notify_signal(worker->task); 8738c2ecf20Sopenharmony_ci	wake_up_process(worker->task); 8748c2ecf20Sopenharmony_ci	return false; 8758c2ecf20Sopenharmony_ci} 8768c2ecf20Sopenharmony_ci 8778c2ecf20Sopenharmony_ci/* Run the work item (and any linked chain returned by free_work) to completion with the cancel flag set. */ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) 8788c2ecf20Sopenharmony_ci{ 8798c2ecf20Sopenharmony_ci	struct io_wq *wq = wqe->wq; 8808c2ecf20Sopenharmony_ci 8818c2ecf20Sopenharmony_ci	do { 8828c2ecf20Sopenharmony_ci		work->flags |= IO_WQ_WORK_CANCEL; 8838c2ecf20Sopenharmony_ci		wq->do_work(work); 8848c2ecf20Sopenharmony_ci		work = wq->free_work(work); 8858c2ecf20Sopenharmony_ci	} while (work); 8868c2ecf20Sopenharmony_ci} 8878c2ecf20Sopenharmony_ci 8888c2ecf20Sopenharmony_ci/* Insert work on the acct's list; hashed work is chained behind the current tail of its hash bucket so same-hash items stay serialized. */ static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work) 8898c2ecf20Sopenharmony_ci{ 8908c2ecf20Sopenharmony_ci	struct io_wqe_acct *acct = io_work_get_acct(wqe, work); 8918c2ecf20Sopenharmony_ci	unsigned int hash; 8928c2ecf20Sopenharmony_ci	struct io_wq_work *tail; 8938c2ecf20Sopenharmony_ci 8948c2ecf20Sopenharmony_ci	if (!io_wq_is_hashed(work)) { 8958c2ecf20Sopenharmony_ciappend: 8968c2ecf20Sopenharmony_ci		wq_list_add_tail(&work->list, &acct->work_list);
8978c2ecf20Sopenharmony_ci		return; 8988c2ecf20Sopenharmony_ci	} 8998c2ecf20Sopenharmony_ci 9008c2ecf20Sopenharmony_ci	hash = io_get_work_hash(work); 9018c2ecf20Sopenharmony_ci	tail = wqe->hash_tail[hash]; 9028c2ecf20Sopenharmony_ci	wqe->hash_tail[hash] = work; 9038c2ecf20Sopenharmony_ci	if (!tail) 9048c2ecf20Sopenharmony_ci		goto append; 9058c2ecf20Sopenharmony_ci 9068c2ecf20Sopenharmony_ci	wq_list_add_after(&work->list, &tail->list, &acct->work_list); 9078c2ecf20Sopenharmony_ci} 9088c2ecf20Sopenharmony_ci 9098c2ecf20Sopenharmony_ci/* match callback: cancel only the exact work item passed in 'data' */ static bool io_wq_work_match_item(struct io_wq_work *work, void *data) 9108c2ecf20Sopenharmony_ci{ 9118c2ecf20Sopenharmony_ci	return work == data; 9128c2ecf20Sopenharmony_ci} 9138c2ecf20Sopenharmony_ci 9148c2ecf20Sopenharmony_ci/* Queue work on this wqe, waking a free worker or creating one when needed; cancels the item if the very first worker cannot be created. */ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) 9158c2ecf20Sopenharmony_ci{ 9168c2ecf20Sopenharmony_ci	struct io_wqe_acct *acct = io_work_get_acct(wqe, work); 9178c2ecf20Sopenharmony_ci	unsigned work_flags = work->flags; 9188c2ecf20Sopenharmony_ci	bool do_create; 9198c2ecf20Sopenharmony_ci 9208c2ecf20Sopenharmony_ci	/* 9218c2ecf20Sopenharmony_ci	 * If io-wq is exiting for this task, or if the request has explicitly 9228c2ecf20Sopenharmony_ci	 * been marked as one that should not get executed, cancel it here.
9238c2ecf20Sopenharmony_ci	 */ 9248c2ecf20Sopenharmony_ci	if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || 9258c2ecf20Sopenharmony_ci	    (work->flags & IO_WQ_WORK_CANCEL)) { 9268c2ecf20Sopenharmony_ci		io_run_cancel(work, wqe); 9278c2ecf20Sopenharmony_ci		return; 9288c2ecf20Sopenharmony_ci	} 9298c2ecf20Sopenharmony_ci 9308c2ecf20Sopenharmony_ci	raw_spin_lock(&wqe->lock); 9318c2ecf20Sopenharmony_ci	io_wqe_insert_work(wqe, work); 9328c2ecf20Sopenharmony_ci	clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); 9338c2ecf20Sopenharmony_ci 9348c2ecf20Sopenharmony_ci	rcu_read_lock(); 9358c2ecf20Sopenharmony_ci	do_create = !io_wqe_activate_free_worker(wqe, acct); 9368c2ecf20Sopenharmony_ci	rcu_read_unlock(); 9378c2ecf20Sopenharmony_ci 9388c2ecf20Sopenharmony_ci	raw_spin_unlock(&wqe->lock); 9398c2ecf20Sopenharmony_ci 9408c2ecf20Sopenharmony_ci	/* no free worker was found: consider creating one */ if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) || 9418c2ecf20Sopenharmony_ci	    !atomic_read(&acct->nr_running))) { 9428c2ecf20Sopenharmony_ci		bool did_create; 9438c2ecf20Sopenharmony_ci 9448c2ecf20Sopenharmony_ci		did_create = io_wqe_create_worker(wqe, acct); 9458c2ecf20Sopenharmony_ci		if (likely(did_create)) 9468c2ecf20Sopenharmony_ci			return; 9478c2ecf20Sopenharmony_ci 9488c2ecf20Sopenharmony_ci		raw_spin_lock(&wqe->lock); 9498c2ecf20Sopenharmony_ci		/* fatal condition, failed to create the first worker */ 9508c2ecf20Sopenharmony_ci		if (!acct->nr_workers) { 9518c2ecf20Sopenharmony_ci			struct io_cb_cancel_data match = { 9528c2ecf20Sopenharmony_ci				.fn		= io_wq_work_match_item, 9538c2ecf20Sopenharmony_ci				.data		= work, 9548c2ecf20Sopenharmony_ci				.cancel_all	= false, 9558c2ecf20Sopenharmony_ci			}; 9568c2ecf20Sopenharmony_ci 9578c2ecf20Sopenharmony_ci			if (io_acct_cancel_pending_work(wqe, acct, &match)) 9588c2ecf20Sopenharmony_ci				raw_spin_lock(&wqe->lock); 9598c2ecf20Sopenharmony_ci		} 9608c2ecf20Sopenharmony_ci		raw_spin_unlock(&wqe->lock); 9618c2ecf20Sopenharmony_ci	} 9628c2ecf20Sopenharmony_ci} 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_civoid
/* Public enqueue: route the work to the wqe of the caller's NUMA node. */ io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) 9658c2ecf20Sopenharmony_ci{ 9668c2ecf20Sopenharmony_ci	struct io_wqe *wqe = wq->wqes[numa_node_id()]; 9678c2ecf20Sopenharmony_ci 9688c2ecf20Sopenharmony_ci	io_wqe_enqueue(wqe, work); 9698c2ecf20Sopenharmony_ci} 9708c2ecf20Sopenharmony_ci 9718c2ecf20Sopenharmony_ci/* 9728c2ecf20Sopenharmony_ci * Work items that hash to the same value will not be done in parallel. 9738c2ecf20Sopenharmony_ci * Used to limit concurrent writes, generally hashed by inode. 9748c2ecf20Sopenharmony_ci */ 9758c2ecf20Sopenharmony_civoid io_wq_hash_work(struct io_wq_work *work, void *val) 9768c2ecf20Sopenharmony_ci{ 9778c2ecf20Sopenharmony_ci	unsigned int bit; 9788c2ecf20Sopenharmony_ci 9798c2ecf20Sopenharmony_ci	bit = hash_ptr(val, IO_WQ_HASH_ORDER); 9808c2ecf20Sopenharmony_ci	work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); 9818c2ecf20Sopenharmony_ci} 9828c2ecf20Sopenharmony_ci 9838c2ecf20Sopenharmony_cistatic bool io_wq_worker_cancel(struct io_worker *worker, void *data) 9848c2ecf20Sopenharmony_ci{ 9858c2ecf20Sopenharmony_ci	struct io_cb_cancel_data *match = data; 9868c2ecf20Sopenharmony_ci 9878c2ecf20Sopenharmony_ci	/* 9888c2ecf20Sopenharmony_ci	 * Hold the lock to avoid ->cur_work going out of scope, caller 9898c2ecf20Sopenharmony_ci	 * may dereference the passed in work.
9908c2ecf20Sopenharmony_ci	 */ 9918c2ecf20Sopenharmony_ci	spin_lock(&worker->lock); 9928c2ecf20Sopenharmony_ci	if (worker->cur_work && 9938c2ecf20Sopenharmony_ci	    match->fn(worker->cur_work, match->data)) { 9948c2ecf20Sopenharmony_ci		set_notify_signal(worker->task); 9958c2ecf20Sopenharmony_ci		match->nr_running++; 9968c2ecf20Sopenharmony_ci	} 9978c2ecf20Sopenharmony_ci	spin_unlock(&worker->lock); 9988c2ecf20Sopenharmony_ci 9998c2ecf20Sopenharmony_ci	return match->nr_running && !match->cancel_all; 10008c2ecf20Sopenharmony_ci} 10018c2ecf20Sopenharmony_ci 10028c2ecf20Sopenharmony_ci/* Unlink pending work from the acct list, repairing the per-hash tail pointer when the removed item was the bucket's tail. */ static inline void io_wqe_remove_pending(struct io_wqe *wqe, 10038c2ecf20Sopenharmony_ci					 struct io_wq_work *work, 10048c2ecf20Sopenharmony_ci					 struct io_wq_work_node *prev) 10058c2ecf20Sopenharmony_ci{ 10068c2ecf20Sopenharmony_ci	struct io_wqe_acct *acct = io_work_get_acct(wqe, work); 10078c2ecf20Sopenharmony_ci	unsigned int hash = io_get_work_hash(work); 10088c2ecf20Sopenharmony_ci	struct io_wq_work *prev_work = NULL; 10098c2ecf20Sopenharmony_ci 10108c2ecf20Sopenharmony_ci	if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) { 10118c2ecf20Sopenharmony_ci		if (prev) 10128c2ecf20Sopenharmony_ci			prev_work = container_of(prev, struct io_wq_work, list); 10138c2ecf20Sopenharmony_ci		if (prev_work && io_get_work_hash(prev_work) == hash) 10148c2ecf20Sopenharmony_ci			wqe->hash_tail[hash] = prev_work; 10158c2ecf20Sopenharmony_ci		else 10168c2ecf20Sopenharmony_ci			wqe->hash_tail[hash] = NULL; 10178c2ecf20Sopenharmony_ci	} 10188c2ecf20Sopenharmony_ci	wq_list_del(&acct->work_list, &work->list, prev); 10198c2ecf20Sopenharmony_ci} 10208c2ecf20Sopenharmony_ci 10218c2ecf20Sopenharmony_cistatic bool io_acct_cancel_pending_work(struct io_wqe *wqe, 10228c2ecf20Sopenharmony_ci					struct io_wqe_acct *acct, 10238c2ecf20Sopenharmony_ci					struct io_cb_cancel_data *match) 10248c2ecf20Sopenharmony_ci	__releases(wqe->lock) 10258c2ecf20Sopenharmony_ci{ 10268c2ecf20Sopenharmony_ci	struct io_wq_work_node *node, *prev;
10278c2ecf20Sopenharmony_ci	struct io_wq_work *work; 10288c2ecf20Sopenharmony_ci 10298c2ecf20Sopenharmony_ci	wq_list_for_each(node, prev, &acct->work_list) { 10308c2ecf20Sopenharmony_ci		work = container_of(node, struct io_wq_work, list); 10318c2ecf20Sopenharmony_ci		if (!match->fn(work, match->data)) 10328c2ecf20Sopenharmony_ci			continue; 10338c2ecf20Sopenharmony_ci		io_wqe_remove_pending(wqe, work, prev); 10348c2ecf20Sopenharmony_ci		raw_spin_unlock(&wqe->lock); 10358c2ecf20Sopenharmony_ci		io_run_cancel(work, wqe); 10368c2ecf20Sopenharmony_ci		match->nr_pending++; 10378c2ecf20Sopenharmony_ci		/* not safe to continue after unlock */ 10388c2ecf20Sopenharmony_ci		return true; 10398c2ecf20Sopenharmony_ci	} 10408c2ecf20Sopenharmony_ci 10418c2ecf20Sopenharmony_ci	return false; 10428c2ecf20Sopenharmony_ci} 10438c2ecf20Sopenharmony_ci 10448c2ecf20Sopenharmony_ci/* Cancel matching pending work across both accts; the scan restarts after each cancellation (for cancel_all) because the wqe lock is dropped mid-walk. */ static void io_wqe_cancel_pending_work(struct io_wqe *wqe, 10458c2ecf20Sopenharmony_ci				       struct io_cb_cancel_data *match) 10468c2ecf20Sopenharmony_ci{ 10478c2ecf20Sopenharmony_ci	int i; 10488c2ecf20Sopenharmony_ciretry: 10498c2ecf20Sopenharmony_ci	raw_spin_lock(&wqe->lock); 10508c2ecf20Sopenharmony_ci	for (i = 0; i < IO_WQ_ACCT_NR; i++) { 10518c2ecf20Sopenharmony_ci		struct io_wqe_acct *acct = io_get_acct(wqe, i == 0); 10528c2ecf20Sopenharmony_ci 10538c2ecf20Sopenharmony_ci		if (io_acct_cancel_pending_work(wqe, acct, match)) { 10548c2ecf20Sopenharmony_ci			if (match->cancel_all) 10558c2ecf20Sopenharmony_ci				goto retry; 10568c2ecf20Sopenharmony_ci			return; 10578c2ecf20Sopenharmony_ci		} 10588c2ecf20Sopenharmony_ci	} 10598c2ecf20Sopenharmony_ci	raw_spin_unlock(&wqe->lock); 10608c2ecf20Sopenharmony_ci} 10618c2ecf20Sopenharmony_ci 10628c2ecf20Sopenharmony_ci/* Signal matching work that is already executing on a worker thread. */ static void io_wqe_cancel_running_work(struct io_wqe *wqe, 10638c2ecf20Sopenharmony_ci				       struct io_cb_cancel_data *match) 10648c2ecf20Sopenharmony_ci{ 10658c2ecf20Sopenharmony_ci	rcu_read_lock(); 10668c2ecf20Sopenharmony_ci	io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
10678c2ecf20Sopenharmony_ci	rcu_read_unlock(); 10688c2ecf20Sopenharmony_ci} 10698c2ecf20Sopenharmony_ci 10708c2ecf20Sopenharmony_cienum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, 10718c2ecf20Sopenharmony_ci				  void *data, bool cancel_all) 10728c2ecf20Sopenharmony_ci{ 10738c2ecf20Sopenharmony_ci	struct io_cb_cancel_data match = { 10748c2ecf20Sopenharmony_ci		.fn		= cancel, 10758c2ecf20Sopenharmony_ci		.data		= data, 10768c2ecf20Sopenharmony_ci		.cancel_all	= cancel_all, 10778c2ecf20Sopenharmony_ci	}; 10788c2ecf20Sopenharmony_ci	int node; 10798c2ecf20Sopenharmony_ci 10808c2ecf20Sopenharmony_ci	/* 10818c2ecf20Sopenharmony_ci	 * First check pending list, if we're lucky we can just remove it 10828c2ecf20Sopenharmony_ci	 * from there. CANCEL_OK means that the work is returned as-new, 10838c2ecf20Sopenharmony_ci	 * no completion will be posted for it. 10848c2ecf20Sopenharmony_ci	 */ 10858c2ecf20Sopenharmony_ci	for_each_node(node) { 10868c2ecf20Sopenharmony_ci		struct io_wqe *wqe = wq->wqes[node]; 10878c2ecf20Sopenharmony_ci 10888c2ecf20Sopenharmony_ci		io_wqe_cancel_pending_work(wqe, &match); 10898c2ecf20Sopenharmony_ci		if (match.nr_pending && !match.cancel_all) 10908c2ecf20Sopenharmony_ci			return IO_WQ_CANCEL_OK; 10918c2ecf20Sopenharmony_ci	} 10928c2ecf20Sopenharmony_ci 10938c2ecf20Sopenharmony_ci	/* 10948c2ecf20Sopenharmony_ci	 * Now check if a free (going busy) or busy worker has the work 10958c2ecf20Sopenharmony_ci	 * currently running. If we find it there, we'll return CANCEL_RUNNING 10968c2ecf20Sopenharmony_ci	 * as an indication that we attempt to signal cancellation. The 10978c2ecf20Sopenharmony_ci	 * completion will run normally in this case.
10988c2ecf20Sopenharmony_ci	 */ 10998c2ecf20Sopenharmony_ci	for_each_node(node) { 11008c2ecf20Sopenharmony_ci		struct io_wqe *wqe = wq->wqes[node]; 11018c2ecf20Sopenharmony_ci 11028c2ecf20Sopenharmony_ci		io_wqe_cancel_running_work(wqe, &match); 11038c2ecf20Sopenharmony_ci		if (match.nr_running && !match.cancel_all) 11048c2ecf20Sopenharmony_ci			return IO_WQ_CANCEL_RUNNING; 11058c2ecf20Sopenharmony_ci	} 11068c2ecf20Sopenharmony_ci 11078c2ecf20Sopenharmony_ci	if (match.nr_running) 11088c2ecf20Sopenharmony_ci		return IO_WQ_CANCEL_RUNNING; 11098c2ecf20Sopenharmony_ci	if (match.nr_pending) 11108c2ecf20Sopenharmony_ci		return IO_WQ_CANCEL_OK; 11118c2ecf20Sopenharmony_ci	return IO_WQ_CANCEL_NOTFOUND; 11128c2ecf20Sopenharmony_ci} 11138c2ecf20Sopenharmony_ci 11148c2ecf20Sopenharmony_ci/* Waitqueue callback for the shared hash map: a hash bucket was released; unstall both accts and wake a free worker to resume hashed work. */ static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode, 11158c2ecf20Sopenharmony_ci			    int sync, void *key) 11168c2ecf20Sopenharmony_ci{ 11178c2ecf20Sopenharmony_ci	struct io_wqe *wqe = container_of(wait, struct io_wqe, wait); 11188c2ecf20Sopenharmony_ci	int i; 11198c2ecf20Sopenharmony_ci 11208c2ecf20Sopenharmony_ci	list_del_init(&wait->entry); 11218c2ecf20Sopenharmony_ci 11228c2ecf20Sopenharmony_ci	rcu_read_lock(); 11238c2ecf20Sopenharmony_ci	for (i = 0; i < IO_WQ_ACCT_NR; i++) { 11248c2ecf20Sopenharmony_ci		struct io_wqe_acct *acct = &wqe->acct[i]; 11258c2ecf20Sopenharmony_ci 11268c2ecf20Sopenharmony_ci		if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags)) 11278c2ecf20Sopenharmony_ci			io_wqe_activate_free_worker(wqe, acct); 11288c2ecf20Sopenharmony_ci	} 11298c2ecf20Sopenharmony_ci	rcu_read_unlock(); 11308c2ecf20Sopenharmony_ci	return 1; 11318c2ecf20Sopenharmony_ci} 11328c2ecf20Sopenharmony_ci 11338c2ecf20Sopenharmony_cistruct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) 11348c2ecf20Sopenharmony_ci{ 11358c2ecf20Sopenharmony_ci	int ret, node, i; 11368c2ecf20Sopenharmony_ci	struct io_wq *wq; 11378c2ecf20Sopenharmony_ci 11388c2ecf20Sopenharmony_ci	if
(WARN_ON_ONCE(!data->free_work || !data->do_work)) 11398c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL); 11408c2ecf20Sopenharmony_ci	if (WARN_ON_ONCE(!bounded)) 11418c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL); 11428c2ecf20Sopenharmony_ci 11438c2ecf20Sopenharmony_ci	wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL); 11448c2ecf20Sopenharmony_ci	if (!wq) 11458c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM); 11468c2ecf20Sopenharmony_ci	ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); 11478c2ecf20Sopenharmony_ci	if (ret) 11488c2ecf20Sopenharmony_ci		goto err_wq; 11498c2ecf20Sopenharmony_ci 11508c2ecf20Sopenharmony_ci	refcount_inc(&data->hash->refs); 11518c2ecf20Sopenharmony_ci	wq->hash = data->hash; 11528c2ecf20Sopenharmony_ci	wq->free_work = data->free_work; 11538c2ecf20Sopenharmony_ci	wq->do_work = data->do_work; 11548c2ecf20Sopenharmony_ci 11558c2ecf20Sopenharmony_ci	ret = -ENOMEM; 11568c2ecf20Sopenharmony_ci	for_each_node(node) { 11578c2ecf20Sopenharmony_ci		struct io_wqe *wqe; 11588c2ecf20Sopenharmony_ci		int alloc_node = node; 11598c2ecf20Sopenharmony_ci 11608c2ecf20Sopenharmony_ci		if (!node_online(alloc_node)) 11618c2ecf20Sopenharmony_ci			alloc_node = NUMA_NO_NODE; 11628c2ecf20Sopenharmony_ci		wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node); 11638c2ecf20Sopenharmony_ci		if (!wqe) 11648c2ecf20Sopenharmony_ci			goto err; 11658c2ecf20Sopenharmony_ci		wq->wqes[node] = wqe; 11668c2ecf20Sopenharmony_ci		if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL)) 11678c2ecf20Sopenharmony_ci			goto err; 11688c2ecf20Sopenharmony_ci		cpumask_copy(wqe->cpu_mask, cpumask_of_node(node)); 11698c2ecf20Sopenharmony_ci		wqe->node = alloc_node; 11708c2ecf20Sopenharmony_ci		wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; 11718c2ecf20Sopenharmony_ci		/* unbounded workers are capped by the task's process limit */ wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers = 11728c2ecf20Sopenharmony_ci					task_rlimit(current, RLIMIT_NPROC); 11738c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&wqe->wait.entry); 11748c2ecf20Sopenharmony_ci
wqe->wait.func = io_wqe_hash_wake; 11758c2ecf20Sopenharmony_ci		for (i = 0; i < IO_WQ_ACCT_NR; i++) { 11768c2ecf20Sopenharmony_ci			struct io_wqe_acct *acct = &wqe->acct[i]; 11778c2ecf20Sopenharmony_ci 11788c2ecf20Sopenharmony_ci			acct->index = i; 11798c2ecf20Sopenharmony_ci			atomic_set(&acct->nr_running, 0); 11808c2ecf20Sopenharmony_ci			INIT_WQ_LIST(&acct->work_list); 11818c2ecf20Sopenharmony_ci		} 11828c2ecf20Sopenharmony_ci		wqe->wq = wq; 11838c2ecf20Sopenharmony_ci		raw_spin_lock_init(&wqe->lock); 11848c2ecf20Sopenharmony_ci		INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); 11858c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&wqe->all_list); 11868c2ecf20Sopenharmony_ci	} 11878c2ecf20Sopenharmony_ci 11888c2ecf20Sopenharmony_ci	wq->task = get_task_struct(data->task); 11898c2ecf20Sopenharmony_ci	atomic_set(&wq->worker_refs, 1); 11908c2ecf20Sopenharmony_ci	init_completion(&wq->worker_done); 11918c2ecf20Sopenharmony_ci	return wq; 11928c2ecf20Sopenharmony_cierr: 11938c2ecf20Sopenharmony_ci	io_wq_put_hash(data->hash); 11948c2ecf20Sopenharmony_ci	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); 11958c2ecf20Sopenharmony_ci	for_each_node(node) { 11968c2ecf20Sopenharmony_ci		if (!wq->wqes[node]) 11978c2ecf20Sopenharmony_ci			continue; 11988c2ecf20Sopenharmony_ci		free_cpumask_var(wq->wqes[node]->cpu_mask); 11998c2ecf20Sopenharmony_ci		kfree(wq->wqes[node]); 12008c2ecf20Sopenharmony_ci	} 12018c2ecf20Sopenharmony_cierr_wq: 12028c2ecf20Sopenharmony_ci	kfree(wq); 12038c2ecf20Sopenharmony_ci	return ERR_PTR(ret); 12048c2ecf20Sopenharmony_ci} 12058c2ecf20Sopenharmony_ci 12068c2ecf20Sopenharmony_ci/* Match task_work entries queued by this wq's worker-creation callbacks. */ static bool io_task_work_match(struct callback_head *cb, void *data) 12078c2ecf20Sopenharmony_ci{ 12088c2ecf20Sopenharmony_ci	struct io_worker *worker; 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci	if (cb->func != create_worker_cb && cb->func != create_worker_cont) 12118c2ecf20Sopenharmony_ci		return false; 12128c2ecf20Sopenharmony_ci	worker = container_of(cb, struct io_worker,
create_work); 12138c2ecf20Sopenharmony_ci	return worker->wqe->wq == data; 12148c2ecf20Sopenharmony_ci} 12158c2ecf20Sopenharmony_ci 12168c2ecf20Sopenharmony_civoid io_wq_exit_start(struct io_wq *wq) 12178c2ecf20Sopenharmony_ci{ 12188c2ecf20Sopenharmony_ci	set_bit(IO_WQ_BIT_EXIT, &wq->state); 12198c2ecf20Sopenharmony_ci} 12208c2ecf20Sopenharmony_ci 12218c2ecf20Sopenharmony_ci/* Cancel and reap any not-yet-run worker-creation task_work queued for this wq. */ static void io_wq_cancel_tw_create(struct io_wq *wq) 12228c2ecf20Sopenharmony_ci{ 12238c2ecf20Sopenharmony_ci	struct callback_head *cb; 12248c2ecf20Sopenharmony_ci 12258c2ecf20Sopenharmony_ci	while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { 12268c2ecf20Sopenharmony_ci		struct io_worker *worker; 12278c2ecf20Sopenharmony_ci 12288c2ecf20Sopenharmony_ci		worker = container_of(cb, struct io_worker, create_work); 12298c2ecf20Sopenharmony_ci		io_worker_cancel_cb(worker); 12308c2ecf20Sopenharmony_ci		/* 12318c2ecf20Sopenharmony_ci		 * Only the worker continuation helper has worker allocated and 12328c2ecf20Sopenharmony_ci		 * hence needs freeing.
12338c2ecf20Sopenharmony_ci		 */ 12348c2ecf20Sopenharmony_ci		if (cb->func == create_worker_cont) 12358c2ecf20Sopenharmony_ci			kfree(worker); 12368c2ecf20Sopenharmony_ci	} 12378c2ecf20Sopenharmony_ci} 12388c2ecf20Sopenharmony_ci 12398c2ecf20Sopenharmony_ci/* Tear down all workers: cancel pending creation task_work, wake every worker so it observes IO_WQ_BIT_EXIT, then wait for the worker refs to drain. */ static void io_wq_exit_workers(struct io_wq *wq) 12408c2ecf20Sopenharmony_ci{ 12418c2ecf20Sopenharmony_ci	int node; 12428c2ecf20Sopenharmony_ci 12438c2ecf20Sopenharmony_ci	if (!wq->task) 12448c2ecf20Sopenharmony_ci		return; 12458c2ecf20Sopenharmony_ci 12468c2ecf20Sopenharmony_ci	io_wq_cancel_tw_create(wq); 12478c2ecf20Sopenharmony_ci 12488c2ecf20Sopenharmony_ci	rcu_read_lock(); 12498c2ecf20Sopenharmony_ci	for_each_node(node) { 12508c2ecf20Sopenharmony_ci		struct io_wqe *wqe = wq->wqes[node]; 12518c2ecf20Sopenharmony_ci 12528c2ecf20Sopenharmony_ci		io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL); 12538c2ecf20Sopenharmony_ci	} 12548c2ecf20Sopenharmony_ci	rcu_read_unlock(); 12558c2ecf20Sopenharmony_ci	io_worker_ref_put(wq); 12568c2ecf20Sopenharmony_ci	wait_for_completion(&wq->worker_done); 12578c2ecf20Sopenharmony_ci 12588c2ecf20Sopenharmony_ci	for_each_node(node) { 12598c2ecf20Sopenharmony_ci		spin_lock_irq(&wq->hash->wait.lock); 12608c2ecf20Sopenharmony_ci		list_del_init(&wq->wqes[node]->wait.entry); 12618c2ecf20Sopenharmony_ci		spin_unlock_irq(&wq->hash->wait.lock); 12628c2ecf20Sopenharmony_ci	} 12638c2ecf20Sopenharmony_ci	put_task_struct(wq->task); 12648c2ecf20Sopenharmony_ci	wq->task = NULL; 12658c2ecf20Sopenharmony_ci} 12668c2ecf20Sopenharmony_ci 12678c2ecf20Sopenharmony_ci/* Free all per-node state after cancelling any still-pending work. */ static void io_wq_destroy(struct io_wq *wq) 12688c2ecf20Sopenharmony_ci{ 12698c2ecf20Sopenharmony_ci	int node; 12708c2ecf20Sopenharmony_ci 12718c2ecf20Sopenharmony_ci	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); 12728c2ecf20Sopenharmony_ci 12738c2ecf20Sopenharmony_ci	for_each_node(node) { 12748c2ecf20Sopenharmony_ci		struct io_wqe *wqe = wq->wqes[node]; 12758c2ecf20Sopenharmony_ci		struct io_cb_cancel_data match = {
12768c2ecf20Sopenharmony_ci			.fn		= io_wq_work_match_all, 12778c2ecf20Sopenharmony_ci			.cancel_all	= true, 12788c2ecf20Sopenharmony_ci		}; 12798c2ecf20Sopenharmony_ci		io_wqe_cancel_pending_work(wqe, &match); 12808c2ecf20Sopenharmony_ci		free_cpumask_var(wqe->cpu_mask); 12818c2ecf20Sopenharmony_ci		kfree(wqe); 12828c2ecf20Sopenharmony_ci	} 12838c2ecf20Sopenharmony_ci	io_wq_put_hash(wq->hash); 12848c2ecf20Sopenharmony_ci	kfree(wq); 12858c2ecf20Sopenharmony_ci} 12868c2ecf20Sopenharmony_ci 12878c2ecf20Sopenharmony_civoid io_wq_put_and_exit(struct io_wq *wq) 12888c2ecf20Sopenharmony_ci{ 12898c2ecf20Sopenharmony_ci	WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state)); 12908c2ecf20Sopenharmony_ci 12918c2ecf20Sopenharmony_ci	io_wq_exit_workers(wq); 12928c2ecf20Sopenharmony_ci	io_wq_destroy(wq); 12938c2ecf20Sopenharmony_ci} 12948c2ecf20Sopenharmony_ci 12958c2ecf20Sopenharmony_cistruct online_data { 12968c2ecf20Sopenharmony_ci	unsigned int cpu; 12978c2ecf20Sopenharmony_ci	bool online; 12988c2ecf20Sopenharmony_ci}; 12998c2ecf20Sopenharmony_ci 13008c2ecf20Sopenharmony_ci/* Hotplug helper: add or remove 'cpu' from every worker's wqe CPU mask. */ static bool io_wq_worker_affinity(struct io_worker *worker, void *data) 13018c2ecf20Sopenharmony_ci{ 13028c2ecf20Sopenharmony_ci	struct online_data *od = data; 13038c2ecf20Sopenharmony_ci 13048c2ecf20Sopenharmony_ci	if (od->online) 13058c2ecf20Sopenharmony_ci		cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask); 13068c2ecf20Sopenharmony_ci	else 13078c2ecf20Sopenharmony_ci		cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask); 13088c2ecf20Sopenharmony_ci	return false; 13098c2ecf20Sopenharmony_ci} 13108c2ecf20Sopenharmony_ci 13118c2ecf20Sopenharmony_cistatic int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online) 13128c2ecf20Sopenharmony_ci{ 13138c2ecf20Sopenharmony_ci	struct online_data od = { 13148c2ecf20Sopenharmony_ci		.cpu = cpu, 13158c2ecf20Sopenharmony_ci		.online = online 13168c2ecf20Sopenharmony_ci	}; 13178c2ecf20Sopenharmony_ci	int i; 13188c2ecf20Sopenharmony_ci 13198c2ecf20Sopenharmony_ci
rcu_read_lock(); 13208c2ecf20Sopenharmony_ci for_each_node(i) 13218c2ecf20Sopenharmony_ci io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od); 13228c2ecf20Sopenharmony_ci rcu_read_unlock(); 13238c2ecf20Sopenharmony_ci return 0; 13248c2ecf20Sopenharmony_ci} 13258c2ecf20Sopenharmony_ci 13268c2ecf20Sopenharmony_cistatic int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) 13278c2ecf20Sopenharmony_ci{ 13288c2ecf20Sopenharmony_ci struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); 13298c2ecf20Sopenharmony_ci 13308c2ecf20Sopenharmony_ci return __io_wq_cpu_online(wq, cpu, true); 13318c2ecf20Sopenharmony_ci} 13328c2ecf20Sopenharmony_ci 13338c2ecf20Sopenharmony_cistatic int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node) 13348c2ecf20Sopenharmony_ci{ 13358c2ecf20Sopenharmony_ci struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); 13368c2ecf20Sopenharmony_ci 13378c2ecf20Sopenharmony_ci return __io_wq_cpu_online(wq, cpu, false); 13388c2ecf20Sopenharmony_ci} 13398c2ecf20Sopenharmony_ci 13408c2ecf20Sopenharmony_ciint io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask) 13418c2ecf20Sopenharmony_ci{ 13428c2ecf20Sopenharmony_ci int i; 13438c2ecf20Sopenharmony_ci 13448c2ecf20Sopenharmony_ci rcu_read_lock(); 13458c2ecf20Sopenharmony_ci for_each_node(i) { 13468c2ecf20Sopenharmony_ci struct io_wqe *wqe = wq->wqes[i]; 13478c2ecf20Sopenharmony_ci 13488c2ecf20Sopenharmony_ci if (mask) 13498c2ecf20Sopenharmony_ci cpumask_copy(wqe->cpu_mask, mask); 13508c2ecf20Sopenharmony_ci else 13518c2ecf20Sopenharmony_ci cpumask_copy(wqe->cpu_mask, cpumask_of_node(i)); 13528c2ecf20Sopenharmony_ci } 13538c2ecf20Sopenharmony_ci rcu_read_unlock(); 13548c2ecf20Sopenharmony_ci return 0; 13558c2ecf20Sopenharmony_ci} 13568c2ecf20Sopenharmony_ci 13578c2ecf20Sopenharmony_ci/* 13588c2ecf20Sopenharmony_ci * Set max number of unbounded workers, returns old value. If new_count is 0, 13598c2ecf20Sopenharmony_ci * then just return the old value. 
 */
int io_wq_max_workers(struct io_wq *wq, int *new_count)
{
	int prev[IO_WQ_ACCT_NR];
	bool first_node = true;
	int i, node;

	/* the acct indices must line up with the UAPI bound/unbound values */
	BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND   != (int) IO_WQ_BOUND);
	BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND);
	BUILD_BUG_ON((int) IO_WQ_ACCT_NR      != 2);

	/* clamp the requested counts to the caller's process limit */
	for (i = 0; i < 2; i++) {
		if (new_count[i] > task_rlimit(current, RLIMIT_NPROC))
			new_count[i] = task_rlimit(current, RLIMIT_NPROC);
	}

	for (i = 0; i < IO_WQ_ACCT_NR; i++)
		prev[i] = 0;

	rcu_read_lock();
	for_each_node(node) {
		struct io_wqe *wqe = wq->wqes[node];
		struct io_wqe_acct *acct;

		raw_spin_lock(&wqe->lock);
		for (i = 0; i < IO_WQ_ACCT_NR; i++) {
			acct = &wqe->acct[i];
			/* record the previous limits from the first node only */
			if (first_node)
				prev[i] = max_t(int, acct->max_workers, prev[i]);
			/* a zero entry leaves the current limit untouched */
			if (new_count[i])
				acct->max_workers = new_count[i];
		}
		raw_spin_unlock(&wqe->lock);
		first_node = false;
	}
	rcu_read_unlock();

	/* hand the previous limits back to the caller through new_count */
	for (i = 0; i < IO_WQ_ACCT_NR; i++)
		new_count[i] = prev[i];

	return 0;
}

/* register the CPU hotplug callbacks that keep worker affinity in sync */
static __init int io_wq_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
					io_wq_cpu_online, io_wq_cpu_offline);
	if (ret < 0)
		return ret;
	io_wq_online = ret;
	return 0;
}
subsys_initcall(io_wq_init);