// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/hashtable.h>
#include <linux/io_uring.h>

#include <trace/events/io_uring.h>

#include <uapi/linux/io_uring.h>

#include "io_uring.h"
#include "refs.h"
#include "opdef.h"
#include "kbuf.h"
#include "poll.h"
#include "cancel.h"

struct io_poll_update {
	struct file *file;
	u64 old_user_data;
	u64 new_user_data;
	__poll_t events;
	bool update_events;
	bool update_user_data;
};

struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
	bool owning;
	/* output value, set only if arm poll returns >0 */
	__poll_t result_mask;
};

#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_RETRY_FLAG	BIT(30)
#define IO_POLL_REF_MASK	GENMASK(29, 0)

/*
 * We usually have 1-2 refs taken, 128 is more than enough and we want to
 * maximise the margin between this amount and the moment when it overflows.
 */
#define IO_POLL_REF_BIAS	128
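
/*
 * For orientation, a sketch of how ->poll_refs is laid out given the
 * definitions above (this just restates the defines, it is not a separate
 * contract):
 *
 *	bit  31		IO_POLL_CANCEL_FLAG	request is being cancelled
 *	bit  30		IO_POLL_RETRY_FLAG	a wakeup raced with an owner
 *	bits 29..0	IO_POLL_REF_MASK	reference count / ownership
 */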

#define IO_WQE_F_DOUBLE		1

static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key);

static inline struct io_kiocb *wqe_to_req(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return (struct io_kiocb *)(priv & ~IO_WQE_F_DOUBLE);
}

static inline bool wqe_is_double(struct wait_queue_entry *wqe)
{
	unsigned long priv = (unsigned long)wqe->private;

	return priv & IO_WQE_F_DOUBLE;
}

static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
{
	int v;

	/*
	 * poll_refs are already elevated and we don't have much hope for
	 * grabbing the ownership. Instead of incrementing, set a retry flag
	 * to notify the loop that there might have been some change.
	 */
	v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
	if (v & IO_POLL_REF_MASK)
		return false;
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

/*
 * If the refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, the request is
 * free and we can bump the count and acquire ownership. It's disallowed to
 * modify requests while not owning them; that prevents races when enqueueing
 * task_work and between arming poll and wakeups.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
		return io_poll_get_ownership_slowpath(req);
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}
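
/*
 * A typical uncontended round trip, as a sketch (the details live in
 * io_poll_wake() and io_poll_check_events() below):
 *
 *	io_poll_wake()
 *		io_poll_get_ownership()		refs 0 -> 1, we own the request
 *		__io_poll_execute()		stash the mask, queue task_work
 *	io_poll_task_func()			later, in task context
 *		io_poll_check_events()		consume events, then drop all
 *						refs and retry if they raced up
 */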

static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}

static struct io_poll *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}

static struct io_poll *io_poll_get_single(struct io_kiocb *req)
{
	if (req->opcode == IORING_OP_POLL_ADD)
		return io_kiocb_to_cmd(req, struct io_poll);
	return &req->apoll->poll;
}

static void io_poll_req_insert(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	spin_lock(&hb->lock);
	hlist_add_head(&req->hash_node, &hb->list);
	spin_unlock(&hb->lock);
}

static void io_poll_req_delete(struct io_kiocb *req, struct io_ring_ctx *ctx)
{
	struct io_hash_table *table = &req->ctx->cancel_table;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);
	spinlock_t *lock = &table->hbs[index].lock;

	spin_lock(lock);
	hash_del(&req->hash_node);
	spin_unlock(lock);
}

static void io_poll_req_insert_locked(struct io_kiocb *req)
{
	struct io_hash_table *table = &req->ctx->cancel_table_locked;
	u32 index = hash_long(req->cqe.user_data, table->hash_bits);

	lockdep_assert_held(&req->ctx->uring_lock);

	hlist_add_head(&req->hash_node, &table->hbs[index].list);
}
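
/*
 * Note on the two tables above: armed requests are hashed by cqe.user_data
 * for cancellation. ->cancel_table uses per bucket spinlocks, while
 * ->cancel_table_locked relies on ->uring_lock instead. Which table a
 * request lands in is tracked via REQ_F_HASH_LOCKED, see io_poll_add_hash().
 */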

static void io_poll_tw_hash_eject(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_ring_ctx *ctx = req->ctx;

	if (req->flags & REQ_F_HASH_LOCKED) {
		/*
		 * ->cancel_table_locked is protected by ->uring_lock in
		 * contrast to per bucket spinlocks. Likely, tctx_task_work()
		 * already grabbed the mutex for us, but there is a chance it
		 * failed.
		 */
		io_tw_lock(ctx, ts);
		hash_del(&req->hash_node);
		req->flags &= ~REQ_F_HASH_LOCKED;
	} else {
		io_poll_req_delete(req, ctx);
	}
}

static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
{
	poll->head = NULL;
#define IO_POLL_UNMASK	(EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
	/* mask in events that we always want/need */
	poll->events = events | IO_POLL_UNMASK;
	INIT_LIST_HEAD(&poll->wait.entry);
	init_waitqueue_func_entry(&poll->wait, io_poll_wake);
}

static inline void io_poll_remove_entry(struct io_poll *poll)
{
	struct wait_queue_head *head = smp_load_acquire(&poll->head);

	if (head) {
		spin_lock_irq(&head->lock);
		list_del_init(&poll->wait.entry);
		poll->head = NULL;
		spin_unlock_irq(&head->lock);
	}
}

static void io_poll_remove_entries(struct io_kiocb *req)
{
	/*
	 * Nothing to do if neither of those flags are set. Avoid dipping
	 * into the poll/apoll/double cachelines if we can.
	 */
	if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL)))
		return;

	/*
	 * While we hold the waitqueue lock and the waitqueue is nonempty,
	 * wake_up_pollfree() will wait for us. However, taking the waitqueue
	 * lock in the first place can race with the waitqueue being freed.
	 *
	 * We solve this as eventpoll does: by taking advantage of the fact that
	 * all users of wake_up_pollfree() will RCU-delay the actual free. If
	 * we enter rcu_read_lock() and see that the pointer to the queue is
	 * non-NULL, we can then lock it without the memory being freed out from
	 * under us.
	 *
	 * Keep holding rcu_read_lock() as long as we hold the queue lock, in
	 * case the caller deletes the entry from the queue, leaving it empty.
	 * In that case, only RCU prevents the queue memory from being freed.
	 */
	rcu_read_lock();
	if (req->flags & REQ_F_SINGLE_POLL)
		io_poll_remove_entry(io_poll_get_single(req));
	if (req->flags & REQ_F_DOUBLE_POLL)
		io_poll_remove_entry(io_poll_get_double(req));
	rcu_read_unlock();
}

enum {
	IOU_POLL_DONE = 0,
	IOU_POLL_NO_ACTION = 1,
	IOU_POLL_REMOVE_POLL_USE_RES = 2,
	IOU_POLL_REISSUE = 3,
	IOU_POLL_REQUEUE = 4,
};

static void __io_poll_execute(struct io_kiocb *req, int mask)
{
	io_req_set_res(req, mask, 0);
	req->io_task_work.func = io_poll_task_func;

	trace_io_uring_task_add(req, mask);
	io_req_task_work_add(req);
}

static inline void io_poll_execute(struct io_kiocb *req, int res)
{
	if (io_poll_get_ownership(req))
		__io_poll_execute(req, res);
}
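
/*
 * For reference, how io_poll_task_func() reacts to io_poll_check_events()
 * results (see both functions below):
 *
 *	IOU_POLL_NO_ACTION		spurious wakeup, leave everything as is
 *	IOU_POLL_REQUEUE		queue the task_work again
 *	IOU_POLL_DONE			done: POLL_ADD posts the event mask,
 *					apoll re-issues the original request
 *	IOU_POLL_REISSUE		re-submit the underlying request
 *	IOU_POLL_REMOVE_POLL_USE_RES	drop the poll, req->cqe already holds
 *					the result
 *	negative error			fail the request
 */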

/*
 * All poll tw should go through this. Checks for poll events, manages
 * references, does rewait, etc.
 *
 * Returns a negative error on failure. IOU_POLL_NO_ACTION when no action
 * is required, which means either a spurious wakeup or a served multishot
 * CQE. IOU_POLL_DONE when it's done with the request, then the mask is
 * stored in req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove
 * multishot poll and that the result is stored in req->cqe.
 */
static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
{
	int v;

	/* req->task == current here, checking PF_EXITING is safe */
	if (unlikely(req->task->flags & PF_EXITING))
		return -ECANCELED;

	do {
		v = atomic_read(&req->poll_refs);

		if (unlikely(v != 1)) {
			/* tw should be the owner and so have some refs */
			if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
				return IOU_POLL_NO_ACTION;
			if (v & IO_POLL_CANCEL_FLAG)
				return -ECANCELED;
			/*
			 * cqe.res contains only events of the first wake up
			 * and all others are to be lost. Redo vfs_poll() to get
			 * up to date state.
			 */
			if ((v & IO_POLL_REF_MASK) != 1)
				req->cqe.res = 0;

			if (v & IO_POLL_RETRY_FLAG) {
				req->cqe.res = 0;
				/*
				 * We won't find new events that came in between
				 * vfs_poll and the ref put unless we clear the
				 * flag in advance.
				 */
				atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs);
				v &= ~IO_POLL_RETRY_FLAG;
			}
		}

		/* the mask was stashed in __io_poll_execute */
		if (!req->cqe.res) {
			struct poll_table_struct pt = { ._key = req->apoll_events };
			req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events;
			/*
			 * We got woken with a mask, but someone else got to
			 * it first. The above vfs_poll() doesn't add us back
			 * to the waitqueue, so if we get nothing back, we
			 * should be safe and attempt a reissue.
			 */
			if (unlikely(!req->cqe.res)) {
				/* Multishot armed need not reissue */
				if (!(req->apoll_events & EPOLLONESHOT))
					continue;
				return IOU_POLL_REISSUE;
			}
		}
		if (req->apoll_events & EPOLLONESHOT)
			return IOU_POLL_DONE;

		/* multishot, just fill a CQE and proceed */
		if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
			__poll_t mask = mangle_poll(req->cqe.res &
						    req->apoll_events);

			if (!io_fill_cqe_req_aux(req, ts->locked, mask,
						 IORING_CQE_F_MORE)) {
				io_req_set_res(req, mask, 0);
				return IOU_POLL_REMOVE_POLL_USE_RES;
			}
		} else {
			int ret = io_poll_issue(req, ts);
			if (ret == IOU_STOP_MULTISHOT)
				return IOU_POLL_REMOVE_POLL_USE_RES;
			else if (ret == IOU_REQUEUE)
				return IOU_POLL_REQUEUE;
			if (ret < 0)
				return ret;
		}

		/* force the next iteration to vfs_poll() */
		req->cqe.res = 0;

		/*
		 * Release all references, retry if someone tried to restart
		 * task_work while we were executing it.
		 */
	} while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) &
					IO_POLL_REF_MASK);

	return IOU_POLL_NO_ACTION;
}

void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
{
	int ret;

	ret = io_poll_check_events(req, ts);
	if (ret == IOU_POLL_NO_ACTION) {
		return;
	} else if (ret == IOU_POLL_REQUEUE) {
		__io_poll_execute(req, 0);
		return;
	}
	io_poll_remove_entries(req);
	io_poll_tw_hash_eject(req, ts);

	if (req->opcode == IORING_OP_POLL_ADD) {
		if (ret == IOU_POLL_DONE) {
			struct io_poll *poll;

			poll = io_kiocb_to_cmd(req, struct io_poll);
			req->cqe.res = mangle_poll(req->cqe.res & poll->events);
		} else if (ret == IOU_POLL_REISSUE) {
			io_req_task_submit(req, ts);
			return;
		} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
			req->cqe.res = ret;
			req_set_fail(req);
		}

		io_req_set_res(req, req->cqe.res, 0);
		io_req_task_complete(req, ts);
	} else {
		io_tw_lock(req->ctx, ts);

		if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
			io_req_task_complete(req, ts);
		else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE)
			io_req_task_submit(req, ts);
		else
			io_req_defer_failed(req, ret);
	}
}

static void io_poll_cancel_req(struct io_kiocb *req)
{
	io_poll_mark_cancelled(req);
	/* kick tw, which should complete the request */
	io_poll_execute(req, 0);
}
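
/*
 * POLLFREE handling below: the waitqueue itself is about to be freed (see
 * wake_up_pollfree()), so the request must be detached from it immediately;
 * the RCU rules that make this safe are described in io_poll_remove_entries().
 */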

#define IO_ASYNC_POLL_COMMON	(EPOLLONESHOT | EPOLLPRI)

static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
{
	io_poll_mark_cancelled(req);
	/* we have to kick tw in case it's not already */
	io_poll_execute(req, 0);

	/*
	 * If the waitqueue is being freed early but someone already
	 * holds ownership over it, we have to tear down the request as
	 * best we can. That means immediately removing the request from
	 * its waitqueue and preventing all further accesses to the
	 * waitqueue via the request.
	 */
	list_del_init(&poll->wait.entry);

	/*
	 * Careful: this *must* be the last step, since as soon
	 * as req->head is NULL'ed out, the request can be
	 * completed and freed, since aio_poll_complete_work()
	 * will no longer need to take the waitqueue lock.
	 */
	smp_store_release(&poll->head, NULL);
	return 1;
}

static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
			void *key)
{
	struct io_kiocb *req = wqe_to_req(wait);
	struct io_poll *poll = container_of(wait, struct io_poll, wait);
	__poll_t mask = key_to_poll(key);

	if (unlikely(mask & POLLFREE))
		return io_pollfree_wake(req, poll);

	/* for instances that support it check for an event match first */
	if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON)))
		return 0;

	if (io_poll_get_ownership(req)) {
		/*
		 * If we trigger a multishot poll off our own wakeup path,
		 * disable multishot as there is a circular dependency between
		 * CQ posting and triggering the event.
		 */
		if (mask & EPOLL_URING_WAKE)
			poll->events |= EPOLLONESHOT;

		/* optional, saves extra locking for removal in tw handler */
		if (mask && poll->events & EPOLLONESHOT) {
			list_del_init(&poll->wait.entry);
			poll->head = NULL;
			if (wqe_is_double(wait))
				req->flags &= ~REQ_F_DOUBLE_POLL;
			else
				req->flags &= ~REQ_F_SINGLE_POLL;
		}
		__io_poll_execute(req, mask);
	}
	return 1;
}

/* fails only when polling is already being completed by the first entry */
static bool io_poll_double_prepare(struct io_kiocb *req)
{
	struct wait_queue_head *head;
	struct io_poll *poll = io_poll_get_single(req);

	/* head is RCU protected, see io_poll_remove_entries() comments */
	rcu_read_lock();
	head = smp_load_acquire(&poll->head);
	/*
	 * poll arm might not hold ownership and so race for req->flags with
	 * io_poll_wake(). There is only one poll entry queued, serialise with
	 * it by taking its head lock. As we're still arming, the tw handler
	 * is not going to be run, so there are no races with it.
	 */
	if (head) {
		spin_lock_irq(&head->lock);
		req->flags |= REQ_F_DOUBLE_POLL;
		if (req->opcode == IORING_OP_POLL_ADD)
			req->flags |= REQ_F_ASYNC_DATA;
		spin_unlock_irq(&head->lock);
	}
	rcu_read_unlock();
	return !!head;
}
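
/*
 * Reminder for the code below: a wait queue entry's ->private carries the
 * req pointer, with the lowest bit (IO_WQE_F_DOUBLE) tagging entries that
 * belong to the second io_poll; see wqe_to_req() and wqe_is_double() above.
 */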

static void __io_queue_proc(struct io_poll *poll, struct io_poll_table *pt,
			    struct wait_queue_head *head,
			    struct io_poll **poll_ptr)
{
	struct io_kiocb *req = pt->req;
	unsigned long wqe_private = (unsigned long) req;

	/*
	 * The file being polled uses multiple waitqueues for poll handling
	 * (e.g. one for read, one for write). Set up a separate io_poll
	 * if this happens.
	 */
	if (unlikely(pt->nr_entries)) {
		struct io_poll *first = poll;

		/* double add on the same waitqueue head, ignore */
		if (first->head == head)
			return;
		/* already have a 2nd entry, fail a third attempt */
		if (*poll_ptr) {
			if ((*poll_ptr)->head == head)
				return;
			pt->error = -EINVAL;
			return;
		}

		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
		if (!poll) {
			pt->error = -ENOMEM;
			return;
		}

		/* mark as double wq entry */
		wqe_private |= IO_WQE_F_DOUBLE;
		io_init_poll_iocb(poll, first->events);
		if (!io_poll_double_prepare(req)) {
			/* the request is completing, just back off */
			kfree(poll);
			return;
		}
		*poll_ptr = poll;
	} else {
		/* fine to modify, there is no poll queued to race with us */
		req->flags |= REQ_F_SINGLE_POLL;
	}

	pt->nr_entries++;
	poll->head = head;
	poll->wait.private = (void *) wqe_private;

	if (poll->events & EPOLLEXCLUSIVE)
		add_wait_queue_exclusive(head, &poll->wait);
	else
		add_wait_queue(head, &poll->wait);
}

static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
			       struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct io_poll *poll = io_kiocb_to_cmd(pt->req, struct io_poll);

	__io_queue_proc(poll, pt, head,
			(struct io_poll **) &pt->req->async_data);
}

static bool io_poll_can_finish_inline(struct io_kiocb *req,
				      struct io_poll_table *pt)
{
	return pt->owning || io_poll_get_ownership(req);
}

static void io_poll_add_hash(struct io_kiocb *req)
{
	if (req->flags & REQ_F_HASH_LOCKED)
		io_poll_req_insert_locked(req);
	else
		io_poll_req_insert(req);
}
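
/*
 * Rough shape of the arming path implemented below, as a sketch:
 *
 *	__io_arm_poll_handler()
 *		vfs_poll()	queues wait entries via the queue_proc
 *		events already pending and EPOLLET|EPOLLONESHOT set?
 *			-> complete inline, return 1 with ipt->result_mask set
 *		otherwise
 *			-> hash the request for cancellation and rely on
 *			   io_poll_wake() plus task_work to finish it
 */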
/*
 * Returns 0 when it's handed over for polling. The caller owns the request if
 * it returns non-zero, but otherwise should not touch it. Negative values
 * contain an error code. When the result is >0, the polling has completed
 * inline and ipt.result_mask is set to the mask.
 */
static int __io_arm_poll_handler(struct io_kiocb *req,
				 struct io_poll *poll,
				 struct io_poll_table *ipt, __poll_t mask,
				 unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;

	INIT_HLIST_NODE(&req->hash_node);
	req->work.cancel_seq = atomic_read(&ctx->cancel_seq);
	io_init_poll_iocb(poll, mask);
	poll->file = req->file;
	req->apoll_events = poll->events;

	ipt->pt._key = mask;
	ipt->req = req;
	ipt->error = 0;
	ipt->nr_entries = 0;
	/*
	 * Polling is either completed here or via task_work, so if we're in the
	 * task context we're naturally serialised with tw by merit of running
	 * the same task. When it's io-wq, take the ownership to prevent tw
	 * from running. However, when we're in the task context, skip taking
	 * it as an optimisation.
	 *
	 * Note: even though the request won't be completed/freed, without
	 * ownership we still can race with io_poll_wake().
	 * io_poll_can_finish_inline() tries to deal with that.
	 */
	ipt->owning = issue_flags & IO_URING_F_UNLOCKED;
	atomic_set(&req->poll_refs, (int)ipt->owning);

	/* io-wq doesn't hold uring_lock */
	if (issue_flags & IO_URING_F_UNLOCKED)
		req->flags &= ~REQ_F_HASH_LOCKED;

	mask = vfs_poll(req->file, &ipt->pt) & poll->events;

	if (unlikely(ipt->error || !ipt->nr_entries)) {
		io_poll_remove_entries(req);

		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_mark_cancelled(req);
			return 0;
		} else if (mask && (poll->events & EPOLLET)) {
			ipt->result_mask = mask;
			return 1;
		}
		return ipt->error ?: -EINVAL;
	}

	if (mask &&
	   ((poll->events & (EPOLLET|EPOLLONESHOT)) == (EPOLLET|EPOLLONESHOT))) {
		if (!io_poll_can_finish_inline(req, ipt)) {
			io_poll_add_hash(req);
			return 0;
		}
		io_poll_remove_entries(req);
		ipt->result_mask = mask;
		/* no one else has access to the req, forget about the ref */
		return 1;
	}

	io_poll_add_hash(req);

	if (mask && (poll->events & EPOLLET) &&
	    io_poll_can_finish_inline(req, ipt)) {
		__io_poll_execute(req, mask);
		return 0;
	}

	if (ipt->owning) {
		/*
		 * Try to release ownership. If we see a change of state, e.g.
		 * poll was woken up, queue up a tw, it'll deal with it.
		 */
		if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1)
			__io_poll_execute(req, 0);
	}
	return 0;
}

static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
				struct poll_table_struct *p)
{
	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
	struct async_poll *apoll = pt->req->apoll;

	__io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
}

/*
 * We can't reliably detect loops where a poll keeps triggering but the
 * subsequent issue attempt keeps failing. Rather than failing such requests
 * immediately, allow a certain amount of retries before we give up. Given
 * that this condition should _rarely_ trigger even once, we should be fine
 * with a larger value.
 */
#define APOLL_MAX_RETRY	128

static struct async_poll *io_req_alloc_apoll(struct io_kiocb *req,
					     unsigned issue_flags)
{
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cache_entry *entry;
	struct async_poll *apoll;

	if (req->flags & REQ_F_POLLED) {
		apoll = req->apoll;
		kfree(apoll->double_poll);
	} else if (!(issue_flags & IO_URING_F_UNLOCKED)) {
		entry = io_alloc_cache_get(&ctx->apoll_cache);
		if (entry == NULL)
			goto alloc_apoll;
		apoll = container_of(entry, struct async_poll, cache);
		apoll->poll.retries = APOLL_MAX_RETRY;
	} else {
alloc_apoll:
		apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
		if (unlikely(!apoll))
			return NULL;
		apoll->poll.retries = APOLL_MAX_RETRY;
	}
	apoll->double_poll = NULL;
	req->apoll = apoll;
	if (unlikely(!--apoll->poll.retries))
		return NULL;
	return apoll;
}
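
/*
 * Broadly, io_arm_poll_handler() below is the internal (async) poll entry
 * point: when a pollable request (e.g. a recv on an empty socket) can't make
 * progress inline, the core arms a poll here instead of punting the request
 * to io-wq, and the request is re-issued once io_poll_wake() fires.
 */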
int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags)
{
	const struct io_issue_def *def = &io_issue_defs[req->opcode];
	struct async_poll *apoll;
	struct io_poll_table ipt;
	__poll_t mask = POLLPRI | POLLERR | EPOLLET;
	int ret;

	/*
	 * apoll requests already grab the mutex to complete in the tw handler,
	 * so removal from the mutex-backed hash is free, use it by default.
	 */
	req->flags |= REQ_F_HASH_LOCKED;

	if (!def->pollin && !def->pollout)
		return IO_APOLL_ABORTED;
	if (!file_can_poll(req->file))
		return IO_APOLL_ABORTED;
	if (!(req->flags & REQ_F_APOLL_MULTISHOT))
		mask |= EPOLLONESHOT;

	if (def->pollin) {
		mask |= EPOLLIN | EPOLLRDNORM;

		/* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */
		if (req->flags & REQ_F_CLEAR_POLLIN)
			mask &= ~EPOLLIN;
	} else {
		mask |= EPOLLOUT | EPOLLWRNORM;
	}
	if (def->poll_exclusive)
		mask |= EPOLLEXCLUSIVE;

	apoll = io_req_alloc_apoll(req, issue_flags);
	if (!apoll)
		return IO_APOLL_ABORTED;
	req->flags &= ~(REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL);
	req->flags |= REQ_F_POLLED;
	ipt.pt._qproc = io_async_queue_proc;

	io_kbuf_recycle(req, issue_flags);

	ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, issue_flags);
	if (ret)
		return ret > 0 ? IO_APOLL_READY : IO_APOLL_ABORTED;
	trace_io_uring_poll_arm(req, mask, apoll->poll.events);
	return IO_APOLL_OK;
}

static __cold bool io_poll_remove_all_table(struct task_struct *tsk,
					    struct io_hash_table *table,
					    bool cancel_all)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct hlist_node *tmp;
	struct io_kiocb *req;
	bool found = false;
	int i;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry_safe(req, tmp, &hb->list, hash_node) {
			if (io_match_task_safe(req, tsk, cancel_all)) {
				hlist_del_init(&req->hash_node);
				io_poll_cancel_req(req);
				found = true;
			}
		}
		spin_unlock(&hb->lock);
	}
	return found;
}

/*
 * Returns true if we found and killed one or more poll requests
 */
__cold bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk,
			       bool cancel_all)
	__must_hold(&ctx->uring_lock)
{
	bool ret;

	ret = io_poll_remove_all_table(tsk, &ctx->cancel_table, cancel_all);
	ret |= io_poll_remove_all_table(tsk, &ctx->cancel_table_locked, cancel_all);
	return ret;
}

static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only,
				     struct io_cancel_data *cd,
				     struct io_hash_table *table,
				     struct io_hash_bucket **out_bucket)
{
	struct io_kiocb *req;
	u32 index = hash_long(cd->data, table->hash_bits);
	struct io_hash_bucket *hb = &table->hbs[index];

	*out_bucket = NULL;

	spin_lock(&hb->lock);
	hlist_for_each_entry(req, &hb->list, hash_node) {
		if (cd->data != req->cqe.user_data)
			continue;
		if (poll_only && req->opcode != IORING_OP_POLL_ADD)
			continue;
		if (cd->flags & IORING_ASYNC_CANCEL_ALL) {
			if (cd->seq == req->work.cancel_seq)
				continue;
			req->work.cancel_seq = cd->seq;
		}
		*out_bucket = hb;
		return req;
	}
	spin_unlock(&hb->lock);
	return NULL;
}

static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx,
					  struct io_cancel_data *cd,
					  struct io_hash_table *table,
					  struct io_hash_bucket **out_bucket)
{
	unsigned nr_buckets = 1U << table->hash_bits;
	struct io_kiocb *req;
	int i;

	*out_bucket = NULL;

	for (i = 0; i < nr_buckets; i++) {
		struct io_hash_bucket *hb = &table->hbs[i];

		spin_lock(&hb->lock);
		hlist_for_each_entry(req, &hb->list, hash_node) {
			if (io_cancel_req_match(req, cd)) {
				*out_bucket = hb;
				return req;
			}
		}
		spin_unlock(&hb->lock);
	}
	return NULL;
}

static int io_poll_disarm(struct io_kiocb *req)
{
	if (!req)
		return -ENOENT;
	if (!io_poll_get_ownership(req))
		return -EALREADY;
	io_poll_remove_entries(req);
	hash_del(&req->hash_node);
	return 0;
}

static int __io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
			    struct io_hash_table *table)
{
	struct io_hash_bucket *bucket;
	struct io_kiocb *req;

	if (cd->flags & (IORING_ASYNC_CANCEL_FD | IORING_ASYNC_CANCEL_OP |
			 IORING_ASYNC_CANCEL_ANY))
		req = io_poll_file_find(ctx, cd, table, &bucket);
	else
		req = io_poll_find(ctx, false, cd, table, &bucket);

	if (req)
		io_poll_cancel_req(req);
	if (bucket)
		spin_unlock(&bucket->lock);
	return req ? 0 : -ENOENT;
}

int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
		   unsigned issue_flags)
{
	int ret;

	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table);
	if (ret != -ENOENT)
		return ret;

	io_ring_submit_lock(ctx, issue_flags);
	ret = __io_poll_cancel(ctx, cd, &ctx->cancel_table_locked);
	io_ring_submit_unlock(ctx, issue_flags);
	return ret;
}

static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
				     unsigned int flags)
{
	u32 events;

	events = READ_ONCE(sqe->poll32_events);
#ifdef __BIG_ENDIAN
	events = swahw32(events);
#endif
	if (!(flags & IORING_POLL_ADD_MULTI))
		events |= EPOLLONESHOT;
	if (!(flags & IORING_POLL_ADD_LEVEL))
		events |= EPOLLET;
	return demangle_poll(events) |
		(events & (EPOLLEXCLUSIVE|EPOLLONESHOT|EPOLLET));
}
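
/*
 * For reference, the SQE fields the prep helpers below consume (this mirrors
 * what liburing's io_uring_prep_poll_add()/io_uring_prep_poll_update() set
 * up): sqe->poll32_events holds the poll mask, sqe->len the IORING_POLL_*
 * flags, and for updates sqe->addr/sqe->off carry the old and new user_data.
 */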

int io_poll_remove_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll_update *upd = io_kiocb_to_cmd(req, struct io_poll_update);
	u32 flags;

	if (sqe->buf_index || sqe->splice_fd_in)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA |
		      IORING_POLL_ADD_MULTI))
		return -EINVAL;
	/* meaningless without update */
	if (flags == IORING_POLL_ADD_MULTI)
		return -EINVAL;

	upd->old_user_data = READ_ONCE(sqe->addr);
	upd->update_events = flags & IORING_POLL_UPDATE_EVENTS;
	upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA;

	upd->new_user_data = READ_ONCE(sqe->off);
	if (!upd->update_user_data && upd->new_user_data)
		return -EINVAL;
	if (upd->update_events)
		upd->events = io_poll_parse_events(sqe, flags);
	else if (sqe->poll32_events)
		return -EINVAL;

	return 0;
}

int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	u32 flags;

	if (sqe->buf_index || sqe->off || sqe->addr)
		return -EINVAL;
	flags = READ_ONCE(sqe->len);
	if (flags & ~IORING_POLL_ADD_MULTI)
		return -EINVAL;
	if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP))
		return -EINVAL;

	poll->events = io_poll_parse_events(sqe, flags);
	return 0;
}

int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll *poll = io_kiocb_to_cmd(req, struct io_poll);
	struct io_poll_table ipt;
	int ret;

	ipt.pt._qproc = io_poll_queue_proc;

	/*
	 * If sqpoll or single issuer, there is no contention for ->uring_lock
	 * and we'll end up holding it in tw handlers anyway.
	 */
	if (req->ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_SINGLE_ISSUER))
		req->flags |= REQ_F_HASH_LOCKED;

	ret = __io_arm_poll_handler(req, poll, &ipt, poll->events, issue_flags);
	if (ret > 0) {
		io_req_set_res(req, ipt.result_mask, 0);
		return IOU_OK;
	}
	return ret ?: IOU_ISSUE_SKIP_COMPLETE;
}
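
/*
 * io_poll_remove() below serves both IORING_OP_POLL_REMOVE modes: it first
 * disarms the target poll request, then, if IORING_POLL_UPDATE_* flags were
 * given, patches its events and/or user_data and re-arms it via
 * io_poll_add(); otherwise the target is completed with -ECANCELED.
 */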
int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_poll_update *poll_update = io_kiocb_to_cmd(req, struct io_poll_update);
	struct io_ring_ctx *ctx = req->ctx;
	struct io_cancel_data cd = { .ctx = ctx, .data = poll_update->old_user_data, };
	struct io_hash_bucket *bucket;
	struct io_kiocb *preq;
	int ret2, ret = 0;

	io_ring_submit_lock(ctx, issue_flags);
	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	if (!ret2)
		goto found;
	if (ret2 != -ENOENT) {
		ret = ret2;
		goto out;
	}

	preq = io_poll_find(ctx, true, &cd, &ctx->cancel_table_locked, &bucket);
	ret2 = io_poll_disarm(preq);
	if (bucket)
		spin_unlock(&bucket->lock);
	if (ret2) {
		ret = ret2;
		goto out;
	}

found:
	if (WARN_ON_ONCE(preq->opcode != IORING_OP_POLL_ADD)) {
		ret = -EFAULT;
		goto out;
	}

	if (poll_update->update_events || poll_update->update_user_data) {
		/* only replace the event mask bits, keep the behavior flags */
		if (poll_update->update_events) {
			struct io_poll *poll = io_kiocb_to_cmd(preq, struct io_poll);

			poll->events &= ~0xffff;
			poll->events |= poll_update->events & 0xffff;
			poll->events |= IO_POLL_UNMASK;
		}
		if (poll_update->update_user_data)
			preq->cqe.user_data = poll_update->new_user_data;

		ret2 = io_poll_add(preq, issue_flags & ~IO_URING_F_UNLOCKED);
		/* successfully updated, don't complete poll request */
		if (!ret2 || ret2 == -EIOCBQUEUED)
			goto out;
	}

	req_set_fail(preq);
	io_req_set_res(preq, -ECANCELED, 0);
	preq->io_task_work.func = io_req_task_complete;
	io_req_task_work_add(preq);
out:
	io_ring_submit_unlock(ctx, issue_flags);
	if (ret < 0) {
		req_set_fail(req);
		return ret;
	}
	/* complete update request, we're done with it */
	io_req_set_res(req, ret, 0);
	return IOU_OK;
}

void io_apoll_cache_free(struct io_cache_entry *entry)
{
	kfree(container_of(entry, struct async_poll, cache));
}