// SPDX-License-Identifier: GPL-2.0
/*
 * Shared application/kernel submission and completion ring pairs, for
 * supporting fast/efficient IO.
 *
 * A note on the read/write ordering memory barriers that are matched between
 * the application and kernel side.
 *
 * After the application reads the CQ ring tail, it must use an
 * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses
 * before writing the tail (using smp_load_acquire to read the tail will
 * do). It also needs a smp_mb() before updating CQ head (ordering the
 * entry load(s) with the head store), pairing with an implicit barrier
 * through a control-dependency in io_get_cqe (smp_store_release to
 * store head will do). Failure to do so could lead to reading invalid
 * CQ entries.
 *
 * Likewise, the application must use an appropriate smp_wmb() before
 * writing the SQ tail (ordering SQ entry stores with the tail store),
 * which pairs with smp_load_acquire in io_get_sqring (smp_store_release
 * to store the tail will do). And it needs a barrier ordering the SQ
 * head load before writing new SQ entries (smp_load_acquire to read
 * head will do).
 *
 * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application
 * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after*
 * updating the SQ tail; a full memory barrier smp_mb() is needed
 * between.
 *
 * Also see the examples in the liburing library:
 *
 *	git://git.kernel.dk/liburing
 *
 * io_uring also uses READ/WRITE_ONCE() for _any_ store or load that happens
 * from data shared between the kernel and application. This is done both
 * for ordering purposes and to ensure that once a value is loaded from
 * data that the application could potentially modify, it remains stable.
 *
 * Copyright (C) 2018-2019 Jens Axboe
 * Copyright (c) 2018-2019 Christoph Hellwig
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <net/compat.h>
#include <linux/refcount.h>
#include <linux/uio.h>
#include <linux/bits.h>

#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/fdtable.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/percpu.h>
598c2ecf20Sopenharmony_ci#include <linux/slab.h> 608c2ecf20Sopenharmony_ci#include <linux/blkdev.h> 618c2ecf20Sopenharmony_ci#include <linux/bvec.h> 628c2ecf20Sopenharmony_ci#include <linux/net.h> 638c2ecf20Sopenharmony_ci#include <net/sock.h> 648c2ecf20Sopenharmony_ci#include <net/af_unix.h> 658c2ecf20Sopenharmony_ci#include <net/scm.h> 668c2ecf20Sopenharmony_ci#include <linux/anon_inodes.h> 678c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 688c2ecf20Sopenharmony_ci#include <linux/uaccess.h> 698c2ecf20Sopenharmony_ci#include <linux/nospec.h> 708c2ecf20Sopenharmony_ci#include <linux/sizes.h> 718c2ecf20Sopenharmony_ci#include <linux/hugetlb.h> 728c2ecf20Sopenharmony_ci#include <linux/highmem.h> 738c2ecf20Sopenharmony_ci#include <linux/namei.h> 748c2ecf20Sopenharmony_ci#include <linux/fsnotify.h> 758c2ecf20Sopenharmony_ci#include <linux/fadvise.h> 768c2ecf20Sopenharmony_ci#include <linux/eventpoll.h> 778c2ecf20Sopenharmony_ci#include <linux/splice.h> 788c2ecf20Sopenharmony_ci#include <linux/task_work.h> 798c2ecf20Sopenharmony_ci#include <linux/pagemap.h> 808c2ecf20Sopenharmony_ci#include <linux/io_uring.h> 818c2ecf20Sopenharmony_ci#include <linux/tracehook.h> 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_ci#define CREATE_TRACE_POINTS 848c2ecf20Sopenharmony_ci#include <trace/events/io_uring.h> 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci#include <uapi/linux/io_uring.h> 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci#include "../fs/internal.h" 898c2ecf20Sopenharmony_ci#include "io-wq.h" 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci#define IORING_MAX_ENTRIES 32768 928c2ecf20Sopenharmony_ci#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) 938c2ecf20Sopenharmony_ci#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci/* only define max */ 968c2ecf20Sopenharmony_ci#define IORING_MAX_FIXED_FILES (1U << 15) 978c2ecf20Sopenharmony_ci#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ 
988c2ecf20Sopenharmony_ci IORING_REGISTER_LAST + IORING_OP_LAST) 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) 1018c2ecf20Sopenharmony_ci#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT) 1028c2ecf20Sopenharmony_ci#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1) 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci#define IORING_MAX_REG_BUFFERS (1U << 14) 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \ 1078c2ecf20Sopenharmony_ci IOSQE_IO_HARDLINK | IOSQE_ASYNC | \ 1088c2ecf20Sopenharmony_ci IOSQE_BUFFER_SELECT) 1098c2ecf20Sopenharmony_ci#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ 1108c2ecf20Sopenharmony_ci REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS) 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci#define IO_TCTX_REFS_CACHE_NR (1U << 10) 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_cistruct io_uring { 1158c2ecf20Sopenharmony_ci u32 head ____cacheline_aligned_in_smp; 1168c2ecf20Sopenharmony_ci u32 tail ____cacheline_aligned_in_smp; 1178c2ecf20Sopenharmony_ci}; 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci/* 1208c2ecf20Sopenharmony_ci * This data is shared with the application through the mmap at offsets 1218c2ecf20Sopenharmony_ci * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING. 1228c2ecf20Sopenharmony_ci * 1238c2ecf20Sopenharmony_ci * The offsets to the member fields are published through struct 1248c2ecf20Sopenharmony_ci * io_sqring_offsets when calling io_uring_setup. 1258c2ecf20Sopenharmony_ci */ 1268c2ecf20Sopenharmony_cistruct io_rings { 1278c2ecf20Sopenharmony_ci /* 1288c2ecf20Sopenharmony_ci * Head and tail offsets into the ring; the offsets need to be 1298c2ecf20Sopenharmony_ci * masked to get valid indices. 
1308c2ecf20Sopenharmony_ci * 1318c2ecf20Sopenharmony_ci * The kernel controls head of the sq ring and the tail of the cq ring, 1328c2ecf20Sopenharmony_ci * and the application controls tail of the sq ring and the head of the 1338c2ecf20Sopenharmony_ci * cq ring. 1348c2ecf20Sopenharmony_ci */ 1358c2ecf20Sopenharmony_ci struct io_uring sq, cq; 1368c2ecf20Sopenharmony_ci /* 1378c2ecf20Sopenharmony_ci * Bitmasks to apply to head and tail offsets (constant, equals 1388c2ecf20Sopenharmony_ci * ring_entries - 1) 1398c2ecf20Sopenharmony_ci */ 1408c2ecf20Sopenharmony_ci u32 sq_ring_mask, cq_ring_mask; 1418c2ecf20Sopenharmony_ci /* Ring sizes (constant, power of 2) */ 1428c2ecf20Sopenharmony_ci u32 sq_ring_entries, cq_ring_entries; 1438c2ecf20Sopenharmony_ci /* 1448c2ecf20Sopenharmony_ci * Number of invalid entries dropped by the kernel due to 1458c2ecf20Sopenharmony_ci * invalid index stored in array 1468c2ecf20Sopenharmony_ci * 1478c2ecf20Sopenharmony_ci * Written by the kernel, shouldn't be modified by the 1488c2ecf20Sopenharmony_ci * application (i.e. get number of "new events" by comparing to 1498c2ecf20Sopenharmony_ci * cached value). 1508c2ecf20Sopenharmony_ci * 1518c2ecf20Sopenharmony_ci * After a new SQ head value was read by the application this 1528c2ecf20Sopenharmony_ci * counter includes all submissions that were dropped reaching 1538c2ecf20Sopenharmony_ci * the new SQ head (and possibly more). 1548c2ecf20Sopenharmony_ci */ 1558c2ecf20Sopenharmony_ci u32 sq_dropped; 1568c2ecf20Sopenharmony_ci /* 1578c2ecf20Sopenharmony_ci * Runtime SQ flags 1588c2ecf20Sopenharmony_ci * 1598c2ecf20Sopenharmony_ci * Written by the kernel, shouldn't be modified by the 1608c2ecf20Sopenharmony_ci * application. 1618c2ecf20Sopenharmony_ci * 1628c2ecf20Sopenharmony_ci * The application needs a full memory barrier before checking 1638c2ecf20Sopenharmony_ci * for IORING_SQ_NEED_WAKEUP after updating the sq tail. 
1648c2ecf20Sopenharmony_ci */ 1658c2ecf20Sopenharmony_ci u32 sq_flags; 1668c2ecf20Sopenharmony_ci /* 1678c2ecf20Sopenharmony_ci * Runtime CQ flags 1688c2ecf20Sopenharmony_ci * 1698c2ecf20Sopenharmony_ci * Written by the application, shouldn't be modified by the 1708c2ecf20Sopenharmony_ci * kernel. 1718c2ecf20Sopenharmony_ci */ 1728c2ecf20Sopenharmony_ci u32 cq_flags; 1738c2ecf20Sopenharmony_ci /* 1748c2ecf20Sopenharmony_ci * Number of completion events lost because the queue was full; 1758c2ecf20Sopenharmony_ci * this should be avoided by the application by making sure 1768c2ecf20Sopenharmony_ci * there are not more requests pending than there is space in 1778c2ecf20Sopenharmony_ci * the completion queue. 1788c2ecf20Sopenharmony_ci * 1798c2ecf20Sopenharmony_ci * Written by the kernel, shouldn't be modified by the 1808c2ecf20Sopenharmony_ci * application (i.e. get number of "new events" by comparing to 1818c2ecf20Sopenharmony_ci * cached value). 1828c2ecf20Sopenharmony_ci * 1838c2ecf20Sopenharmony_ci * As completion events come in out of order this counter is not 1848c2ecf20Sopenharmony_ci * ordered with any other data. 1858c2ecf20Sopenharmony_ci */ 1868c2ecf20Sopenharmony_ci u32 cq_overflow; 1878c2ecf20Sopenharmony_ci /* 1888c2ecf20Sopenharmony_ci * Ring buffer of completion events. 1898c2ecf20Sopenharmony_ci * 1908c2ecf20Sopenharmony_ci * The kernel writes completion events fresh every time they are 1918c2ecf20Sopenharmony_ci * produced, so the application is allowed to modify pending 1928c2ecf20Sopenharmony_ci * entries. 
1938c2ecf20Sopenharmony_ci */ 1948c2ecf20Sopenharmony_ci struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; 1958c2ecf20Sopenharmony_ci}; 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_cienum io_uring_cmd_flags { 1988c2ecf20Sopenharmony_ci IO_URING_F_NONBLOCK = 1, 1998c2ecf20Sopenharmony_ci IO_URING_F_COMPLETE_DEFER = 2, 2008c2ecf20Sopenharmony_ci}; 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_cistruct io_mapped_ubuf { 2038c2ecf20Sopenharmony_ci u64 ubuf; 2048c2ecf20Sopenharmony_ci u64 ubuf_end; 2058c2ecf20Sopenharmony_ci unsigned int nr_bvecs; 2068c2ecf20Sopenharmony_ci unsigned long acct_pages; 2078c2ecf20Sopenharmony_ci struct bio_vec bvec[]; 2088c2ecf20Sopenharmony_ci}; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_cistruct io_ring_ctx; 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_cistruct io_overflow_cqe { 2138c2ecf20Sopenharmony_ci struct io_uring_cqe cqe; 2148c2ecf20Sopenharmony_ci struct list_head list; 2158c2ecf20Sopenharmony_ci}; 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_cistruct io_fixed_file { 2188c2ecf20Sopenharmony_ci /* file * with additional FFS_* flags */ 2198c2ecf20Sopenharmony_ci unsigned long file_ptr; 2208c2ecf20Sopenharmony_ci}; 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_cistruct io_rsrc_put { 2238c2ecf20Sopenharmony_ci struct list_head list; 2248c2ecf20Sopenharmony_ci u64 tag; 2258c2ecf20Sopenharmony_ci union { 2268c2ecf20Sopenharmony_ci void *rsrc; 2278c2ecf20Sopenharmony_ci struct file *file; 2288c2ecf20Sopenharmony_ci struct io_mapped_ubuf *buf; 2298c2ecf20Sopenharmony_ci }; 2308c2ecf20Sopenharmony_ci}; 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_cistruct io_file_table { 2338c2ecf20Sopenharmony_ci struct io_fixed_file *files; 2348c2ecf20Sopenharmony_ci}; 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_cistruct io_rsrc_node { 2378c2ecf20Sopenharmony_ci struct percpu_ref refs; 2388c2ecf20Sopenharmony_ci struct list_head node; 2398c2ecf20Sopenharmony_ci struct list_head rsrc_list; 
2408c2ecf20Sopenharmony_ci struct io_rsrc_data *rsrc_data; 2418c2ecf20Sopenharmony_ci struct llist_node llist; 2428c2ecf20Sopenharmony_ci bool done; 2438c2ecf20Sopenharmony_ci}; 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_citypedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc); 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_cistruct io_rsrc_data { 2488c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci u64 **tags; 2518c2ecf20Sopenharmony_ci unsigned int nr; 2528c2ecf20Sopenharmony_ci rsrc_put_fn *do_put; 2538c2ecf20Sopenharmony_ci atomic_t refs; 2548c2ecf20Sopenharmony_ci struct completion done; 2558c2ecf20Sopenharmony_ci bool quiesce; 2568c2ecf20Sopenharmony_ci}; 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_cistruct io_buffer { 2598c2ecf20Sopenharmony_ci struct list_head list; 2608c2ecf20Sopenharmony_ci __u64 addr; 2618c2ecf20Sopenharmony_ci __u32 len; 2628c2ecf20Sopenharmony_ci __u16 bid; 2638c2ecf20Sopenharmony_ci}; 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_cistruct io_restriction { 2668c2ecf20Sopenharmony_ci DECLARE_BITMAP(register_op, IORING_REGISTER_LAST); 2678c2ecf20Sopenharmony_ci DECLARE_BITMAP(sqe_op, IORING_OP_LAST); 2688c2ecf20Sopenharmony_ci u8 sqe_flags_allowed; 2698c2ecf20Sopenharmony_ci u8 sqe_flags_required; 2708c2ecf20Sopenharmony_ci bool registered; 2718c2ecf20Sopenharmony_ci}; 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_cienum { 2748c2ecf20Sopenharmony_ci IO_SQ_THREAD_SHOULD_STOP = 0, 2758c2ecf20Sopenharmony_ci IO_SQ_THREAD_SHOULD_PARK, 2768c2ecf20Sopenharmony_ci}; 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_cistruct io_sq_data { 2798c2ecf20Sopenharmony_ci refcount_t refs; 2808c2ecf20Sopenharmony_ci atomic_t park_pending; 2818c2ecf20Sopenharmony_ci struct mutex lock; 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci /* ctx's that are using this sqd */ 2848c2ecf20Sopenharmony_ci struct list_head ctx_list; 
2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci struct task_struct *thread; 2878c2ecf20Sopenharmony_ci struct wait_queue_head wait; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci unsigned sq_thread_idle; 2908c2ecf20Sopenharmony_ci int sq_cpu; 2918c2ecf20Sopenharmony_ci pid_t task_pid; 2928c2ecf20Sopenharmony_ci pid_t task_tgid; 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci unsigned long state; 2958c2ecf20Sopenharmony_ci struct completion exited; 2968c2ecf20Sopenharmony_ci}; 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci#define IO_COMPL_BATCH 32 2998c2ecf20Sopenharmony_ci#define IO_REQ_CACHE_SIZE 32 3008c2ecf20Sopenharmony_ci#define IO_REQ_ALLOC_BATCH 8 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_cistruct io_submit_link { 3038c2ecf20Sopenharmony_ci struct io_kiocb *head; 3048c2ecf20Sopenharmony_ci struct io_kiocb *last; 3058c2ecf20Sopenharmony_ci}; 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_cistruct io_submit_state { 3088c2ecf20Sopenharmony_ci struct blk_plug plug; 3098c2ecf20Sopenharmony_ci struct io_submit_link link; 3108c2ecf20Sopenharmony_ci 3118c2ecf20Sopenharmony_ci /* 3128c2ecf20Sopenharmony_ci * io_kiocb alloc cache 3138c2ecf20Sopenharmony_ci */ 3148c2ecf20Sopenharmony_ci void *reqs[IO_REQ_CACHE_SIZE]; 3158c2ecf20Sopenharmony_ci unsigned int free_reqs; 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ci bool plug_started; 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_ci /* 3208c2ecf20Sopenharmony_ci * Batch completion logic 3218c2ecf20Sopenharmony_ci */ 3228c2ecf20Sopenharmony_ci struct io_kiocb *compl_reqs[IO_COMPL_BATCH]; 3238c2ecf20Sopenharmony_ci unsigned int compl_nr; 3248c2ecf20Sopenharmony_ci /* inline/task_work completion list, under ->uring_lock */ 3258c2ecf20Sopenharmony_ci struct list_head free_list; 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci unsigned int ios_left; 3288c2ecf20Sopenharmony_ci}; 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_cistruct io_ring_ctx { 
3318c2ecf20Sopenharmony_ci /* const or read-mostly hot data */ 3328c2ecf20Sopenharmony_ci struct { 3338c2ecf20Sopenharmony_ci struct percpu_ref refs; 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci struct io_rings *rings; 3368c2ecf20Sopenharmony_ci unsigned int flags; 3378c2ecf20Sopenharmony_ci unsigned int compat: 1; 3388c2ecf20Sopenharmony_ci unsigned int drain_next: 1; 3398c2ecf20Sopenharmony_ci unsigned int eventfd_async: 1; 3408c2ecf20Sopenharmony_ci unsigned int restricted: 1; 3418c2ecf20Sopenharmony_ci unsigned int off_timeout_used: 1; 3428c2ecf20Sopenharmony_ci unsigned int drain_active: 1; 3438c2ecf20Sopenharmony_ci } ____cacheline_aligned_in_smp; 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci /* submission data */ 3468c2ecf20Sopenharmony_ci struct { 3478c2ecf20Sopenharmony_ci struct mutex uring_lock; 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci /* 3508c2ecf20Sopenharmony_ci * Ring buffer of indices into array of io_uring_sqe, which is 3518c2ecf20Sopenharmony_ci * mmapped by the application using the IORING_OFF_SQES offset. 3528c2ecf20Sopenharmony_ci * 3538c2ecf20Sopenharmony_ci * This indirection could e.g. be used to assign fixed 3548c2ecf20Sopenharmony_ci * io_uring_sqe entries to operations and only submit them to 3558c2ecf20Sopenharmony_ci * the queue when needed. 3568c2ecf20Sopenharmony_ci * 3578c2ecf20Sopenharmony_ci * The kernel modifies neither the indices array nor the entries 3588c2ecf20Sopenharmony_ci * array. 
3598c2ecf20Sopenharmony_ci */ 3608c2ecf20Sopenharmony_ci u32 *sq_array; 3618c2ecf20Sopenharmony_ci struct io_uring_sqe *sq_sqes; 3628c2ecf20Sopenharmony_ci unsigned cached_sq_head; 3638c2ecf20Sopenharmony_ci unsigned sq_entries; 3648c2ecf20Sopenharmony_ci struct list_head defer_list; 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci /* 3678c2ecf20Sopenharmony_ci * Fixed resources fast path, should be accessed only under 3688c2ecf20Sopenharmony_ci * uring_lock, and updated through io_uring_register(2) 3698c2ecf20Sopenharmony_ci */ 3708c2ecf20Sopenharmony_ci struct io_rsrc_node *rsrc_node; 3718c2ecf20Sopenharmony_ci struct io_file_table file_table; 3728c2ecf20Sopenharmony_ci unsigned nr_user_files; 3738c2ecf20Sopenharmony_ci unsigned nr_user_bufs; 3748c2ecf20Sopenharmony_ci struct io_mapped_ubuf **user_bufs; 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci struct io_submit_state submit_state; 3778c2ecf20Sopenharmony_ci struct list_head timeout_list; 3788c2ecf20Sopenharmony_ci struct list_head ltimeout_list; 3798c2ecf20Sopenharmony_ci struct list_head cq_overflow_list; 3808c2ecf20Sopenharmony_ci struct xarray io_buffers; 3818c2ecf20Sopenharmony_ci struct xarray personalities; 3828c2ecf20Sopenharmony_ci u32 pers_next; 3838c2ecf20Sopenharmony_ci unsigned sq_thread_idle; 3848c2ecf20Sopenharmony_ci } ____cacheline_aligned_in_smp; 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci /* IRQ completion list, under ->completion_lock */ 3878c2ecf20Sopenharmony_ci struct list_head locked_free_list; 3888c2ecf20Sopenharmony_ci unsigned int locked_free_nr; 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci const struct cred *sq_creds; /* cred used for __io_sq_thread() */ 3918c2ecf20Sopenharmony_ci struct io_sq_data *sq_data; /* if using sq thread polling */ 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_ci struct wait_queue_head sqo_sq_wait; 3948c2ecf20Sopenharmony_ci struct list_head sqd_list; 3958c2ecf20Sopenharmony_ci 3968c2ecf20Sopenharmony_ci unsigned long 
check_cq_overflow; 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_ci struct { 3998c2ecf20Sopenharmony_ci unsigned cached_cq_tail; 4008c2ecf20Sopenharmony_ci unsigned cq_entries; 4018c2ecf20Sopenharmony_ci struct eventfd_ctx *cq_ev_fd; 4028c2ecf20Sopenharmony_ci struct wait_queue_head poll_wait; 4038c2ecf20Sopenharmony_ci struct wait_queue_head cq_wait; 4048c2ecf20Sopenharmony_ci unsigned cq_extra; 4058c2ecf20Sopenharmony_ci atomic_t cq_timeouts; 4068c2ecf20Sopenharmony_ci unsigned cq_last_tm_flush; 4078c2ecf20Sopenharmony_ci } ____cacheline_aligned_in_smp; 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci struct { 4108c2ecf20Sopenharmony_ci spinlock_t completion_lock; 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci spinlock_t timeout_lock; 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci /* 4158c2ecf20Sopenharmony_ci * ->iopoll_list is protected by the ctx->uring_lock for 4168c2ecf20Sopenharmony_ci * io_uring instances that don't use IORING_SETUP_SQPOLL. 4178c2ecf20Sopenharmony_ci * For SQPOLL, only the single threaded io_sq_thread() will 4188c2ecf20Sopenharmony_ci * manipulate the list, hence no extra locking is needed there. 
4198c2ecf20Sopenharmony_ci */ 4208c2ecf20Sopenharmony_ci struct list_head iopoll_list; 4218c2ecf20Sopenharmony_ci struct hlist_head *cancel_hash; 4228c2ecf20Sopenharmony_ci unsigned cancel_hash_bits; 4238c2ecf20Sopenharmony_ci bool poll_multi_queue; 4248c2ecf20Sopenharmony_ci } ____cacheline_aligned_in_smp; 4258c2ecf20Sopenharmony_ci 4268c2ecf20Sopenharmony_ci struct io_restriction restrictions; 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ci /* slow path rsrc auxilary data, used by update/register */ 4298c2ecf20Sopenharmony_ci struct { 4308c2ecf20Sopenharmony_ci struct io_rsrc_node *rsrc_backup_node; 4318c2ecf20Sopenharmony_ci struct io_mapped_ubuf *dummy_ubuf; 4328c2ecf20Sopenharmony_ci struct io_rsrc_data *file_data; 4338c2ecf20Sopenharmony_ci struct io_rsrc_data *buf_data; 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci struct delayed_work rsrc_put_work; 4368c2ecf20Sopenharmony_ci struct llist_head rsrc_put_llist; 4378c2ecf20Sopenharmony_ci struct list_head rsrc_ref_list; 4388c2ecf20Sopenharmony_ci spinlock_t rsrc_ref_lock; 4398c2ecf20Sopenharmony_ci }; 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_ci /* Keep this last, we don't need it for the fast path */ 4428c2ecf20Sopenharmony_ci struct { 4438c2ecf20Sopenharmony_ci #if defined(CONFIG_UNIX) 4448c2ecf20Sopenharmony_ci struct socket *ring_sock; 4458c2ecf20Sopenharmony_ci #endif 4468c2ecf20Sopenharmony_ci /* hashed buffered write serialization */ 4478c2ecf20Sopenharmony_ci struct io_wq_hash *hash_map; 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_ci /* Only used for accounting purposes */ 4508c2ecf20Sopenharmony_ci struct user_struct *user; 4518c2ecf20Sopenharmony_ci struct mm_struct *mm_account; 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci /* ctx exit and cancelation */ 4548c2ecf20Sopenharmony_ci struct llist_head fallback_llist; 4558c2ecf20Sopenharmony_ci struct delayed_work fallback_work; 4568c2ecf20Sopenharmony_ci struct work_struct exit_work; 4578c2ecf20Sopenharmony_ci struct 
list_head tctx_list; 4588c2ecf20Sopenharmony_ci struct completion ref_comp; 4598c2ecf20Sopenharmony_ci u32 iowq_limits[2]; 4608c2ecf20Sopenharmony_ci bool iowq_limits_set; 4618c2ecf20Sopenharmony_ci }; 4628c2ecf20Sopenharmony_ci}; 4638c2ecf20Sopenharmony_ci 4648c2ecf20Sopenharmony_cistruct io_uring_task { 4658c2ecf20Sopenharmony_ci /* submission side */ 4668c2ecf20Sopenharmony_ci int cached_refs; 4678c2ecf20Sopenharmony_ci struct xarray xa; 4688c2ecf20Sopenharmony_ci struct wait_queue_head wait; 4698c2ecf20Sopenharmony_ci const struct io_ring_ctx *last; 4708c2ecf20Sopenharmony_ci struct io_wq *io_wq; 4718c2ecf20Sopenharmony_ci struct percpu_counter inflight; 4728c2ecf20Sopenharmony_ci atomic_t inflight_tracked; 4738c2ecf20Sopenharmony_ci atomic_t in_idle; 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_ci spinlock_t task_lock; 4768c2ecf20Sopenharmony_ci struct io_wq_work_list task_list; 4778c2ecf20Sopenharmony_ci struct callback_head task_work; 4788c2ecf20Sopenharmony_ci bool task_running; 4798c2ecf20Sopenharmony_ci}; 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_ci/* 4828c2ecf20Sopenharmony_ci * First field must be the file pointer in all the 4838c2ecf20Sopenharmony_ci * iocb unions! 
See also 'struct kiocb' in <linux/fs.h> 4848c2ecf20Sopenharmony_ci */ 4858c2ecf20Sopenharmony_cistruct io_poll_iocb { 4868c2ecf20Sopenharmony_ci struct file *file; 4878c2ecf20Sopenharmony_ci struct wait_queue_head *head; 4888c2ecf20Sopenharmony_ci __poll_t events; 4898c2ecf20Sopenharmony_ci int retries; 4908c2ecf20Sopenharmony_ci struct wait_queue_entry wait; 4918c2ecf20Sopenharmony_ci}; 4928c2ecf20Sopenharmony_ci 4938c2ecf20Sopenharmony_cistruct io_poll_update { 4948c2ecf20Sopenharmony_ci struct file *file; 4958c2ecf20Sopenharmony_ci u64 old_user_data; 4968c2ecf20Sopenharmony_ci u64 new_user_data; 4978c2ecf20Sopenharmony_ci __poll_t events; 4988c2ecf20Sopenharmony_ci bool update_events; 4998c2ecf20Sopenharmony_ci bool update_user_data; 5008c2ecf20Sopenharmony_ci}; 5018c2ecf20Sopenharmony_ci 5028c2ecf20Sopenharmony_cistruct io_close { 5038c2ecf20Sopenharmony_ci struct file *file; 5048c2ecf20Sopenharmony_ci int fd; 5058c2ecf20Sopenharmony_ci u32 file_slot; 5068c2ecf20Sopenharmony_ci}; 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_cistruct io_timeout_data { 5098c2ecf20Sopenharmony_ci struct io_kiocb *req; 5108c2ecf20Sopenharmony_ci struct hrtimer timer; 5118c2ecf20Sopenharmony_ci struct timespec64 ts; 5128c2ecf20Sopenharmony_ci enum hrtimer_mode mode; 5138c2ecf20Sopenharmony_ci u32 flags; 5148c2ecf20Sopenharmony_ci}; 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_cistruct io_accept { 5178c2ecf20Sopenharmony_ci struct file *file; 5188c2ecf20Sopenharmony_ci struct sockaddr __user *addr; 5198c2ecf20Sopenharmony_ci int __user *addr_len; 5208c2ecf20Sopenharmony_ci int flags; 5218c2ecf20Sopenharmony_ci u32 file_slot; 5228c2ecf20Sopenharmony_ci unsigned long nofile; 5238c2ecf20Sopenharmony_ci}; 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_cistruct io_sync { 5268c2ecf20Sopenharmony_ci struct file *file; 5278c2ecf20Sopenharmony_ci loff_t len; 5288c2ecf20Sopenharmony_ci loff_t off; 5298c2ecf20Sopenharmony_ci int flags; 5308c2ecf20Sopenharmony_ci int mode; 
5318c2ecf20Sopenharmony_ci}; 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_cistruct io_cancel { 5348c2ecf20Sopenharmony_ci struct file *file; 5358c2ecf20Sopenharmony_ci u64 addr; 5368c2ecf20Sopenharmony_ci}; 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_cistruct io_timeout { 5398c2ecf20Sopenharmony_ci struct file *file; 5408c2ecf20Sopenharmony_ci u32 off; 5418c2ecf20Sopenharmony_ci u32 target_seq; 5428c2ecf20Sopenharmony_ci struct list_head list; 5438c2ecf20Sopenharmony_ci /* head of the link, used by linked timeouts only */ 5448c2ecf20Sopenharmony_ci struct io_kiocb *head; 5458c2ecf20Sopenharmony_ci /* for linked completions */ 5468c2ecf20Sopenharmony_ci struct io_kiocb *prev; 5478c2ecf20Sopenharmony_ci}; 5488c2ecf20Sopenharmony_ci 5498c2ecf20Sopenharmony_cistruct io_timeout_rem { 5508c2ecf20Sopenharmony_ci struct file *file; 5518c2ecf20Sopenharmony_ci u64 addr; 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci /* timeout update */ 5548c2ecf20Sopenharmony_ci struct timespec64 ts; 5558c2ecf20Sopenharmony_ci u32 flags; 5568c2ecf20Sopenharmony_ci bool ltimeout; 5578c2ecf20Sopenharmony_ci}; 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_cistruct io_rw { 5608c2ecf20Sopenharmony_ci /* NOTE: kiocb has the file as the first member, so don't do it here */ 5618c2ecf20Sopenharmony_ci struct kiocb kiocb; 5628c2ecf20Sopenharmony_ci u64 addr; 5638c2ecf20Sopenharmony_ci u64 len; 5648c2ecf20Sopenharmony_ci}; 5658c2ecf20Sopenharmony_ci 5668c2ecf20Sopenharmony_cistruct io_connect { 5678c2ecf20Sopenharmony_ci struct file *file; 5688c2ecf20Sopenharmony_ci struct sockaddr __user *addr; 5698c2ecf20Sopenharmony_ci int addr_len; 5708c2ecf20Sopenharmony_ci}; 5718c2ecf20Sopenharmony_ci 5728c2ecf20Sopenharmony_cistruct io_sr_msg { 5738c2ecf20Sopenharmony_ci struct file *file; 5748c2ecf20Sopenharmony_ci union { 5758c2ecf20Sopenharmony_ci struct compat_msghdr __user *umsg_compat; 5768c2ecf20Sopenharmony_ci struct user_msghdr __user *umsg; 5778c2ecf20Sopenharmony_ci void 
__user *buf; 5788c2ecf20Sopenharmony_ci }; 5798c2ecf20Sopenharmony_ci int msg_flags; 5808c2ecf20Sopenharmony_ci int bgid; 5818c2ecf20Sopenharmony_ci size_t len; 5828c2ecf20Sopenharmony_ci size_t done_io; 5838c2ecf20Sopenharmony_ci struct io_buffer *kbuf; 5848c2ecf20Sopenharmony_ci void __user *msg_control; 5858c2ecf20Sopenharmony_ci}; 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_cistruct io_open { 5888c2ecf20Sopenharmony_ci struct file *file; 5898c2ecf20Sopenharmony_ci int dfd; 5908c2ecf20Sopenharmony_ci u32 file_slot; 5918c2ecf20Sopenharmony_ci struct filename *filename; 5928c2ecf20Sopenharmony_ci struct open_how how; 5938c2ecf20Sopenharmony_ci unsigned long nofile; 5948c2ecf20Sopenharmony_ci}; 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_cistruct io_rsrc_update { 5978c2ecf20Sopenharmony_ci struct file *file; 5988c2ecf20Sopenharmony_ci u64 arg; 5998c2ecf20Sopenharmony_ci u32 nr_args; 6008c2ecf20Sopenharmony_ci u32 offset; 6018c2ecf20Sopenharmony_ci}; 6028c2ecf20Sopenharmony_ci 6038c2ecf20Sopenharmony_cistruct io_fadvise { 6048c2ecf20Sopenharmony_ci struct file *file; 6058c2ecf20Sopenharmony_ci u64 offset; 6068c2ecf20Sopenharmony_ci u32 len; 6078c2ecf20Sopenharmony_ci u32 advice; 6088c2ecf20Sopenharmony_ci}; 6098c2ecf20Sopenharmony_ci 6108c2ecf20Sopenharmony_cistruct io_madvise { 6118c2ecf20Sopenharmony_ci struct file *file; 6128c2ecf20Sopenharmony_ci u64 addr; 6138c2ecf20Sopenharmony_ci u32 len; 6148c2ecf20Sopenharmony_ci u32 advice; 6158c2ecf20Sopenharmony_ci}; 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_cistruct io_epoll { 6188c2ecf20Sopenharmony_ci struct file *file; 6198c2ecf20Sopenharmony_ci int epfd; 6208c2ecf20Sopenharmony_ci int op; 6218c2ecf20Sopenharmony_ci int fd; 6228c2ecf20Sopenharmony_ci struct epoll_event event; 6238c2ecf20Sopenharmony_ci}; 6248c2ecf20Sopenharmony_ci 6258c2ecf20Sopenharmony_cistruct io_splice { 6268c2ecf20Sopenharmony_ci struct file *file_out; 6278c2ecf20Sopenharmony_ci loff_t off_out; 6288c2ecf20Sopenharmony_ci 
loff_t off_in; 6298c2ecf20Sopenharmony_ci u64 len; 6308c2ecf20Sopenharmony_ci int splice_fd_in; 6318c2ecf20Sopenharmony_ci unsigned int flags; 6328c2ecf20Sopenharmony_ci}; 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_cistruct io_provide_buf { 6358c2ecf20Sopenharmony_ci struct file *file; 6368c2ecf20Sopenharmony_ci __u64 addr; 6378c2ecf20Sopenharmony_ci __u32 len; 6388c2ecf20Sopenharmony_ci __u32 bgid; 6398c2ecf20Sopenharmony_ci __u16 nbufs; 6408c2ecf20Sopenharmony_ci __u16 bid; 6418c2ecf20Sopenharmony_ci}; 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_cistruct io_statx { 6448c2ecf20Sopenharmony_ci struct file *file; 6458c2ecf20Sopenharmony_ci int dfd; 6468c2ecf20Sopenharmony_ci unsigned int mask; 6478c2ecf20Sopenharmony_ci unsigned int flags; 6488c2ecf20Sopenharmony_ci const char __user *filename; 6498c2ecf20Sopenharmony_ci struct statx __user *buffer; 6508c2ecf20Sopenharmony_ci}; 6518c2ecf20Sopenharmony_ci 6528c2ecf20Sopenharmony_cistruct io_shutdown { 6538c2ecf20Sopenharmony_ci struct file *file; 6548c2ecf20Sopenharmony_ci int how; 6558c2ecf20Sopenharmony_ci}; 6568c2ecf20Sopenharmony_ci 6578c2ecf20Sopenharmony_cistruct io_rename { 6588c2ecf20Sopenharmony_ci struct file *file; 6598c2ecf20Sopenharmony_ci int old_dfd; 6608c2ecf20Sopenharmony_ci int new_dfd; 6618c2ecf20Sopenharmony_ci struct filename *oldpath; 6628c2ecf20Sopenharmony_ci struct filename *newpath; 6638c2ecf20Sopenharmony_ci int flags; 6648c2ecf20Sopenharmony_ci}; 6658c2ecf20Sopenharmony_ci 6668c2ecf20Sopenharmony_cistruct io_unlink { 6678c2ecf20Sopenharmony_ci struct file *file; 6688c2ecf20Sopenharmony_ci int dfd; 6698c2ecf20Sopenharmony_ci int flags; 6708c2ecf20Sopenharmony_ci struct filename *filename; 6718c2ecf20Sopenharmony_ci}; 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_cistruct io_mkdir { 6748c2ecf20Sopenharmony_ci struct file *file; 6758c2ecf20Sopenharmony_ci int dfd; 6768c2ecf20Sopenharmony_ci umode_t mode; 6778c2ecf20Sopenharmony_ci struct filename *filename; 
6788c2ecf20Sopenharmony_ci}; 6798c2ecf20Sopenharmony_ci 6808c2ecf20Sopenharmony_cistruct io_symlink { 6818c2ecf20Sopenharmony_ci struct file *file; 6828c2ecf20Sopenharmony_ci int new_dfd; 6838c2ecf20Sopenharmony_ci struct filename *oldpath; 6848c2ecf20Sopenharmony_ci struct filename *newpath; 6858c2ecf20Sopenharmony_ci}; 6868c2ecf20Sopenharmony_ci 6878c2ecf20Sopenharmony_cistruct io_hardlink { 6888c2ecf20Sopenharmony_ci struct file *file; 6898c2ecf20Sopenharmony_ci int old_dfd; 6908c2ecf20Sopenharmony_ci int new_dfd; 6918c2ecf20Sopenharmony_ci struct filename *oldpath; 6928c2ecf20Sopenharmony_ci struct filename *newpath; 6938c2ecf20Sopenharmony_ci int flags; 6948c2ecf20Sopenharmony_ci}; 6958c2ecf20Sopenharmony_ci 6968c2ecf20Sopenharmony_cistruct io_completion { 6978c2ecf20Sopenharmony_ci struct file *file; 6988c2ecf20Sopenharmony_ci u32 cflags; 6998c2ecf20Sopenharmony_ci}; 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_cistruct io_async_connect { 7028c2ecf20Sopenharmony_ci struct sockaddr_storage address; 7038c2ecf20Sopenharmony_ci}; 7048c2ecf20Sopenharmony_ci 7058c2ecf20Sopenharmony_cistruct io_async_msghdr { 7068c2ecf20Sopenharmony_ci struct iovec fast_iov[UIO_FASTIOV]; 7078c2ecf20Sopenharmony_ci /* points to an allocated iov, if NULL we use fast_iov instead */ 7088c2ecf20Sopenharmony_ci struct iovec *free_iov; 7098c2ecf20Sopenharmony_ci struct sockaddr __user *uaddr; 7108c2ecf20Sopenharmony_ci struct msghdr msg; 7118c2ecf20Sopenharmony_ci struct sockaddr_storage addr; 7128c2ecf20Sopenharmony_ci}; 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_cistruct io_async_rw { 7158c2ecf20Sopenharmony_ci struct iovec fast_iov[UIO_FASTIOV]; 7168c2ecf20Sopenharmony_ci const struct iovec *free_iovec; 7178c2ecf20Sopenharmony_ci struct iov_iter iter; 7188c2ecf20Sopenharmony_ci struct iov_iter_state iter_state; 7198c2ecf20Sopenharmony_ci size_t bytes_done; 7208c2ecf20Sopenharmony_ci struct wait_page_queue wpq; 7218c2ecf20Sopenharmony_ci}; 7228c2ecf20Sopenharmony_ci 
7238c2ecf20Sopenharmony_cienum { 7248c2ecf20Sopenharmony_ci REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, 7258c2ecf20Sopenharmony_ci REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, 7268c2ecf20Sopenharmony_ci REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, 7278c2ecf20Sopenharmony_ci REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT, 7288c2ecf20Sopenharmony_ci REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, 7298c2ecf20Sopenharmony_ci REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, 7308c2ecf20Sopenharmony_ci 7318c2ecf20Sopenharmony_ci /* first byte is taken by user flags, shift it to not overlap */ 7328c2ecf20Sopenharmony_ci REQ_F_FAIL_BIT = 8, 7338c2ecf20Sopenharmony_ci REQ_F_INFLIGHT_BIT, 7348c2ecf20Sopenharmony_ci REQ_F_CUR_POS_BIT, 7358c2ecf20Sopenharmony_ci REQ_F_NOWAIT_BIT, 7368c2ecf20Sopenharmony_ci REQ_F_LINK_TIMEOUT_BIT, 7378c2ecf20Sopenharmony_ci REQ_F_NEED_CLEANUP_BIT, 7388c2ecf20Sopenharmony_ci REQ_F_POLLED_BIT, 7398c2ecf20Sopenharmony_ci REQ_F_BUFFER_SELECTED_BIT, 7408c2ecf20Sopenharmony_ci REQ_F_COMPLETE_INLINE_BIT, 7418c2ecf20Sopenharmony_ci REQ_F_REISSUE_BIT, 7428c2ecf20Sopenharmony_ci REQ_F_CREDS_BIT, 7438c2ecf20Sopenharmony_ci REQ_F_REFCOUNT_BIT, 7448c2ecf20Sopenharmony_ci REQ_F_ARM_LTIMEOUT_BIT, 7458c2ecf20Sopenharmony_ci REQ_F_PARTIAL_IO_BIT, 7468c2ecf20Sopenharmony_ci /* keep async read/write and isreg together and in order */ 7478c2ecf20Sopenharmony_ci REQ_F_NOWAIT_READ_BIT, 7488c2ecf20Sopenharmony_ci REQ_F_NOWAIT_WRITE_BIT, 7498c2ecf20Sopenharmony_ci REQ_F_ISREG_BIT, 7508c2ecf20Sopenharmony_ci 7518c2ecf20Sopenharmony_ci /* not a real bit, just to check we're not overflowing the space */ 7528c2ecf20Sopenharmony_ci __REQ_F_LAST_BIT, 7538c2ecf20Sopenharmony_ci}; 7548c2ecf20Sopenharmony_ci 7558c2ecf20Sopenharmony_cienum { 7568c2ecf20Sopenharmony_ci /* ctx owns file */ 7578c2ecf20Sopenharmony_ci REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), 7588c2ecf20Sopenharmony_ci /* drain existing IO first */ 7598c2ecf20Sopenharmony_ci REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), 
7608c2ecf20Sopenharmony_ci /* linked sqes */ 7618c2ecf20Sopenharmony_ci REQ_F_LINK = BIT(REQ_F_LINK_BIT), 7628c2ecf20Sopenharmony_ci /* doesn't sever on completion < 0 */ 7638c2ecf20Sopenharmony_ci REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), 7648c2ecf20Sopenharmony_ci /* IOSQE_ASYNC */ 7658c2ecf20Sopenharmony_ci REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), 7668c2ecf20Sopenharmony_ci /* IOSQE_BUFFER_SELECT */ 7678c2ecf20Sopenharmony_ci REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), 7688c2ecf20Sopenharmony_ci 7698c2ecf20Sopenharmony_ci /* fail rest of links */ 7708c2ecf20Sopenharmony_ci REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), 7718c2ecf20Sopenharmony_ci /* on inflight list, should be cancelled and waited on exit reliably */ 7728c2ecf20Sopenharmony_ci REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), 7738c2ecf20Sopenharmony_ci /* read/write uses file position */ 7748c2ecf20Sopenharmony_ci REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), 7758c2ecf20Sopenharmony_ci /* must not punt to workers */ 7768c2ecf20Sopenharmony_ci REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), 7778c2ecf20Sopenharmony_ci /* has or had linked timeout */ 7788c2ecf20Sopenharmony_ci REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), 7798c2ecf20Sopenharmony_ci /* needs cleanup */ 7808c2ecf20Sopenharmony_ci REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), 7818c2ecf20Sopenharmony_ci /* already went through poll handler */ 7828c2ecf20Sopenharmony_ci REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), 7838c2ecf20Sopenharmony_ci /* buffer already selected */ 7848c2ecf20Sopenharmony_ci REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), 7858c2ecf20Sopenharmony_ci /* completion is deferred through io_comp_state */ 7868c2ecf20Sopenharmony_ci REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT), 7878c2ecf20Sopenharmony_ci /* caller should reissue async */ 7888c2ecf20Sopenharmony_ci REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), 7898c2ecf20Sopenharmony_ci /* supports async reads */ 7908c2ecf20Sopenharmony_ci REQ_F_NOWAIT_READ = 
BIT(REQ_F_NOWAIT_READ_BIT), 7918c2ecf20Sopenharmony_ci /* supports async writes */ 7928c2ecf20Sopenharmony_ci REQ_F_NOWAIT_WRITE = BIT(REQ_F_NOWAIT_WRITE_BIT), 7938c2ecf20Sopenharmony_ci /* regular file */ 7948c2ecf20Sopenharmony_ci REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), 7958c2ecf20Sopenharmony_ci /* has creds assigned */ 7968c2ecf20Sopenharmony_ci REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), 7978c2ecf20Sopenharmony_ci /* skip refcounting if not set */ 7988c2ecf20Sopenharmony_ci REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), 7998c2ecf20Sopenharmony_ci /* there is a linked timeout that has to be armed */ 8008c2ecf20Sopenharmony_ci REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), 8018c2ecf20Sopenharmony_ci /* request has already done partial IO */ 8028c2ecf20Sopenharmony_ci REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), 8038c2ecf20Sopenharmony_ci}; 8048c2ecf20Sopenharmony_ci 8058c2ecf20Sopenharmony_cistruct async_poll { 8068c2ecf20Sopenharmony_ci struct io_poll_iocb poll; 8078c2ecf20Sopenharmony_ci struct io_poll_iocb *double_poll; 8088c2ecf20Sopenharmony_ci}; 8098c2ecf20Sopenharmony_ci 8108c2ecf20Sopenharmony_citypedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_cistruct io_task_work { 8138c2ecf20Sopenharmony_ci union { 8148c2ecf20Sopenharmony_ci struct io_wq_work_node node; 8158c2ecf20Sopenharmony_ci struct llist_node fallback_node; 8168c2ecf20Sopenharmony_ci }; 8178c2ecf20Sopenharmony_ci io_req_tw_func_t func; 8188c2ecf20Sopenharmony_ci}; 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_cienum { 8218c2ecf20Sopenharmony_ci IORING_RSRC_FILE = 0, 8228c2ecf20Sopenharmony_ci IORING_RSRC_BUFFER = 1, 8238c2ecf20Sopenharmony_ci}; 8248c2ecf20Sopenharmony_ci 8258c2ecf20Sopenharmony_ci/* 8268c2ecf20Sopenharmony_ci * NOTE! Each of the iocb union members has the file pointer 8278c2ecf20Sopenharmony_ci * as the first entry in their struct definition. 
So you can 8288c2ecf20Sopenharmony_ci * access the file pointer through any of the sub-structs, 8298c2ecf20Sopenharmony_ci * or directly as just 'ki_filp' in this struct. 8308c2ecf20Sopenharmony_ci */ 8318c2ecf20Sopenharmony_cistruct io_kiocb { 8328c2ecf20Sopenharmony_ci union { 8338c2ecf20Sopenharmony_ci struct file *file; 8348c2ecf20Sopenharmony_ci struct io_rw rw; 8358c2ecf20Sopenharmony_ci struct io_poll_iocb poll; 8368c2ecf20Sopenharmony_ci struct io_poll_update poll_update; 8378c2ecf20Sopenharmony_ci struct io_accept accept; 8388c2ecf20Sopenharmony_ci struct io_sync sync; 8398c2ecf20Sopenharmony_ci struct io_cancel cancel; 8408c2ecf20Sopenharmony_ci struct io_timeout timeout; 8418c2ecf20Sopenharmony_ci struct io_timeout_rem timeout_rem; 8428c2ecf20Sopenharmony_ci struct io_connect connect; 8438c2ecf20Sopenharmony_ci struct io_sr_msg sr_msg; 8448c2ecf20Sopenharmony_ci struct io_open open; 8458c2ecf20Sopenharmony_ci struct io_close close; 8468c2ecf20Sopenharmony_ci struct io_rsrc_update rsrc_update; 8478c2ecf20Sopenharmony_ci struct io_fadvise fadvise; 8488c2ecf20Sopenharmony_ci struct io_madvise madvise; 8498c2ecf20Sopenharmony_ci struct io_epoll epoll; 8508c2ecf20Sopenharmony_ci struct io_splice splice; 8518c2ecf20Sopenharmony_ci struct io_provide_buf pbuf; 8528c2ecf20Sopenharmony_ci struct io_statx statx; 8538c2ecf20Sopenharmony_ci struct io_shutdown shutdown; 8548c2ecf20Sopenharmony_ci struct io_rename rename; 8558c2ecf20Sopenharmony_ci struct io_unlink unlink; 8568c2ecf20Sopenharmony_ci struct io_mkdir mkdir; 8578c2ecf20Sopenharmony_ci struct io_symlink symlink; 8588c2ecf20Sopenharmony_ci struct io_hardlink hardlink; 8598c2ecf20Sopenharmony_ci /* use only after cleaning per-op data, see io_clean_op() */ 8608c2ecf20Sopenharmony_ci struct io_completion compl; 8618c2ecf20Sopenharmony_ci }; 8628c2ecf20Sopenharmony_ci 8638c2ecf20Sopenharmony_ci /* opcode allocated if it needs to store data for async defer */ 8648c2ecf20Sopenharmony_ci void *async_data; 
8658c2ecf20Sopenharmony_ci u8 opcode; 8668c2ecf20Sopenharmony_ci /* polled IO has completed */ 8678c2ecf20Sopenharmony_ci u8 iopoll_completed; 8688c2ecf20Sopenharmony_ci 8698c2ecf20Sopenharmony_ci u16 buf_index; 8708c2ecf20Sopenharmony_ci u32 result; 8718c2ecf20Sopenharmony_ci 8728c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 8738c2ecf20Sopenharmony_ci unsigned int flags; 8748c2ecf20Sopenharmony_ci atomic_t refs; 8758c2ecf20Sopenharmony_ci struct task_struct *task; 8768c2ecf20Sopenharmony_ci u64 user_data; 8778c2ecf20Sopenharmony_ci 8788c2ecf20Sopenharmony_ci struct io_kiocb *link; 8798c2ecf20Sopenharmony_ci struct percpu_ref *fixed_rsrc_refs; 8808c2ecf20Sopenharmony_ci 8818c2ecf20Sopenharmony_ci /* used with ctx->iopoll_list with reads/writes */ 8828c2ecf20Sopenharmony_ci struct list_head inflight_entry; 8838c2ecf20Sopenharmony_ci struct io_task_work io_task_work; 8848c2ecf20Sopenharmony_ci /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ 8858c2ecf20Sopenharmony_ci struct hlist_node hash_node; 8868c2ecf20Sopenharmony_ci struct async_poll *apoll; 8878c2ecf20Sopenharmony_ci struct io_wq_work work; 8888c2ecf20Sopenharmony_ci const struct cred *creds; 8898c2ecf20Sopenharmony_ci 8908c2ecf20Sopenharmony_ci /* store used ubuf, so we can prevent reloading */ 8918c2ecf20Sopenharmony_ci struct io_mapped_ubuf *imu; 8928c2ecf20Sopenharmony_ci /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ 8938c2ecf20Sopenharmony_ci struct io_buffer *kbuf; 8948c2ecf20Sopenharmony_ci atomic_t poll_refs; 8958c2ecf20Sopenharmony_ci}; 8968c2ecf20Sopenharmony_ci 8978c2ecf20Sopenharmony_cistruct io_tctx_node { 8988c2ecf20Sopenharmony_ci struct list_head ctx_node; 8998c2ecf20Sopenharmony_ci struct task_struct *task; 9008c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 9018c2ecf20Sopenharmony_ci}; 9028c2ecf20Sopenharmony_ci 9038c2ecf20Sopenharmony_cistruct io_defer_entry { 9048c2ecf20Sopenharmony_ci struct list_head list; 9058c2ecf20Sopenharmony_ci struct 
io_kiocb *req; 9068c2ecf20Sopenharmony_ci u32 seq; 9078c2ecf20Sopenharmony_ci}; 9088c2ecf20Sopenharmony_ci 9098c2ecf20Sopenharmony_cistruct io_op_def { 9108c2ecf20Sopenharmony_ci /* needs req->file assigned */ 9118c2ecf20Sopenharmony_ci unsigned needs_file : 1; 9128c2ecf20Sopenharmony_ci /* hash wq insertion if file is a regular file */ 9138c2ecf20Sopenharmony_ci unsigned hash_reg_file : 1; 9148c2ecf20Sopenharmony_ci /* unbound wq insertion if file is a non-regular file */ 9158c2ecf20Sopenharmony_ci unsigned unbound_nonreg_file : 1; 9168c2ecf20Sopenharmony_ci /* opcode is not supported by this kernel */ 9178c2ecf20Sopenharmony_ci unsigned not_supported : 1; 9188c2ecf20Sopenharmony_ci /* set if opcode supports polled "wait" */ 9198c2ecf20Sopenharmony_ci unsigned pollin : 1; 9208c2ecf20Sopenharmony_ci unsigned pollout : 1; 9218c2ecf20Sopenharmony_ci /* op supports buffer selection */ 9228c2ecf20Sopenharmony_ci unsigned buffer_select : 1; 9238c2ecf20Sopenharmony_ci /* do prep async if is going to be punted */ 9248c2ecf20Sopenharmony_ci unsigned needs_async_setup : 1; 9258c2ecf20Sopenharmony_ci /* should block plug */ 9268c2ecf20Sopenharmony_ci unsigned plug : 1; 9278c2ecf20Sopenharmony_ci /* size of async data needed, if any */ 9288c2ecf20Sopenharmony_ci unsigned short async_size; 9298c2ecf20Sopenharmony_ci}; 9308c2ecf20Sopenharmony_ci 9318c2ecf20Sopenharmony_cistatic const struct io_op_def io_op_defs[] = { 9328c2ecf20Sopenharmony_ci [IORING_OP_NOP] = {}, 9338c2ecf20Sopenharmony_ci [IORING_OP_READV] = { 9348c2ecf20Sopenharmony_ci .needs_file = 1, 9358c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 9368c2ecf20Sopenharmony_ci .pollin = 1, 9378c2ecf20Sopenharmony_ci .buffer_select = 1, 9388c2ecf20Sopenharmony_ci .needs_async_setup = 1, 9398c2ecf20Sopenharmony_ci .plug = 1, 9408c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_async_rw), 9418c2ecf20Sopenharmony_ci }, 9428c2ecf20Sopenharmony_ci [IORING_OP_WRITEV] = { 9438c2ecf20Sopenharmony_ci .needs_file = 1, 
9448c2ecf20Sopenharmony_ci .hash_reg_file = 1, 9458c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 9468c2ecf20Sopenharmony_ci .pollout = 1, 9478c2ecf20Sopenharmony_ci .needs_async_setup = 1, 9488c2ecf20Sopenharmony_ci .plug = 1, 9498c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_async_rw), 9508c2ecf20Sopenharmony_ci }, 9518c2ecf20Sopenharmony_ci [IORING_OP_FSYNC] = { 9528c2ecf20Sopenharmony_ci .needs_file = 1, 9538c2ecf20Sopenharmony_ci }, 9548c2ecf20Sopenharmony_ci [IORING_OP_READ_FIXED] = { 9558c2ecf20Sopenharmony_ci .needs_file = 1, 9568c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 9578c2ecf20Sopenharmony_ci .pollin = 1, 9588c2ecf20Sopenharmony_ci .plug = 1, 9598c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_async_rw), 9608c2ecf20Sopenharmony_ci }, 9618c2ecf20Sopenharmony_ci [IORING_OP_WRITE_FIXED] = { 9628c2ecf20Sopenharmony_ci .needs_file = 1, 9638c2ecf20Sopenharmony_ci .hash_reg_file = 1, 9648c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 9658c2ecf20Sopenharmony_ci .pollout = 1, 9668c2ecf20Sopenharmony_ci .plug = 1, 9678c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_async_rw), 9688c2ecf20Sopenharmony_ci }, 9698c2ecf20Sopenharmony_ci [IORING_OP_POLL_ADD] = { 9708c2ecf20Sopenharmony_ci .needs_file = 1, 9718c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 9728c2ecf20Sopenharmony_ci }, 9738c2ecf20Sopenharmony_ci [IORING_OP_POLL_REMOVE] = {}, 9748c2ecf20Sopenharmony_ci [IORING_OP_SYNC_FILE_RANGE] = { 9758c2ecf20Sopenharmony_ci .needs_file = 1, 9768c2ecf20Sopenharmony_ci }, 9778c2ecf20Sopenharmony_ci [IORING_OP_SENDMSG] = { 9788c2ecf20Sopenharmony_ci .needs_file = 1, 9798c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 9808c2ecf20Sopenharmony_ci .pollout = 1, 9818c2ecf20Sopenharmony_ci .needs_async_setup = 1, 9828c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_async_msghdr), 9838c2ecf20Sopenharmony_ci }, 9848c2ecf20Sopenharmony_ci [IORING_OP_RECVMSG] = { 9858c2ecf20Sopenharmony_ci .needs_file = 1, 9868c2ecf20Sopenharmony_ci 
.unbound_nonreg_file = 1, 9878c2ecf20Sopenharmony_ci .pollin = 1, 9888c2ecf20Sopenharmony_ci .buffer_select = 1, 9898c2ecf20Sopenharmony_ci .needs_async_setup = 1, 9908c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_async_msghdr), 9918c2ecf20Sopenharmony_ci }, 9928c2ecf20Sopenharmony_ci [IORING_OP_TIMEOUT] = { 9938c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_timeout_data), 9948c2ecf20Sopenharmony_ci }, 9958c2ecf20Sopenharmony_ci [IORING_OP_TIMEOUT_REMOVE] = { 9968c2ecf20Sopenharmony_ci /* used by timeout updates' prep() */ 9978c2ecf20Sopenharmony_ci }, 9988c2ecf20Sopenharmony_ci [IORING_OP_ACCEPT] = { 9998c2ecf20Sopenharmony_ci .needs_file = 1, 10008c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 10018c2ecf20Sopenharmony_ci .pollin = 1, 10028c2ecf20Sopenharmony_ci }, 10038c2ecf20Sopenharmony_ci [IORING_OP_ASYNC_CANCEL] = {}, 10048c2ecf20Sopenharmony_ci [IORING_OP_LINK_TIMEOUT] = { 10058c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_timeout_data), 10068c2ecf20Sopenharmony_ci }, 10078c2ecf20Sopenharmony_ci [IORING_OP_CONNECT] = { 10088c2ecf20Sopenharmony_ci .needs_file = 1, 10098c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 10108c2ecf20Sopenharmony_ci .pollout = 1, 10118c2ecf20Sopenharmony_ci .needs_async_setup = 1, 10128c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_async_connect), 10138c2ecf20Sopenharmony_ci }, 10148c2ecf20Sopenharmony_ci [IORING_OP_FALLOCATE] = { 10158c2ecf20Sopenharmony_ci .needs_file = 1, 10168c2ecf20Sopenharmony_ci }, 10178c2ecf20Sopenharmony_ci [IORING_OP_OPENAT] = {}, 10188c2ecf20Sopenharmony_ci [IORING_OP_CLOSE] = {}, 10198c2ecf20Sopenharmony_ci [IORING_OP_FILES_UPDATE] = {}, 10208c2ecf20Sopenharmony_ci [IORING_OP_STATX] = {}, 10218c2ecf20Sopenharmony_ci [IORING_OP_READ] = { 10228c2ecf20Sopenharmony_ci .needs_file = 1, 10238c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 10248c2ecf20Sopenharmony_ci .pollin = 1, 10258c2ecf20Sopenharmony_ci .buffer_select = 1, 10268c2ecf20Sopenharmony_ci .plug = 1, 
10278c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_async_rw), 10288c2ecf20Sopenharmony_ci }, 10298c2ecf20Sopenharmony_ci [IORING_OP_WRITE] = { 10308c2ecf20Sopenharmony_ci .needs_file = 1, 10318c2ecf20Sopenharmony_ci .hash_reg_file = 1, 10328c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 10338c2ecf20Sopenharmony_ci .pollout = 1, 10348c2ecf20Sopenharmony_ci .plug = 1, 10358c2ecf20Sopenharmony_ci .async_size = sizeof(struct io_async_rw), 10368c2ecf20Sopenharmony_ci }, 10378c2ecf20Sopenharmony_ci [IORING_OP_FADVISE] = { 10388c2ecf20Sopenharmony_ci .needs_file = 1, 10398c2ecf20Sopenharmony_ci }, 10408c2ecf20Sopenharmony_ci [IORING_OP_MADVISE] = {}, 10418c2ecf20Sopenharmony_ci [IORING_OP_SEND] = { 10428c2ecf20Sopenharmony_ci .needs_file = 1, 10438c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 10448c2ecf20Sopenharmony_ci .pollout = 1, 10458c2ecf20Sopenharmony_ci }, 10468c2ecf20Sopenharmony_ci [IORING_OP_RECV] = { 10478c2ecf20Sopenharmony_ci .needs_file = 1, 10488c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 10498c2ecf20Sopenharmony_ci .pollin = 1, 10508c2ecf20Sopenharmony_ci .buffer_select = 1, 10518c2ecf20Sopenharmony_ci }, 10528c2ecf20Sopenharmony_ci [IORING_OP_OPENAT2] = { 10538c2ecf20Sopenharmony_ci }, 10548c2ecf20Sopenharmony_ci [IORING_OP_EPOLL_CTL] = { 10558c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 10568c2ecf20Sopenharmony_ci }, 10578c2ecf20Sopenharmony_ci [IORING_OP_SPLICE] = { 10588c2ecf20Sopenharmony_ci .needs_file = 1, 10598c2ecf20Sopenharmony_ci .hash_reg_file = 1, 10608c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 10618c2ecf20Sopenharmony_ci }, 10628c2ecf20Sopenharmony_ci [IORING_OP_PROVIDE_BUFFERS] = {}, 10638c2ecf20Sopenharmony_ci [IORING_OP_REMOVE_BUFFERS] = {}, 10648c2ecf20Sopenharmony_ci [IORING_OP_TEE] = { 10658c2ecf20Sopenharmony_ci .needs_file = 1, 10668c2ecf20Sopenharmony_ci .hash_reg_file = 1, 10678c2ecf20Sopenharmony_ci .unbound_nonreg_file = 1, 10688c2ecf20Sopenharmony_ci }, 10698c2ecf20Sopenharmony_ci [IORING_OP_SHUTDOWN] 
= { 10708c2ecf20Sopenharmony_ci .needs_file = 1, 10718c2ecf20Sopenharmony_ci }, 10728c2ecf20Sopenharmony_ci [IORING_OP_RENAMEAT] = {}, 10738c2ecf20Sopenharmony_ci [IORING_OP_UNLINKAT] = {}, 10748c2ecf20Sopenharmony_ci}; 10758c2ecf20Sopenharmony_ci 10768c2ecf20Sopenharmony_ci/* requests with any of those set should undergo io_disarm_next() */ 10778c2ecf20Sopenharmony_ci#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL) 10788c2ecf20Sopenharmony_ci 10798c2ecf20Sopenharmony_cistatic bool io_disarm_next(struct io_kiocb *req); 10808c2ecf20Sopenharmony_cistatic void io_uring_del_tctx_node(unsigned long index); 10818c2ecf20Sopenharmony_cistatic void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, 10828c2ecf20Sopenharmony_ci struct task_struct *task, 10838c2ecf20Sopenharmony_ci bool cancel_all); 10848c2ecf20Sopenharmony_cistatic void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); 10858c2ecf20Sopenharmony_ci 10868c2ecf20Sopenharmony_cistatic void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags); 10878c2ecf20Sopenharmony_ci 10888c2ecf20Sopenharmony_cistatic void io_put_req(struct io_kiocb *req); 10898c2ecf20Sopenharmony_cistatic void io_put_req_deferred(struct io_kiocb *req); 10908c2ecf20Sopenharmony_cistatic void io_dismantle_req(struct io_kiocb *req); 10918c2ecf20Sopenharmony_cistatic void io_queue_linked_timeout(struct io_kiocb *req); 10928c2ecf20Sopenharmony_cistatic int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, 10938c2ecf20Sopenharmony_ci struct io_uring_rsrc_update2 *up, 10948c2ecf20Sopenharmony_ci unsigned nr_args); 10958c2ecf20Sopenharmony_cistatic void io_clean_op(struct io_kiocb *req); 10968c2ecf20Sopenharmony_cistatic struct file *io_file_get(struct io_ring_ctx *ctx, 10978c2ecf20Sopenharmony_ci struct io_kiocb *req, int fd, bool fixed, 10988c2ecf20Sopenharmony_ci unsigned int issue_flags); 10998c2ecf20Sopenharmony_cistatic void __io_queue_sqe(struct io_kiocb *req); 
11008c2ecf20Sopenharmony_cistatic void io_rsrc_put_work(struct work_struct *work); 11018c2ecf20Sopenharmony_ci 11028c2ecf20Sopenharmony_cistatic void io_req_task_queue(struct io_kiocb *req); 11038c2ecf20Sopenharmony_cistatic void io_submit_flush_completions(struct io_ring_ctx *ctx); 11048c2ecf20Sopenharmony_cistatic int io_req_prep_async(struct io_kiocb *req); 11058c2ecf20Sopenharmony_ci 11068c2ecf20Sopenharmony_cistatic int io_install_fixed_file(struct io_kiocb *req, struct file *file, 11078c2ecf20Sopenharmony_ci unsigned int issue_flags, u32 slot_index); 11088c2ecf20Sopenharmony_cistatic int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); 11098c2ecf20Sopenharmony_ci 11108c2ecf20Sopenharmony_cistatic enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); 11118c2ecf20Sopenharmony_ci 11128c2ecf20Sopenharmony_cistatic struct kmem_cache *req_cachep; 11138c2ecf20Sopenharmony_ci 11148c2ecf20Sopenharmony_cistatic const struct file_operations io_uring_fops; 11158c2ecf20Sopenharmony_ci 11168c2ecf20Sopenharmony_cistruct sock *io_uring_get_socket(struct file *file) 11178c2ecf20Sopenharmony_ci{ 11188c2ecf20Sopenharmony_ci#if defined(CONFIG_UNIX) 11198c2ecf20Sopenharmony_ci if (file->f_op == &io_uring_fops) { 11208c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = file->private_data; 11218c2ecf20Sopenharmony_ci 11228c2ecf20Sopenharmony_ci return ctx->ring_sock->sk; 11238c2ecf20Sopenharmony_ci } 11248c2ecf20Sopenharmony_ci#endif 11258c2ecf20Sopenharmony_ci return NULL; 11268c2ecf20Sopenharmony_ci} 11278c2ecf20Sopenharmony_ciEXPORT_SYMBOL(io_uring_get_socket); 11288c2ecf20Sopenharmony_ci 11298c2ecf20Sopenharmony_cistatic inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked) 11308c2ecf20Sopenharmony_ci{ 11318c2ecf20Sopenharmony_ci if (!*locked) { 11328c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 11338c2ecf20Sopenharmony_ci *locked = true; 11348c2ecf20Sopenharmony_ci } 11358c2ecf20Sopenharmony_ci} 11368c2ecf20Sopenharmony_ci 
11378c2ecf20Sopenharmony_ci#define io_for_each_link(pos, head) \ 11388c2ecf20Sopenharmony_ci for (pos = (head); pos; pos = pos->link) 11398c2ecf20Sopenharmony_ci 11408c2ecf20Sopenharmony_ci/* 11418c2ecf20Sopenharmony_ci * Shamelessly stolen from the mm implementation of page reference checking, 11428c2ecf20Sopenharmony_ci * see commit f958d7b528b1 for details. 11438c2ecf20Sopenharmony_ci */ 11448c2ecf20Sopenharmony_ci#define req_ref_zero_or_close_to_overflow(req) \ 11458c2ecf20Sopenharmony_ci ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u) 11468c2ecf20Sopenharmony_ci 11478c2ecf20Sopenharmony_cistatic inline bool req_ref_inc_not_zero(struct io_kiocb *req) 11488c2ecf20Sopenharmony_ci{ 11498c2ecf20Sopenharmony_ci WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); 11508c2ecf20Sopenharmony_ci return atomic_inc_not_zero(&req->refs); 11518c2ecf20Sopenharmony_ci} 11528c2ecf20Sopenharmony_ci 11538c2ecf20Sopenharmony_cistatic inline bool req_ref_put_and_test(struct io_kiocb *req) 11548c2ecf20Sopenharmony_ci{ 11558c2ecf20Sopenharmony_ci if (likely(!(req->flags & REQ_F_REFCOUNT))) 11568c2ecf20Sopenharmony_ci return true; 11578c2ecf20Sopenharmony_ci 11588c2ecf20Sopenharmony_ci WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); 11598c2ecf20Sopenharmony_ci return atomic_dec_and_test(&req->refs); 11608c2ecf20Sopenharmony_ci} 11618c2ecf20Sopenharmony_ci 11628c2ecf20Sopenharmony_cistatic inline void req_ref_get(struct io_kiocb *req) 11638c2ecf20Sopenharmony_ci{ 11648c2ecf20Sopenharmony_ci WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); 11658c2ecf20Sopenharmony_ci WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); 11668c2ecf20Sopenharmony_ci atomic_inc(&req->refs); 11678c2ecf20Sopenharmony_ci} 11688c2ecf20Sopenharmony_ci 11698c2ecf20Sopenharmony_cistatic inline void __io_req_set_refcount(struct io_kiocb *req, int nr) 11708c2ecf20Sopenharmony_ci{ 11718c2ecf20Sopenharmony_ci if (!(req->flags & REQ_F_REFCOUNT)) { 11728c2ecf20Sopenharmony_ci req->flags |= REQ_F_REFCOUNT; 
11738c2ecf20Sopenharmony_ci atomic_set(&req->refs, nr); 11748c2ecf20Sopenharmony_ci } 11758c2ecf20Sopenharmony_ci} 11768c2ecf20Sopenharmony_ci 11778c2ecf20Sopenharmony_cistatic inline void io_req_set_refcount(struct io_kiocb *req) 11788c2ecf20Sopenharmony_ci{ 11798c2ecf20Sopenharmony_ci __io_req_set_refcount(req, 1); 11808c2ecf20Sopenharmony_ci} 11818c2ecf20Sopenharmony_ci 11828c2ecf20Sopenharmony_cistatic inline void io_req_set_rsrc_node(struct io_kiocb *req) 11838c2ecf20Sopenharmony_ci{ 11848c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 11858c2ecf20Sopenharmony_ci 11868c2ecf20Sopenharmony_ci if (!req->fixed_rsrc_refs) { 11878c2ecf20Sopenharmony_ci req->fixed_rsrc_refs = &ctx->rsrc_node->refs; 11888c2ecf20Sopenharmony_ci percpu_ref_get(req->fixed_rsrc_refs); 11898c2ecf20Sopenharmony_ci } 11908c2ecf20Sopenharmony_ci} 11918c2ecf20Sopenharmony_ci 11928c2ecf20Sopenharmony_cistatic void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl) 11938c2ecf20Sopenharmony_ci{ 11948c2ecf20Sopenharmony_ci bool got = percpu_ref_tryget(ref); 11958c2ecf20Sopenharmony_ci 11968c2ecf20Sopenharmony_ci /* already at zero, wait for ->release() */ 11978c2ecf20Sopenharmony_ci if (!got) 11988c2ecf20Sopenharmony_ci wait_for_completion(compl); 11998c2ecf20Sopenharmony_ci percpu_ref_resurrect(ref); 12008c2ecf20Sopenharmony_ci if (got) 12018c2ecf20Sopenharmony_ci percpu_ref_put(ref); 12028c2ecf20Sopenharmony_ci} 12038c2ecf20Sopenharmony_ci 12048c2ecf20Sopenharmony_cistatic bool io_match_task(struct io_kiocb *head, struct task_struct *task, 12058c2ecf20Sopenharmony_ci bool cancel_all) 12068c2ecf20Sopenharmony_ci __must_hold(&req->ctx->timeout_lock) 12078c2ecf20Sopenharmony_ci{ 12088c2ecf20Sopenharmony_ci struct io_kiocb *req; 12098c2ecf20Sopenharmony_ci 12108c2ecf20Sopenharmony_ci if (task && head->task != task) 12118c2ecf20Sopenharmony_ci return false; 12128c2ecf20Sopenharmony_ci if (cancel_all) 12138c2ecf20Sopenharmony_ci return true; 12148c2ecf20Sopenharmony_ci 
12158c2ecf20Sopenharmony_ci io_for_each_link(req, head) { 12168c2ecf20Sopenharmony_ci if (req->flags & REQ_F_INFLIGHT) 12178c2ecf20Sopenharmony_ci return true; 12188c2ecf20Sopenharmony_ci } 12198c2ecf20Sopenharmony_ci return false; 12208c2ecf20Sopenharmony_ci} 12218c2ecf20Sopenharmony_ci 12228c2ecf20Sopenharmony_cistatic bool io_match_linked(struct io_kiocb *head) 12238c2ecf20Sopenharmony_ci{ 12248c2ecf20Sopenharmony_ci struct io_kiocb *req; 12258c2ecf20Sopenharmony_ci 12268c2ecf20Sopenharmony_ci io_for_each_link(req, head) { 12278c2ecf20Sopenharmony_ci if (req->flags & REQ_F_INFLIGHT) 12288c2ecf20Sopenharmony_ci return true; 12298c2ecf20Sopenharmony_ci } 12308c2ecf20Sopenharmony_ci return false; 12318c2ecf20Sopenharmony_ci} 12328c2ecf20Sopenharmony_ci 12338c2ecf20Sopenharmony_ci/* 12348c2ecf20Sopenharmony_ci * As io_match_task() but protected against racing with linked timeouts. 12358c2ecf20Sopenharmony_ci * User must not hold timeout_lock. 12368c2ecf20Sopenharmony_ci */ 12378c2ecf20Sopenharmony_cistatic bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, 12388c2ecf20Sopenharmony_ci bool cancel_all) 12398c2ecf20Sopenharmony_ci{ 12408c2ecf20Sopenharmony_ci bool matched; 12418c2ecf20Sopenharmony_ci 12428c2ecf20Sopenharmony_ci if (task && head->task != task) 12438c2ecf20Sopenharmony_ci return false; 12448c2ecf20Sopenharmony_ci if (cancel_all) 12458c2ecf20Sopenharmony_ci return true; 12468c2ecf20Sopenharmony_ci 12478c2ecf20Sopenharmony_ci if (head->flags & REQ_F_LINK_TIMEOUT) { 12488c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = head->ctx; 12498c2ecf20Sopenharmony_ci 12508c2ecf20Sopenharmony_ci /* protect against races with linked timeouts */ 12518c2ecf20Sopenharmony_ci spin_lock_irq(&ctx->timeout_lock); 12528c2ecf20Sopenharmony_ci matched = io_match_linked(head); 12538c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->timeout_lock); 12548c2ecf20Sopenharmony_ci } else { 12558c2ecf20Sopenharmony_ci matched = io_match_linked(head); 
12568c2ecf20Sopenharmony_ci } 12578c2ecf20Sopenharmony_ci return matched; 12588c2ecf20Sopenharmony_ci} 12598c2ecf20Sopenharmony_ci 12608c2ecf20Sopenharmony_cistatic inline void req_set_fail(struct io_kiocb *req) 12618c2ecf20Sopenharmony_ci{ 12628c2ecf20Sopenharmony_ci req->flags |= REQ_F_FAIL; 12638c2ecf20Sopenharmony_ci} 12648c2ecf20Sopenharmony_ci 12658c2ecf20Sopenharmony_cistatic inline void req_fail_link_node(struct io_kiocb *req, int res) 12668c2ecf20Sopenharmony_ci{ 12678c2ecf20Sopenharmony_ci req_set_fail(req); 12688c2ecf20Sopenharmony_ci req->result = res; 12698c2ecf20Sopenharmony_ci} 12708c2ecf20Sopenharmony_ci 12718c2ecf20Sopenharmony_cistatic void io_ring_ctx_ref_free(struct percpu_ref *ref) 12728c2ecf20Sopenharmony_ci{ 12738c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs); 12748c2ecf20Sopenharmony_ci 12758c2ecf20Sopenharmony_ci complete(&ctx->ref_comp); 12768c2ecf20Sopenharmony_ci} 12778c2ecf20Sopenharmony_ci 12788c2ecf20Sopenharmony_cistatic inline bool io_is_timeout_noseq(struct io_kiocb *req) 12798c2ecf20Sopenharmony_ci{ 12808c2ecf20Sopenharmony_ci return !req->timeout.off; 12818c2ecf20Sopenharmony_ci} 12828c2ecf20Sopenharmony_ci 12838c2ecf20Sopenharmony_cistatic void io_fallback_req_func(struct work_struct *work) 12848c2ecf20Sopenharmony_ci{ 12858c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, 12868c2ecf20Sopenharmony_ci fallback_work.work); 12878c2ecf20Sopenharmony_ci struct llist_node *node = llist_del_all(&ctx->fallback_llist); 12888c2ecf20Sopenharmony_ci struct io_kiocb *req, *tmp; 12898c2ecf20Sopenharmony_ci bool locked = false; 12908c2ecf20Sopenharmony_ci 12918c2ecf20Sopenharmony_ci percpu_ref_get(&ctx->refs); 12928c2ecf20Sopenharmony_ci llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node) 12938c2ecf20Sopenharmony_ci req->io_task_work.func(req, &locked); 12948c2ecf20Sopenharmony_ci 12958c2ecf20Sopenharmony_ci if (locked) { 
12968c2ecf20Sopenharmony_ci if (ctx->submit_state.compl_nr) 12978c2ecf20Sopenharmony_ci io_submit_flush_completions(ctx); 12988c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 12998c2ecf20Sopenharmony_ci } 13008c2ecf20Sopenharmony_ci percpu_ref_put(&ctx->refs); 13018c2ecf20Sopenharmony_ci 13028c2ecf20Sopenharmony_ci} 13038c2ecf20Sopenharmony_ci 13048c2ecf20Sopenharmony_cistatic struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) 13058c2ecf20Sopenharmony_ci{ 13068c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 13078c2ecf20Sopenharmony_ci int hash_bits; 13088c2ecf20Sopenharmony_ci 13098c2ecf20Sopenharmony_ci ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 13108c2ecf20Sopenharmony_ci if (!ctx) 13118c2ecf20Sopenharmony_ci return NULL; 13128c2ecf20Sopenharmony_ci 13138c2ecf20Sopenharmony_ci /* 13148c2ecf20Sopenharmony_ci * Use 5 bits less than the max cq entries, that should give us around 13158c2ecf20Sopenharmony_ci * 32 entries per hash list if totally full and uniformly spread. 
13168c2ecf20Sopenharmony_ci */ 13178c2ecf20Sopenharmony_ci hash_bits = ilog2(p->cq_entries); 13188c2ecf20Sopenharmony_ci hash_bits -= 5; 13198c2ecf20Sopenharmony_ci if (hash_bits <= 0) 13208c2ecf20Sopenharmony_ci hash_bits = 1; 13218c2ecf20Sopenharmony_ci ctx->cancel_hash_bits = hash_bits; 13228c2ecf20Sopenharmony_ci ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head), 13238c2ecf20Sopenharmony_ci GFP_KERNEL); 13248c2ecf20Sopenharmony_ci if (!ctx->cancel_hash) 13258c2ecf20Sopenharmony_ci goto err; 13268c2ecf20Sopenharmony_ci __hash_init(ctx->cancel_hash, 1U << hash_bits); 13278c2ecf20Sopenharmony_ci 13288c2ecf20Sopenharmony_ci ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL); 13298c2ecf20Sopenharmony_ci if (!ctx->dummy_ubuf) 13308c2ecf20Sopenharmony_ci goto err; 13318c2ecf20Sopenharmony_ci /* set invalid range, so io_import_fixed() fails meeting it */ 13328c2ecf20Sopenharmony_ci ctx->dummy_ubuf->ubuf = -1UL; 13338c2ecf20Sopenharmony_ci 13348c2ecf20Sopenharmony_ci if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, 13358c2ecf20Sopenharmony_ci PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) 13368c2ecf20Sopenharmony_ci goto err; 13378c2ecf20Sopenharmony_ci 13388c2ecf20Sopenharmony_ci ctx->flags = p->flags; 13398c2ecf20Sopenharmony_ci init_waitqueue_head(&ctx->sqo_sq_wait); 13408c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ctx->sqd_list); 13418c2ecf20Sopenharmony_ci init_waitqueue_head(&ctx->poll_wait); 13428c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ctx->cq_overflow_list); 13438c2ecf20Sopenharmony_ci init_completion(&ctx->ref_comp); 13448c2ecf20Sopenharmony_ci xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1); 13458c2ecf20Sopenharmony_ci xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); 13468c2ecf20Sopenharmony_ci mutex_init(&ctx->uring_lock); 13478c2ecf20Sopenharmony_ci init_waitqueue_head(&ctx->cq_wait); 13488c2ecf20Sopenharmony_ci spin_lock_init(&ctx->completion_lock); 13498c2ecf20Sopenharmony_ci spin_lock_init(&ctx->timeout_lock); 
13508c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ctx->iopoll_list); 13518c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ctx->defer_list); 13528c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ctx->timeout_list); 13538c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ctx->ltimeout_list); 13548c2ecf20Sopenharmony_ci spin_lock_init(&ctx->rsrc_ref_lock); 13558c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ctx->rsrc_ref_list); 13568c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work); 13578c2ecf20Sopenharmony_ci init_llist_head(&ctx->rsrc_put_llist); 13588c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ctx->tctx_list); 13598c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ctx->submit_state.free_list); 13608c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ctx->locked_free_list); 13618c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); 13628c2ecf20Sopenharmony_ci return ctx; 13638c2ecf20Sopenharmony_cierr: 13648c2ecf20Sopenharmony_ci kfree(ctx->dummy_ubuf); 13658c2ecf20Sopenharmony_ci kfree(ctx->cancel_hash); 13668c2ecf20Sopenharmony_ci kfree(ctx); 13678c2ecf20Sopenharmony_ci return NULL; 13688c2ecf20Sopenharmony_ci} 13698c2ecf20Sopenharmony_ci 13708c2ecf20Sopenharmony_cistatic void io_account_cq_overflow(struct io_ring_ctx *ctx) 13718c2ecf20Sopenharmony_ci{ 13728c2ecf20Sopenharmony_ci struct io_rings *r = ctx->rings; 13738c2ecf20Sopenharmony_ci 13748c2ecf20Sopenharmony_ci WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1); 13758c2ecf20Sopenharmony_ci ctx->cq_extra--; 13768c2ecf20Sopenharmony_ci} 13778c2ecf20Sopenharmony_ci 13788c2ecf20Sopenharmony_cistatic bool req_need_defer(struct io_kiocb *req, u32 seq) 13798c2ecf20Sopenharmony_ci{ 13808c2ecf20Sopenharmony_ci if (unlikely(req->flags & REQ_F_IO_DRAIN)) { 13818c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 13828c2ecf20Sopenharmony_ci 13838c2ecf20Sopenharmony_ci return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail; 13848c2ecf20Sopenharmony_ci } 13858c2ecf20Sopenharmony_ci 13868c2ecf20Sopenharmony_ci 
return false; 13878c2ecf20Sopenharmony_ci} 13888c2ecf20Sopenharmony_ci 13898c2ecf20Sopenharmony_ci#define FFS_ASYNC_READ 0x1UL 13908c2ecf20Sopenharmony_ci#define FFS_ASYNC_WRITE 0x2UL 13918c2ecf20Sopenharmony_ci#ifdef CONFIG_64BIT 13928c2ecf20Sopenharmony_ci#define FFS_ISREG 0x4UL 13938c2ecf20Sopenharmony_ci#else 13948c2ecf20Sopenharmony_ci#define FFS_ISREG 0x0UL 13958c2ecf20Sopenharmony_ci#endif 13968c2ecf20Sopenharmony_ci#define FFS_MASK ~(FFS_ASYNC_READ|FFS_ASYNC_WRITE|FFS_ISREG) 13978c2ecf20Sopenharmony_ci 13988c2ecf20Sopenharmony_cistatic inline bool io_req_ffs_set(struct io_kiocb *req) 13998c2ecf20Sopenharmony_ci{ 14008c2ecf20Sopenharmony_ci return IS_ENABLED(CONFIG_64BIT) && (req->flags & REQ_F_FIXED_FILE); 14018c2ecf20Sopenharmony_ci} 14028c2ecf20Sopenharmony_ci 14038c2ecf20Sopenharmony_cistatic void io_req_track_inflight(struct io_kiocb *req) 14048c2ecf20Sopenharmony_ci{ 14058c2ecf20Sopenharmony_ci if (!(req->flags & REQ_F_INFLIGHT)) { 14068c2ecf20Sopenharmony_ci req->flags |= REQ_F_INFLIGHT; 14078c2ecf20Sopenharmony_ci atomic_inc(&req->task->io_uring->inflight_tracked); 14088c2ecf20Sopenharmony_ci } 14098c2ecf20Sopenharmony_ci} 14108c2ecf20Sopenharmony_ci 14118c2ecf20Sopenharmony_cistatic struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) 14128c2ecf20Sopenharmony_ci{ 14138c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!req->link)) 14148c2ecf20Sopenharmony_ci return NULL; 14158c2ecf20Sopenharmony_ci 14168c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_ARM_LTIMEOUT; 14178c2ecf20Sopenharmony_ci req->flags |= REQ_F_LINK_TIMEOUT; 14188c2ecf20Sopenharmony_ci 14198c2ecf20Sopenharmony_ci /* linked timeouts should have two refs once prep'ed */ 14208c2ecf20Sopenharmony_ci io_req_set_refcount(req); 14218c2ecf20Sopenharmony_ci __io_req_set_refcount(req->link, 2); 14228c2ecf20Sopenharmony_ci return req->link; 14238c2ecf20Sopenharmony_ci} 14248c2ecf20Sopenharmony_ci 14258c2ecf20Sopenharmony_cistatic inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) 
14268c2ecf20Sopenharmony_ci{ 14278c2ecf20Sopenharmony_ci if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) 14288c2ecf20Sopenharmony_ci return NULL; 14298c2ecf20Sopenharmony_ci return __io_prep_linked_timeout(req); 14308c2ecf20Sopenharmony_ci} 14318c2ecf20Sopenharmony_ci 14328c2ecf20Sopenharmony_cistatic void io_prep_async_work(struct io_kiocb *req) 14338c2ecf20Sopenharmony_ci{ 14348c2ecf20Sopenharmony_ci const struct io_op_def *def = &io_op_defs[req->opcode]; 14358c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 14368c2ecf20Sopenharmony_ci 14378c2ecf20Sopenharmony_ci if (!(req->flags & REQ_F_CREDS)) { 14388c2ecf20Sopenharmony_ci req->flags |= REQ_F_CREDS; 14398c2ecf20Sopenharmony_ci req->creds = get_current_cred(); 14408c2ecf20Sopenharmony_ci } 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_ci req->work.list.next = NULL; 14438c2ecf20Sopenharmony_ci req->work.flags = 0; 14448c2ecf20Sopenharmony_ci if (req->flags & REQ_F_FORCE_ASYNC) 14458c2ecf20Sopenharmony_ci req->work.flags |= IO_WQ_WORK_CONCURRENT; 14468c2ecf20Sopenharmony_ci 14478c2ecf20Sopenharmony_ci if (req->flags & REQ_F_ISREG) { 14488c2ecf20Sopenharmony_ci if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL)) 14498c2ecf20Sopenharmony_ci io_wq_hash_work(&req->work, file_inode(req->file)); 14508c2ecf20Sopenharmony_ci } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) { 14518c2ecf20Sopenharmony_ci if (def->unbound_nonreg_file) 14528c2ecf20Sopenharmony_ci req->work.flags |= IO_WQ_WORK_UNBOUND; 14538c2ecf20Sopenharmony_ci } 14548c2ecf20Sopenharmony_ci} 14558c2ecf20Sopenharmony_ci 14568c2ecf20Sopenharmony_cistatic void io_prep_async_link(struct io_kiocb *req) 14578c2ecf20Sopenharmony_ci{ 14588c2ecf20Sopenharmony_ci struct io_kiocb *cur; 14598c2ecf20Sopenharmony_ci 14608c2ecf20Sopenharmony_ci if (req->flags & REQ_F_LINK_TIMEOUT) { 14618c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 14628c2ecf20Sopenharmony_ci 14638c2ecf20Sopenharmony_ci 
spin_lock_irq(&ctx->timeout_lock); 14648c2ecf20Sopenharmony_ci io_for_each_link(cur, req) 14658c2ecf20Sopenharmony_ci io_prep_async_work(cur); 14668c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->timeout_lock); 14678c2ecf20Sopenharmony_ci } else { 14688c2ecf20Sopenharmony_ci io_for_each_link(cur, req) 14698c2ecf20Sopenharmony_ci io_prep_async_work(cur); 14708c2ecf20Sopenharmony_ci } 14718c2ecf20Sopenharmony_ci} 14728c2ecf20Sopenharmony_ci 14738c2ecf20Sopenharmony_cistatic void io_queue_async_work(struct io_kiocb *req, bool *locked) 14748c2ecf20Sopenharmony_ci{ 14758c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 14768c2ecf20Sopenharmony_ci struct io_kiocb *link = io_prep_linked_timeout(req); 14778c2ecf20Sopenharmony_ci struct io_uring_task *tctx = req->task->io_uring; 14788c2ecf20Sopenharmony_ci 14798c2ecf20Sopenharmony_ci /* must not take the lock, NULL it as a precaution */ 14808c2ecf20Sopenharmony_ci locked = NULL; 14818c2ecf20Sopenharmony_ci 14828c2ecf20Sopenharmony_ci BUG_ON(!tctx); 14838c2ecf20Sopenharmony_ci BUG_ON(!tctx->io_wq); 14848c2ecf20Sopenharmony_ci 14858c2ecf20Sopenharmony_ci /* init ->work of the whole link before punting */ 14868c2ecf20Sopenharmony_ci io_prep_async_link(req); 14878c2ecf20Sopenharmony_ci 14888c2ecf20Sopenharmony_ci /* 14898c2ecf20Sopenharmony_ci * Not expected to happen, but if we do have a bug where this _can_ 14908c2ecf20Sopenharmony_ci * happen, catch it here and ensure the request is marked as 14918c2ecf20Sopenharmony_ci * canceled. That will make io-wq go through the usual work cancel 14928c2ecf20Sopenharmony_ci * procedure rather than attempt to run this request (or create a new 14938c2ecf20Sopenharmony_ci * worker for it). 
14948c2ecf20Sopenharmony_ci */ 14958c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!same_thread_group(req->task, current))) 14968c2ecf20Sopenharmony_ci req->work.flags |= IO_WQ_WORK_CANCEL; 14978c2ecf20Sopenharmony_ci 14988c2ecf20Sopenharmony_ci trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, 14998c2ecf20Sopenharmony_ci &req->work, req->flags); 15008c2ecf20Sopenharmony_ci io_wq_enqueue(tctx->io_wq, &req->work); 15018c2ecf20Sopenharmony_ci if (link) 15028c2ecf20Sopenharmony_ci io_queue_linked_timeout(link); 15038c2ecf20Sopenharmony_ci} 15048c2ecf20Sopenharmony_ci 15058c2ecf20Sopenharmony_cistatic void io_kill_timeout(struct io_kiocb *req, int status) 15068c2ecf20Sopenharmony_ci __must_hold(&req->ctx->completion_lock) 15078c2ecf20Sopenharmony_ci __must_hold(&req->ctx->timeout_lock) 15088c2ecf20Sopenharmony_ci{ 15098c2ecf20Sopenharmony_ci struct io_timeout_data *io = req->async_data; 15108c2ecf20Sopenharmony_ci 15118c2ecf20Sopenharmony_ci if (hrtimer_try_to_cancel(&io->timer) != -1) { 15128c2ecf20Sopenharmony_ci if (status) 15138c2ecf20Sopenharmony_ci req_set_fail(req); 15148c2ecf20Sopenharmony_ci atomic_set(&req->ctx->cq_timeouts, 15158c2ecf20Sopenharmony_ci atomic_read(&req->ctx->cq_timeouts) + 1); 15168c2ecf20Sopenharmony_ci list_del_init(&req->timeout.list); 15178c2ecf20Sopenharmony_ci io_fill_cqe_req(req, status, 0); 15188c2ecf20Sopenharmony_ci io_put_req_deferred(req); 15198c2ecf20Sopenharmony_ci } 15208c2ecf20Sopenharmony_ci} 15218c2ecf20Sopenharmony_ci 15228c2ecf20Sopenharmony_cistatic void io_queue_deferred(struct io_ring_ctx *ctx) 15238c2ecf20Sopenharmony_ci{ 15248c2ecf20Sopenharmony_ci lockdep_assert_held(&ctx->completion_lock); 15258c2ecf20Sopenharmony_ci 15268c2ecf20Sopenharmony_ci while (!list_empty(&ctx->defer_list)) { 15278c2ecf20Sopenharmony_ci struct io_defer_entry *de = list_first_entry(&ctx->defer_list, 15288c2ecf20Sopenharmony_ci struct io_defer_entry, list); 15298c2ecf20Sopenharmony_ci 15308c2ecf20Sopenharmony_ci if 
(req_need_defer(de->req, de->seq)) 15318c2ecf20Sopenharmony_ci break; 15328c2ecf20Sopenharmony_ci list_del_init(&de->list); 15338c2ecf20Sopenharmony_ci io_req_task_queue(de->req); 15348c2ecf20Sopenharmony_ci kfree(de); 15358c2ecf20Sopenharmony_ci } 15368c2ecf20Sopenharmony_ci} 15378c2ecf20Sopenharmony_ci 15388c2ecf20Sopenharmony_cistatic void io_flush_timeouts(struct io_ring_ctx *ctx) 15398c2ecf20Sopenharmony_ci __must_hold(&ctx->completion_lock) 15408c2ecf20Sopenharmony_ci{ 15418c2ecf20Sopenharmony_ci u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); 15428c2ecf20Sopenharmony_ci struct io_kiocb *req, *tmp; 15438c2ecf20Sopenharmony_ci 15448c2ecf20Sopenharmony_ci spin_lock_irq(&ctx->timeout_lock); 15458c2ecf20Sopenharmony_ci list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { 15468c2ecf20Sopenharmony_ci u32 events_needed, events_got; 15478c2ecf20Sopenharmony_ci 15488c2ecf20Sopenharmony_ci if (io_is_timeout_noseq(req)) 15498c2ecf20Sopenharmony_ci break; 15508c2ecf20Sopenharmony_ci 15518c2ecf20Sopenharmony_ci /* 15528c2ecf20Sopenharmony_ci * Since seq can easily wrap around over time, subtract 15538c2ecf20Sopenharmony_ci * the last seq at which timeouts were flushed before comparing. 15548c2ecf20Sopenharmony_ci * Assuming not more than 2^31-1 events have happened since, 15558c2ecf20Sopenharmony_ci * these subtractions won't have wrapped, so we can check if 15568c2ecf20Sopenharmony_ci * target is in [last_seq, current_seq] by comparing the two. 
15578c2ecf20Sopenharmony_ci */ 15588c2ecf20Sopenharmony_ci events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush; 15598c2ecf20Sopenharmony_ci events_got = seq - ctx->cq_last_tm_flush; 15608c2ecf20Sopenharmony_ci if (events_got < events_needed) 15618c2ecf20Sopenharmony_ci break; 15628c2ecf20Sopenharmony_ci 15638c2ecf20Sopenharmony_ci io_kill_timeout(req, 0); 15648c2ecf20Sopenharmony_ci } 15658c2ecf20Sopenharmony_ci ctx->cq_last_tm_flush = seq; 15668c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->timeout_lock); 15678c2ecf20Sopenharmony_ci} 15688c2ecf20Sopenharmony_ci 15698c2ecf20Sopenharmony_cistatic void __io_commit_cqring_flush(struct io_ring_ctx *ctx) 15708c2ecf20Sopenharmony_ci{ 15718c2ecf20Sopenharmony_ci if (ctx->off_timeout_used) 15728c2ecf20Sopenharmony_ci io_flush_timeouts(ctx); 15738c2ecf20Sopenharmony_ci if (ctx->drain_active) 15748c2ecf20Sopenharmony_ci io_queue_deferred(ctx); 15758c2ecf20Sopenharmony_ci} 15768c2ecf20Sopenharmony_ci 15778c2ecf20Sopenharmony_cistatic inline bool io_commit_needs_flush(struct io_ring_ctx *ctx) 15788c2ecf20Sopenharmony_ci{ 15798c2ecf20Sopenharmony_ci return ctx->off_timeout_used || ctx->drain_active; 15808c2ecf20Sopenharmony_ci} 15818c2ecf20Sopenharmony_ci 15828c2ecf20Sopenharmony_cistatic inline void __io_commit_cqring(struct io_ring_ctx *ctx) 15838c2ecf20Sopenharmony_ci{ 15848c2ecf20Sopenharmony_ci /* order cqe stores with ring update */ 15858c2ecf20Sopenharmony_ci smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail); 15868c2ecf20Sopenharmony_ci} 15878c2ecf20Sopenharmony_ci 15888c2ecf20Sopenharmony_cistatic inline void io_commit_cqring(struct io_ring_ctx *ctx) 15898c2ecf20Sopenharmony_ci{ 15908c2ecf20Sopenharmony_ci if (unlikely(io_commit_needs_flush(ctx))) 15918c2ecf20Sopenharmony_ci __io_commit_cqring_flush(ctx); 15928c2ecf20Sopenharmony_ci __io_commit_cqring(ctx); 15938c2ecf20Sopenharmony_ci} 15948c2ecf20Sopenharmony_ci 15958c2ecf20Sopenharmony_cistatic inline bool io_sqring_full(struct io_ring_ctx *ctx) 
15968c2ecf20Sopenharmony_ci{ 15978c2ecf20Sopenharmony_ci struct io_rings *r = ctx->rings; 15988c2ecf20Sopenharmony_ci 15998c2ecf20Sopenharmony_ci return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries; 16008c2ecf20Sopenharmony_ci} 16018c2ecf20Sopenharmony_ci 16028c2ecf20Sopenharmony_cistatic inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx) 16038c2ecf20Sopenharmony_ci{ 16048c2ecf20Sopenharmony_ci return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); 16058c2ecf20Sopenharmony_ci} 16068c2ecf20Sopenharmony_ci 16078c2ecf20Sopenharmony_cistatic inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx) 16088c2ecf20Sopenharmony_ci{ 16098c2ecf20Sopenharmony_ci struct io_rings *rings = ctx->rings; 16108c2ecf20Sopenharmony_ci unsigned tail, mask = ctx->cq_entries - 1; 16118c2ecf20Sopenharmony_ci 16128c2ecf20Sopenharmony_ci /* 16138c2ecf20Sopenharmony_ci * writes to the cq entry need to come after reading head; the 16148c2ecf20Sopenharmony_ci * control dependency is enough as we're using WRITE_ONCE to 16158c2ecf20Sopenharmony_ci * fill the cq entry 16168c2ecf20Sopenharmony_ci */ 16178c2ecf20Sopenharmony_ci if (__io_cqring_events(ctx) == ctx->cq_entries) 16188c2ecf20Sopenharmony_ci return NULL; 16198c2ecf20Sopenharmony_ci 16208c2ecf20Sopenharmony_ci tail = ctx->cached_cq_tail++; 16218c2ecf20Sopenharmony_ci return &rings->cqes[tail & mask]; 16228c2ecf20Sopenharmony_ci} 16238c2ecf20Sopenharmony_ci 16248c2ecf20Sopenharmony_cistatic inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) 16258c2ecf20Sopenharmony_ci{ 16268c2ecf20Sopenharmony_ci if (likely(!ctx->cq_ev_fd)) 16278c2ecf20Sopenharmony_ci return false; 16288c2ecf20Sopenharmony_ci if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) 16298c2ecf20Sopenharmony_ci return false; 16308c2ecf20Sopenharmony_ci return !ctx->eventfd_async || io_wq_current_is_worker(); 16318c2ecf20Sopenharmony_ci} 16328c2ecf20Sopenharmony_ci 16338c2ecf20Sopenharmony_ci/* 
16348c2ecf20Sopenharmony_ci * This should only get called when at least one event has been posted. 16358c2ecf20Sopenharmony_ci * Some applications rely on the eventfd notification count only changing 16368c2ecf20Sopenharmony_ci * IFF a new CQE has been added to the CQ ring. There's no depedency on 16378c2ecf20Sopenharmony_ci * 1:1 relationship between how many times this function is called (and 16388c2ecf20Sopenharmony_ci * hence the eventfd count) and number of CQEs posted to the CQ ring. 16398c2ecf20Sopenharmony_ci */ 16408c2ecf20Sopenharmony_cistatic void io_cqring_ev_posted(struct io_ring_ctx *ctx) 16418c2ecf20Sopenharmony_ci{ 16428c2ecf20Sopenharmony_ci /* 16438c2ecf20Sopenharmony_ci * wake_up_all() may seem excessive, but io_wake_function() and 16448c2ecf20Sopenharmony_ci * io_should_wake() handle the termination of the loop and only 16458c2ecf20Sopenharmony_ci * wake as many waiters as we need to. 16468c2ecf20Sopenharmony_ci */ 16478c2ecf20Sopenharmony_ci if (wq_has_sleeper(&ctx->cq_wait)) 16488c2ecf20Sopenharmony_ci __wake_up(&ctx->cq_wait, TASK_NORMAL, 0, 16498c2ecf20Sopenharmony_ci poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); 16508c2ecf20Sopenharmony_ci if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait)) 16518c2ecf20Sopenharmony_ci wake_up(&ctx->sq_data->wait); 16528c2ecf20Sopenharmony_ci if (io_should_trigger_evfd(ctx)) 16538c2ecf20Sopenharmony_ci eventfd_signal_mask(ctx->cq_ev_fd, 1, EPOLL_URING_WAKE); 16548c2ecf20Sopenharmony_ci if (waitqueue_active(&ctx->poll_wait)) 16558c2ecf20Sopenharmony_ci __wake_up(&ctx->poll_wait, TASK_INTERRUPTIBLE, 0, 16568c2ecf20Sopenharmony_ci poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); 16578c2ecf20Sopenharmony_ci} 16588c2ecf20Sopenharmony_ci 16598c2ecf20Sopenharmony_cistatic void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) 16608c2ecf20Sopenharmony_ci{ 16618c2ecf20Sopenharmony_ci /* see waitqueue_active() comment */ 16628c2ecf20Sopenharmony_ci smp_mb(); 16638c2ecf20Sopenharmony_ci 16648c2ecf20Sopenharmony_ci if 
(ctx->flags & IORING_SETUP_SQPOLL) { 16658c2ecf20Sopenharmony_ci if (waitqueue_active(&ctx->cq_wait)) 16668c2ecf20Sopenharmony_ci __wake_up(&ctx->cq_wait, TASK_NORMAL, 0, 16678c2ecf20Sopenharmony_ci poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); 16688c2ecf20Sopenharmony_ci } 16698c2ecf20Sopenharmony_ci if (io_should_trigger_evfd(ctx)) 16708c2ecf20Sopenharmony_ci eventfd_signal_mask(ctx->cq_ev_fd, 1, EPOLL_URING_WAKE); 16718c2ecf20Sopenharmony_ci if (waitqueue_active(&ctx->poll_wait)) 16728c2ecf20Sopenharmony_ci __wake_up(&ctx->poll_wait, TASK_INTERRUPTIBLE, 0, 16738c2ecf20Sopenharmony_ci poll_to_key(EPOLL_URING_WAKE | EPOLLIN)); 16748c2ecf20Sopenharmony_ci} 16758c2ecf20Sopenharmony_ci 16768c2ecf20Sopenharmony_ci/* Returns true if there are no backlogged entries after the flush */ 16778c2ecf20Sopenharmony_cistatic bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) 16788c2ecf20Sopenharmony_ci{ 16798c2ecf20Sopenharmony_ci bool all_flushed, posted; 16808c2ecf20Sopenharmony_ci 16818c2ecf20Sopenharmony_ci if (!force && __io_cqring_events(ctx) == ctx->cq_entries) 16828c2ecf20Sopenharmony_ci return false; 16838c2ecf20Sopenharmony_ci 16848c2ecf20Sopenharmony_ci posted = false; 16858c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 16868c2ecf20Sopenharmony_ci while (!list_empty(&ctx->cq_overflow_list)) { 16878c2ecf20Sopenharmony_ci struct io_uring_cqe *cqe = io_get_cqe(ctx); 16888c2ecf20Sopenharmony_ci struct io_overflow_cqe *ocqe; 16898c2ecf20Sopenharmony_ci 16908c2ecf20Sopenharmony_ci if (!cqe && !force) 16918c2ecf20Sopenharmony_ci break; 16928c2ecf20Sopenharmony_ci ocqe = list_first_entry(&ctx->cq_overflow_list, 16938c2ecf20Sopenharmony_ci struct io_overflow_cqe, list); 16948c2ecf20Sopenharmony_ci if (cqe) 16958c2ecf20Sopenharmony_ci memcpy(cqe, &ocqe->cqe, sizeof(*cqe)); 16968c2ecf20Sopenharmony_ci else 16978c2ecf20Sopenharmony_ci io_account_cq_overflow(ctx); 16988c2ecf20Sopenharmony_ci 16998c2ecf20Sopenharmony_ci posted = true; 
17008c2ecf20Sopenharmony_ci list_del(&ocqe->list); 17018c2ecf20Sopenharmony_ci kfree(ocqe); 17028c2ecf20Sopenharmony_ci } 17038c2ecf20Sopenharmony_ci 17048c2ecf20Sopenharmony_ci all_flushed = list_empty(&ctx->cq_overflow_list); 17058c2ecf20Sopenharmony_ci if (all_flushed) { 17068c2ecf20Sopenharmony_ci clear_bit(0, &ctx->check_cq_overflow); 17078c2ecf20Sopenharmony_ci WRITE_ONCE(ctx->rings->sq_flags, 17088c2ecf20Sopenharmony_ci ctx->rings->sq_flags & ~IORING_SQ_CQ_OVERFLOW); 17098c2ecf20Sopenharmony_ci } 17108c2ecf20Sopenharmony_ci 17118c2ecf20Sopenharmony_ci if (posted) 17128c2ecf20Sopenharmony_ci io_commit_cqring(ctx); 17138c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 17148c2ecf20Sopenharmony_ci if (posted) 17158c2ecf20Sopenharmony_ci io_cqring_ev_posted(ctx); 17168c2ecf20Sopenharmony_ci return all_flushed; 17178c2ecf20Sopenharmony_ci} 17188c2ecf20Sopenharmony_ci 17198c2ecf20Sopenharmony_cistatic bool io_cqring_overflow_flush(struct io_ring_ctx *ctx) 17208c2ecf20Sopenharmony_ci{ 17218c2ecf20Sopenharmony_ci bool ret = true; 17228c2ecf20Sopenharmony_ci 17238c2ecf20Sopenharmony_ci if (test_bit(0, &ctx->check_cq_overflow)) { 17248c2ecf20Sopenharmony_ci /* iopoll syncs against uring_lock, not completion_lock */ 17258c2ecf20Sopenharmony_ci if (ctx->flags & IORING_SETUP_IOPOLL) 17268c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 17278c2ecf20Sopenharmony_ci ret = __io_cqring_overflow_flush(ctx, false); 17288c2ecf20Sopenharmony_ci if (ctx->flags & IORING_SETUP_IOPOLL) 17298c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 17308c2ecf20Sopenharmony_ci } 17318c2ecf20Sopenharmony_ci 17328c2ecf20Sopenharmony_ci return ret; 17338c2ecf20Sopenharmony_ci} 17348c2ecf20Sopenharmony_ci 17358c2ecf20Sopenharmony_ci/* must to be called somewhat shortly after putting a request */ 17368c2ecf20Sopenharmony_cistatic inline void io_put_task(struct task_struct *task, int nr) 17378c2ecf20Sopenharmony_ci{ 17388c2ecf20Sopenharmony_ci struct io_uring_task *tctx = 
task->io_uring; 17398c2ecf20Sopenharmony_ci 17408c2ecf20Sopenharmony_ci if (likely(task == current)) { 17418c2ecf20Sopenharmony_ci tctx->cached_refs += nr; 17428c2ecf20Sopenharmony_ci } else { 17438c2ecf20Sopenharmony_ci percpu_counter_sub(&tctx->inflight, nr); 17448c2ecf20Sopenharmony_ci if (unlikely(atomic_read(&tctx->in_idle))) 17458c2ecf20Sopenharmony_ci wake_up(&tctx->wait); 17468c2ecf20Sopenharmony_ci put_task_struct_many(task, nr); 17478c2ecf20Sopenharmony_ci } 17488c2ecf20Sopenharmony_ci} 17498c2ecf20Sopenharmony_ci 17508c2ecf20Sopenharmony_cistatic void io_task_refs_refill(struct io_uring_task *tctx) 17518c2ecf20Sopenharmony_ci{ 17528c2ecf20Sopenharmony_ci unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR; 17538c2ecf20Sopenharmony_ci 17548c2ecf20Sopenharmony_ci percpu_counter_add(&tctx->inflight, refill); 17558c2ecf20Sopenharmony_ci refcount_add(refill, ¤t->usage); 17568c2ecf20Sopenharmony_ci tctx->cached_refs += refill; 17578c2ecf20Sopenharmony_ci} 17588c2ecf20Sopenharmony_ci 17598c2ecf20Sopenharmony_cistatic inline void io_get_task_refs(int nr) 17608c2ecf20Sopenharmony_ci{ 17618c2ecf20Sopenharmony_ci struct io_uring_task *tctx = current->io_uring; 17628c2ecf20Sopenharmony_ci 17638c2ecf20Sopenharmony_ci tctx->cached_refs -= nr; 17648c2ecf20Sopenharmony_ci if (unlikely(tctx->cached_refs < 0)) 17658c2ecf20Sopenharmony_ci io_task_refs_refill(tctx); 17668c2ecf20Sopenharmony_ci} 17678c2ecf20Sopenharmony_ci 17688c2ecf20Sopenharmony_cistatic __cold void io_uring_drop_tctx_refs(struct task_struct *task) 17698c2ecf20Sopenharmony_ci{ 17708c2ecf20Sopenharmony_ci struct io_uring_task *tctx = task->io_uring; 17718c2ecf20Sopenharmony_ci unsigned int refs = tctx->cached_refs; 17728c2ecf20Sopenharmony_ci 17738c2ecf20Sopenharmony_ci if (refs) { 17748c2ecf20Sopenharmony_ci tctx->cached_refs = 0; 17758c2ecf20Sopenharmony_ci percpu_counter_sub(&tctx->inflight, refs); 17768c2ecf20Sopenharmony_ci put_task_struct_many(task, refs); 17778c2ecf20Sopenharmony_ci } 
17788c2ecf20Sopenharmony_ci} 17798c2ecf20Sopenharmony_ci 17808c2ecf20Sopenharmony_cistatic bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, 17818c2ecf20Sopenharmony_ci s32 res, u32 cflags) 17828c2ecf20Sopenharmony_ci{ 17838c2ecf20Sopenharmony_ci struct io_overflow_cqe *ocqe; 17848c2ecf20Sopenharmony_ci 17858c2ecf20Sopenharmony_ci ocqe = kmalloc(sizeof(*ocqe), GFP_ATOMIC | __GFP_ACCOUNT); 17868c2ecf20Sopenharmony_ci if (!ocqe) { 17878c2ecf20Sopenharmony_ci /* 17888c2ecf20Sopenharmony_ci * If we're in ring overflow flush mode, or in task cancel mode, 17898c2ecf20Sopenharmony_ci * or cannot allocate an overflow entry, then we need to drop it 17908c2ecf20Sopenharmony_ci * on the floor. 17918c2ecf20Sopenharmony_ci */ 17928c2ecf20Sopenharmony_ci io_account_cq_overflow(ctx); 17938c2ecf20Sopenharmony_ci return false; 17948c2ecf20Sopenharmony_ci } 17958c2ecf20Sopenharmony_ci if (list_empty(&ctx->cq_overflow_list)) { 17968c2ecf20Sopenharmony_ci set_bit(0, &ctx->check_cq_overflow); 17978c2ecf20Sopenharmony_ci WRITE_ONCE(ctx->rings->sq_flags, 17988c2ecf20Sopenharmony_ci ctx->rings->sq_flags | IORING_SQ_CQ_OVERFLOW); 17998c2ecf20Sopenharmony_ci 18008c2ecf20Sopenharmony_ci } 18018c2ecf20Sopenharmony_ci ocqe->cqe.user_data = user_data; 18028c2ecf20Sopenharmony_ci ocqe->cqe.res = res; 18038c2ecf20Sopenharmony_ci ocqe->cqe.flags = cflags; 18048c2ecf20Sopenharmony_ci list_add_tail(&ocqe->list, &ctx->cq_overflow_list); 18058c2ecf20Sopenharmony_ci return true; 18068c2ecf20Sopenharmony_ci} 18078c2ecf20Sopenharmony_ci 18088c2ecf20Sopenharmony_cistatic inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data, 18098c2ecf20Sopenharmony_ci s32 res, u32 cflags) 18108c2ecf20Sopenharmony_ci{ 18118c2ecf20Sopenharmony_ci struct io_uring_cqe *cqe; 18128c2ecf20Sopenharmony_ci 18138c2ecf20Sopenharmony_ci trace_io_uring_complete(ctx, user_data, res, cflags); 18148c2ecf20Sopenharmony_ci 18158c2ecf20Sopenharmony_ci /* 18168c2ecf20Sopenharmony_ci * If we can't get a cq 
entry, userspace overflowed the 18178c2ecf20Sopenharmony_ci * submission (by quite a lot). Increment the overflow count in 18188c2ecf20Sopenharmony_ci * the ring. 18198c2ecf20Sopenharmony_ci */ 18208c2ecf20Sopenharmony_ci cqe = io_get_cqe(ctx); 18218c2ecf20Sopenharmony_ci if (likely(cqe)) { 18228c2ecf20Sopenharmony_ci WRITE_ONCE(cqe->user_data, user_data); 18238c2ecf20Sopenharmony_ci WRITE_ONCE(cqe->res, res); 18248c2ecf20Sopenharmony_ci WRITE_ONCE(cqe->flags, cflags); 18258c2ecf20Sopenharmony_ci return true; 18268c2ecf20Sopenharmony_ci } 18278c2ecf20Sopenharmony_ci return io_cqring_event_overflow(ctx, user_data, res, cflags); 18288c2ecf20Sopenharmony_ci} 18298c2ecf20Sopenharmony_ci 18308c2ecf20Sopenharmony_cistatic noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags) 18318c2ecf20Sopenharmony_ci{ 18328c2ecf20Sopenharmony_ci __io_fill_cqe(req->ctx, req->user_data, res, cflags); 18338c2ecf20Sopenharmony_ci} 18348c2ecf20Sopenharmony_ci 18358c2ecf20Sopenharmony_cistatic noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, 18368c2ecf20Sopenharmony_ci s32 res, u32 cflags) 18378c2ecf20Sopenharmony_ci{ 18388c2ecf20Sopenharmony_ci ctx->cq_extra++; 18398c2ecf20Sopenharmony_ci return __io_fill_cqe(ctx, user_data, res, cflags); 18408c2ecf20Sopenharmony_ci} 18418c2ecf20Sopenharmony_ci 18428c2ecf20Sopenharmony_cistatic void io_req_complete_post(struct io_kiocb *req, s32 res, 18438c2ecf20Sopenharmony_ci u32 cflags) 18448c2ecf20Sopenharmony_ci{ 18458c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 18468c2ecf20Sopenharmony_ci 18478c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 18488c2ecf20Sopenharmony_ci __io_fill_cqe(ctx, req->user_data, res, cflags); 18498c2ecf20Sopenharmony_ci /* 18508c2ecf20Sopenharmony_ci * If we're the last reference to this request, add to our locked 18518c2ecf20Sopenharmony_ci * free_list cache. 
18528c2ecf20Sopenharmony_ci */ 18538c2ecf20Sopenharmony_ci if (req_ref_put_and_test(req)) { 18548c2ecf20Sopenharmony_ci if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { 18558c2ecf20Sopenharmony_ci if (req->flags & IO_DISARM_MASK) 18568c2ecf20Sopenharmony_ci io_disarm_next(req); 18578c2ecf20Sopenharmony_ci if (req->link) { 18588c2ecf20Sopenharmony_ci io_req_task_queue(req->link); 18598c2ecf20Sopenharmony_ci req->link = NULL; 18608c2ecf20Sopenharmony_ci } 18618c2ecf20Sopenharmony_ci } 18628c2ecf20Sopenharmony_ci io_dismantle_req(req); 18638c2ecf20Sopenharmony_ci io_put_task(req->task, 1); 18648c2ecf20Sopenharmony_ci list_add(&req->inflight_entry, &ctx->locked_free_list); 18658c2ecf20Sopenharmony_ci ctx->locked_free_nr++; 18668c2ecf20Sopenharmony_ci } else { 18678c2ecf20Sopenharmony_ci if (!percpu_ref_tryget(&ctx->refs)) 18688c2ecf20Sopenharmony_ci req = NULL; 18698c2ecf20Sopenharmony_ci } 18708c2ecf20Sopenharmony_ci io_commit_cqring(ctx); 18718c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 18728c2ecf20Sopenharmony_ci 18738c2ecf20Sopenharmony_ci if (req) { 18748c2ecf20Sopenharmony_ci io_cqring_ev_posted(ctx); 18758c2ecf20Sopenharmony_ci percpu_ref_put(&ctx->refs); 18768c2ecf20Sopenharmony_ci } 18778c2ecf20Sopenharmony_ci} 18788c2ecf20Sopenharmony_ci 18798c2ecf20Sopenharmony_cistatic inline bool io_req_needs_clean(struct io_kiocb *req) 18808c2ecf20Sopenharmony_ci{ 18818c2ecf20Sopenharmony_ci return req->flags & IO_REQ_CLEAN_FLAGS; 18828c2ecf20Sopenharmony_ci} 18838c2ecf20Sopenharmony_ci 18848c2ecf20Sopenharmony_cistatic inline void io_req_complete_state(struct io_kiocb *req, s32 res, 18858c2ecf20Sopenharmony_ci u32 cflags) 18868c2ecf20Sopenharmony_ci{ 18878c2ecf20Sopenharmony_ci if (io_req_needs_clean(req)) 18888c2ecf20Sopenharmony_ci io_clean_op(req); 18898c2ecf20Sopenharmony_ci req->result = res; 18908c2ecf20Sopenharmony_ci req->compl.cflags = cflags; 18918c2ecf20Sopenharmony_ci req->flags |= REQ_F_COMPLETE_INLINE; 18928c2ecf20Sopenharmony_ci} 
18938c2ecf20Sopenharmony_ci 18948c2ecf20Sopenharmony_cistatic inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags, 18958c2ecf20Sopenharmony_ci s32 res, u32 cflags) 18968c2ecf20Sopenharmony_ci{ 18978c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_COMPLETE_DEFER) 18988c2ecf20Sopenharmony_ci io_req_complete_state(req, res, cflags); 18998c2ecf20Sopenharmony_ci else 19008c2ecf20Sopenharmony_ci io_req_complete_post(req, res, cflags); 19018c2ecf20Sopenharmony_ci} 19028c2ecf20Sopenharmony_ci 19038c2ecf20Sopenharmony_cistatic inline void io_req_complete(struct io_kiocb *req, s32 res) 19048c2ecf20Sopenharmony_ci{ 19058c2ecf20Sopenharmony_ci __io_req_complete(req, 0, res, 0); 19068c2ecf20Sopenharmony_ci} 19078c2ecf20Sopenharmony_ci 19088c2ecf20Sopenharmony_cistatic void io_req_complete_failed(struct io_kiocb *req, s32 res) 19098c2ecf20Sopenharmony_ci{ 19108c2ecf20Sopenharmony_ci req_set_fail(req); 19118c2ecf20Sopenharmony_ci io_req_complete_post(req, res, 0); 19128c2ecf20Sopenharmony_ci} 19138c2ecf20Sopenharmony_ci 19148c2ecf20Sopenharmony_cistatic void io_req_complete_fail_submit(struct io_kiocb *req) 19158c2ecf20Sopenharmony_ci{ 19168c2ecf20Sopenharmony_ci /* 19178c2ecf20Sopenharmony_ci * We don't submit, fail them all, for that replace hardlinks with 19188c2ecf20Sopenharmony_ci * normal links. Extra REQ_F_LINK is tolerated. 19198c2ecf20Sopenharmony_ci */ 19208c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_HARDLINK; 19218c2ecf20Sopenharmony_ci req->flags |= REQ_F_LINK; 19228c2ecf20Sopenharmony_ci io_req_complete_failed(req, req->result); 19238c2ecf20Sopenharmony_ci} 19248c2ecf20Sopenharmony_ci 19258c2ecf20Sopenharmony_ci/* 19268c2ecf20Sopenharmony_ci * Don't initialise the fields below on every allocation, but do that in 19278c2ecf20Sopenharmony_ci * advance and keep them valid across allocations. 
19288c2ecf20Sopenharmony_ci */ 19298c2ecf20Sopenharmony_cistatic void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx) 19308c2ecf20Sopenharmony_ci{ 19318c2ecf20Sopenharmony_ci req->ctx = ctx; 19328c2ecf20Sopenharmony_ci req->link = NULL; 19338c2ecf20Sopenharmony_ci req->async_data = NULL; 19348c2ecf20Sopenharmony_ci /* not necessary, but safer to zero */ 19358c2ecf20Sopenharmony_ci req->result = 0; 19368c2ecf20Sopenharmony_ci} 19378c2ecf20Sopenharmony_ci 19388c2ecf20Sopenharmony_cistatic void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx, 19398c2ecf20Sopenharmony_ci struct io_submit_state *state) 19408c2ecf20Sopenharmony_ci{ 19418c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 19428c2ecf20Sopenharmony_ci list_splice_init(&ctx->locked_free_list, &state->free_list); 19438c2ecf20Sopenharmony_ci ctx->locked_free_nr = 0; 19448c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 19458c2ecf20Sopenharmony_ci} 19468c2ecf20Sopenharmony_ci 19478c2ecf20Sopenharmony_ci/* Returns true IFF there are requests in the cache */ 19488c2ecf20Sopenharmony_cistatic bool io_flush_cached_reqs(struct io_ring_ctx *ctx) 19498c2ecf20Sopenharmony_ci{ 19508c2ecf20Sopenharmony_ci struct io_submit_state *state = &ctx->submit_state; 19518c2ecf20Sopenharmony_ci int nr; 19528c2ecf20Sopenharmony_ci 19538c2ecf20Sopenharmony_ci /* 19548c2ecf20Sopenharmony_ci * If we have more than a batch's worth of requests in our IRQ side 19558c2ecf20Sopenharmony_ci * locked cache, grab the lock and move them over to our submission 19568c2ecf20Sopenharmony_ci * side cache. 
19578c2ecf20Sopenharmony_ci */ 19588c2ecf20Sopenharmony_ci if (READ_ONCE(ctx->locked_free_nr) > IO_COMPL_BATCH) 19598c2ecf20Sopenharmony_ci io_flush_cached_locked_reqs(ctx, state); 19608c2ecf20Sopenharmony_ci 19618c2ecf20Sopenharmony_ci nr = state->free_reqs; 19628c2ecf20Sopenharmony_ci while (!list_empty(&state->free_list)) { 19638c2ecf20Sopenharmony_ci struct io_kiocb *req = list_first_entry(&state->free_list, 19648c2ecf20Sopenharmony_ci struct io_kiocb, inflight_entry); 19658c2ecf20Sopenharmony_ci 19668c2ecf20Sopenharmony_ci list_del(&req->inflight_entry); 19678c2ecf20Sopenharmony_ci state->reqs[nr++] = req; 19688c2ecf20Sopenharmony_ci if (nr == ARRAY_SIZE(state->reqs)) 19698c2ecf20Sopenharmony_ci break; 19708c2ecf20Sopenharmony_ci } 19718c2ecf20Sopenharmony_ci 19728c2ecf20Sopenharmony_ci state->free_reqs = nr; 19738c2ecf20Sopenharmony_ci return nr != 0; 19748c2ecf20Sopenharmony_ci} 19758c2ecf20Sopenharmony_ci 19768c2ecf20Sopenharmony_ci/* 19778c2ecf20Sopenharmony_ci * A request might get retired back into the request caches even before opcode 19788c2ecf20Sopenharmony_ci * handlers and io_issue_sqe() are done with it, e.g. inline completion path. 19798c2ecf20Sopenharmony_ci * Because of that, io_alloc_req() should be called only under ->uring_lock 19808c2ecf20Sopenharmony_ci * and with extra caution to not get a request that is still worked on. 
19818c2ecf20Sopenharmony_ci */ 19828c2ecf20Sopenharmony_cistatic struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx) 19838c2ecf20Sopenharmony_ci __must_hold(&ctx->uring_lock) 19848c2ecf20Sopenharmony_ci{ 19858c2ecf20Sopenharmony_ci struct io_submit_state *state = &ctx->submit_state; 19868c2ecf20Sopenharmony_ci gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; 19878c2ecf20Sopenharmony_ci int ret, i; 19888c2ecf20Sopenharmony_ci 19898c2ecf20Sopenharmony_ci BUILD_BUG_ON(ARRAY_SIZE(state->reqs) < IO_REQ_ALLOC_BATCH); 19908c2ecf20Sopenharmony_ci 19918c2ecf20Sopenharmony_ci if (likely(state->free_reqs || io_flush_cached_reqs(ctx))) 19928c2ecf20Sopenharmony_ci goto got_req; 19938c2ecf20Sopenharmony_ci 19948c2ecf20Sopenharmony_ci ret = kmem_cache_alloc_bulk(req_cachep, gfp, IO_REQ_ALLOC_BATCH, 19958c2ecf20Sopenharmony_ci state->reqs); 19968c2ecf20Sopenharmony_ci 19978c2ecf20Sopenharmony_ci /* 19988c2ecf20Sopenharmony_ci * Bulk alloc is all-or-nothing. If we fail to get a batch, 19998c2ecf20Sopenharmony_ci * retry single alloc to be on the safe side. 
20008c2ecf20Sopenharmony_ci */ 20018c2ecf20Sopenharmony_ci if (unlikely(ret <= 0)) { 20028c2ecf20Sopenharmony_ci state->reqs[0] = kmem_cache_alloc(req_cachep, gfp); 20038c2ecf20Sopenharmony_ci if (!state->reqs[0]) 20048c2ecf20Sopenharmony_ci return NULL; 20058c2ecf20Sopenharmony_ci ret = 1; 20068c2ecf20Sopenharmony_ci } 20078c2ecf20Sopenharmony_ci 20088c2ecf20Sopenharmony_ci for (i = 0; i < ret; i++) 20098c2ecf20Sopenharmony_ci io_preinit_req(state->reqs[i], ctx); 20108c2ecf20Sopenharmony_ci state->free_reqs = ret; 20118c2ecf20Sopenharmony_cigot_req: 20128c2ecf20Sopenharmony_ci state->free_reqs--; 20138c2ecf20Sopenharmony_ci return state->reqs[state->free_reqs]; 20148c2ecf20Sopenharmony_ci} 20158c2ecf20Sopenharmony_ci 20168c2ecf20Sopenharmony_cistatic inline void io_put_file(struct file *file) 20178c2ecf20Sopenharmony_ci{ 20188c2ecf20Sopenharmony_ci if (file) 20198c2ecf20Sopenharmony_ci fput(file); 20208c2ecf20Sopenharmony_ci} 20218c2ecf20Sopenharmony_ci 20228c2ecf20Sopenharmony_cistatic void io_dismantle_req(struct io_kiocb *req) 20238c2ecf20Sopenharmony_ci{ 20248c2ecf20Sopenharmony_ci unsigned int flags = req->flags; 20258c2ecf20Sopenharmony_ci 20268c2ecf20Sopenharmony_ci if (io_req_needs_clean(req)) 20278c2ecf20Sopenharmony_ci io_clean_op(req); 20288c2ecf20Sopenharmony_ci if (!(flags & REQ_F_FIXED_FILE)) 20298c2ecf20Sopenharmony_ci io_put_file(req->file); 20308c2ecf20Sopenharmony_ci if (req->fixed_rsrc_refs) 20318c2ecf20Sopenharmony_ci percpu_ref_put(req->fixed_rsrc_refs); 20328c2ecf20Sopenharmony_ci if (req->async_data) { 20338c2ecf20Sopenharmony_ci kfree(req->async_data); 20348c2ecf20Sopenharmony_ci req->async_data = NULL; 20358c2ecf20Sopenharmony_ci } 20368c2ecf20Sopenharmony_ci} 20378c2ecf20Sopenharmony_ci 20388c2ecf20Sopenharmony_cistatic void __io_free_req(struct io_kiocb *req) 20398c2ecf20Sopenharmony_ci{ 20408c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 20418c2ecf20Sopenharmony_ci 20428c2ecf20Sopenharmony_ci io_dismantle_req(req); 
20438c2ecf20Sopenharmony_ci io_put_task(req->task, 1); 20448c2ecf20Sopenharmony_ci 20458c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 20468c2ecf20Sopenharmony_ci list_add(&req->inflight_entry, &ctx->locked_free_list); 20478c2ecf20Sopenharmony_ci ctx->locked_free_nr++; 20488c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 20498c2ecf20Sopenharmony_ci 20508c2ecf20Sopenharmony_ci percpu_ref_put(&ctx->refs); 20518c2ecf20Sopenharmony_ci} 20528c2ecf20Sopenharmony_ci 20538c2ecf20Sopenharmony_cistatic inline void io_remove_next_linked(struct io_kiocb *req) 20548c2ecf20Sopenharmony_ci{ 20558c2ecf20Sopenharmony_ci struct io_kiocb *nxt = req->link; 20568c2ecf20Sopenharmony_ci 20578c2ecf20Sopenharmony_ci req->link = nxt->link; 20588c2ecf20Sopenharmony_ci nxt->link = NULL; 20598c2ecf20Sopenharmony_ci} 20608c2ecf20Sopenharmony_ci 20618c2ecf20Sopenharmony_cistatic bool io_kill_linked_timeout(struct io_kiocb *req) 20628c2ecf20Sopenharmony_ci __must_hold(&req->ctx->completion_lock) 20638c2ecf20Sopenharmony_ci __must_hold(&req->ctx->timeout_lock) 20648c2ecf20Sopenharmony_ci{ 20658c2ecf20Sopenharmony_ci struct io_kiocb *link = req->link; 20668c2ecf20Sopenharmony_ci 20678c2ecf20Sopenharmony_ci if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { 20688c2ecf20Sopenharmony_ci struct io_timeout_data *io = link->async_data; 20698c2ecf20Sopenharmony_ci 20708c2ecf20Sopenharmony_ci io_remove_next_linked(req); 20718c2ecf20Sopenharmony_ci link->timeout.head = NULL; 20728c2ecf20Sopenharmony_ci if (hrtimer_try_to_cancel(&io->timer) != -1) { 20738c2ecf20Sopenharmony_ci list_del(&link->timeout.list); 20748c2ecf20Sopenharmony_ci io_fill_cqe_req(link, -ECANCELED, 0); 20758c2ecf20Sopenharmony_ci io_put_req_deferred(link); 20768c2ecf20Sopenharmony_ci return true; 20778c2ecf20Sopenharmony_ci } 20788c2ecf20Sopenharmony_ci } 20798c2ecf20Sopenharmony_ci return false; 20808c2ecf20Sopenharmony_ci} 20818c2ecf20Sopenharmony_ci 20828c2ecf20Sopenharmony_cistatic void io_fail_links(struct 
io_kiocb *req) 20838c2ecf20Sopenharmony_ci __must_hold(&req->ctx->completion_lock) 20848c2ecf20Sopenharmony_ci{ 20858c2ecf20Sopenharmony_ci struct io_kiocb *nxt, *link = req->link; 20868c2ecf20Sopenharmony_ci 20878c2ecf20Sopenharmony_ci req->link = NULL; 20888c2ecf20Sopenharmony_ci while (link) { 20898c2ecf20Sopenharmony_ci long res = -ECANCELED; 20908c2ecf20Sopenharmony_ci 20918c2ecf20Sopenharmony_ci if (link->flags & REQ_F_FAIL) 20928c2ecf20Sopenharmony_ci res = link->result; 20938c2ecf20Sopenharmony_ci 20948c2ecf20Sopenharmony_ci nxt = link->link; 20958c2ecf20Sopenharmony_ci link->link = NULL; 20968c2ecf20Sopenharmony_ci 20978c2ecf20Sopenharmony_ci trace_io_uring_fail_link(req, link); 20988c2ecf20Sopenharmony_ci io_fill_cqe_req(link, res, 0); 20998c2ecf20Sopenharmony_ci io_put_req_deferred(link); 21008c2ecf20Sopenharmony_ci link = nxt; 21018c2ecf20Sopenharmony_ci } 21028c2ecf20Sopenharmony_ci} 21038c2ecf20Sopenharmony_ci 21048c2ecf20Sopenharmony_cistatic bool io_disarm_next(struct io_kiocb *req) 21058c2ecf20Sopenharmony_ci __must_hold(&req->ctx->completion_lock) 21068c2ecf20Sopenharmony_ci{ 21078c2ecf20Sopenharmony_ci bool posted = false; 21088c2ecf20Sopenharmony_ci 21098c2ecf20Sopenharmony_ci if (req->flags & REQ_F_ARM_LTIMEOUT) { 21108c2ecf20Sopenharmony_ci struct io_kiocb *link = req->link; 21118c2ecf20Sopenharmony_ci 21128c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_ARM_LTIMEOUT; 21138c2ecf20Sopenharmony_ci if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { 21148c2ecf20Sopenharmony_ci io_remove_next_linked(req); 21158c2ecf20Sopenharmony_ci io_fill_cqe_req(link, -ECANCELED, 0); 21168c2ecf20Sopenharmony_ci io_put_req_deferred(link); 21178c2ecf20Sopenharmony_ci posted = true; 21188c2ecf20Sopenharmony_ci } 21198c2ecf20Sopenharmony_ci } else if (req->flags & REQ_F_LINK_TIMEOUT) { 21208c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 21218c2ecf20Sopenharmony_ci 21228c2ecf20Sopenharmony_ci spin_lock_irq(&ctx->timeout_lock); 21238c2ecf20Sopenharmony_ci 
posted = io_kill_linked_timeout(req); 21248c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->timeout_lock); 21258c2ecf20Sopenharmony_ci } 21268c2ecf20Sopenharmony_ci if (unlikely((req->flags & REQ_F_FAIL) && 21278c2ecf20Sopenharmony_ci !(req->flags & REQ_F_HARDLINK))) { 21288c2ecf20Sopenharmony_ci posted |= (req->link != NULL); 21298c2ecf20Sopenharmony_ci io_fail_links(req); 21308c2ecf20Sopenharmony_ci } 21318c2ecf20Sopenharmony_ci return posted; 21328c2ecf20Sopenharmony_ci} 21338c2ecf20Sopenharmony_ci 21348c2ecf20Sopenharmony_cistatic struct io_kiocb *__io_req_find_next(struct io_kiocb *req) 21358c2ecf20Sopenharmony_ci{ 21368c2ecf20Sopenharmony_ci struct io_kiocb *nxt; 21378c2ecf20Sopenharmony_ci 21388c2ecf20Sopenharmony_ci /* 21398c2ecf20Sopenharmony_ci * If LINK is set, we have dependent requests in this chain. If we 21408c2ecf20Sopenharmony_ci * didn't fail this request, queue the first one up, moving any other 21418c2ecf20Sopenharmony_ci * dependencies to the next request. In case of failure, fail the rest 21428c2ecf20Sopenharmony_ci * of the chain. 
21438c2ecf20Sopenharmony_ci */ 21448c2ecf20Sopenharmony_ci if (req->flags & IO_DISARM_MASK) { 21458c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 21468c2ecf20Sopenharmony_ci bool posted; 21478c2ecf20Sopenharmony_ci 21488c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 21498c2ecf20Sopenharmony_ci posted = io_disarm_next(req); 21508c2ecf20Sopenharmony_ci if (posted) 21518c2ecf20Sopenharmony_ci io_commit_cqring(req->ctx); 21528c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 21538c2ecf20Sopenharmony_ci if (posted) 21548c2ecf20Sopenharmony_ci io_cqring_ev_posted(ctx); 21558c2ecf20Sopenharmony_ci } 21568c2ecf20Sopenharmony_ci nxt = req->link; 21578c2ecf20Sopenharmony_ci req->link = NULL; 21588c2ecf20Sopenharmony_ci return nxt; 21598c2ecf20Sopenharmony_ci} 21608c2ecf20Sopenharmony_ci 21618c2ecf20Sopenharmony_cistatic inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) 21628c2ecf20Sopenharmony_ci{ 21638c2ecf20Sopenharmony_ci if (likely(!(req->flags & (REQ_F_LINK|REQ_F_HARDLINK)))) 21648c2ecf20Sopenharmony_ci return NULL; 21658c2ecf20Sopenharmony_ci return __io_req_find_next(req); 21668c2ecf20Sopenharmony_ci} 21678c2ecf20Sopenharmony_ci 21688c2ecf20Sopenharmony_cistatic void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked) 21698c2ecf20Sopenharmony_ci{ 21708c2ecf20Sopenharmony_ci if (!ctx) 21718c2ecf20Sopenharmony_ci return; 21728c2ecf20Sopenharmony_ci if (*locked) { 21738c2ecf20Sopenharmony_ci if (ctx->submit_state.compl_nr) 21748c2ecf20Sopenharmony_ci io_submit_flush_completions(ctx); 21758c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 21768c2ecf20Sopenharmony_ci *locked = false; 21778c2ecf20Sopenharmony_ci } 21788c2ecf20Sopenharmony_ci percpu_ref_put(&ctx->refs); 21798c2ecf20Sopenharmony_ci} 21808c2ecf20Sopenharmony_ci 21818c2ecf20Sopenharmony_cistatic void tctx_task_work(struct callback_head *cb) 21828c2ecf20Sopenharmony_ci{ 21838c2ecf20Sopenharmony_ci bool locked = false; 21848c2ecf20Sopenharmony_ci struct 
io_ring_ctx *ctx = NULL; 21858c2ecf20Sopenharmony_ci struct io_uring_task *tctx = container_of(cb, struct io_uring_task, 21868c2ecf20Sopenharmony_ci task_work); 21878c2ecf20Sopenharmony_ci 21888c2ecf20Sopenharmony_ci while (1) { 21898c2ecf20Sopenharmony_ci struct io_wq_work_node *node; 21908c2ecf20Sopenharmony_ci 21918c2ecf20Sopenharmony_ci if (!tctx->task_list.first && locked && ctx->submit_state.compl_nr) 21928c2ecf20Sopenharmony_ci io_submit_flush_completions(ctx); 21938c2ecf20Sopenharmony_ci 21948c2ecf20Sopenharmony_ci spin_lock_irq(&tctx->task_lock); 21958c2ecf20Sopenharmony_ci node = tctx->task_list.first; 21968c2ecf20Sopenharmony_ci INIT_WQ_LIST(&tctx->task_list); 21978c2ecf20Sopenharmony_ci if (!node) 21988c2ecf20Sopenharmony_ci tctx->task_running = false; 21998c2ecf20Sopenharmony_ci spin_unlock_irq(&tctx->task_lock); 22008c2ecf20Sopenharmony_ci if (!node) 22018c2ecf20Sopenharmony_ci break; 22028c2ecf20Sopenharmony_ci 22038c2ecf20Sopenharmony_ci do { 22048c2ecf20Sopenharmony_ci struct io_wq_work_node *next = node->next; 22058c2ecf20Sopenharmony_ci struct io_kiocb *req = container_of(node, struct io_kiocb, 22068c2ecf20Sopenharmony_ci io_task_work.node); 22078c2ecf20Sopenharmony_ci 22088c2ecf20Sopenharmony_ci if (req->ctx != ctx) { 22098c2ecf20Sopenharmony_ci ctx_flush_and_put(ctx, &locked); 22108c2ecf20Sopenharmony_ci ctx = req->ctx; 22118c2ecf20Sopenharmony_ci /* if not contended, grab and improve batching */ 22128c2ecf20Sopenharmony_ci locked = mutex_trylock(&ctx->uring_lock); 22138c2ecf20Sopenharmony_ci percpu_ref_get(&ctx->refs); 22148c2ecf20Sopenharmony_ci } 22158c2ecf20Sopenharmony_ci req->io_task_work.func(req, &locked); 22168c2ecf20Sopenharmony_ci node = next; 22178c2ecf20Sopenharmony_ci if (unlikely(need_resched())) { 22188c2ecf20Sopenharmony_ci ctx_flush_and_put(ctx, &locked); 22198c2ecf20Sopenharmony_ci ctx = NULL; 22208c2ecf20Sopenharmony_ci cond_resched(); 22218c2ecf20Sopenharmony_ci } 22228c2ecf20Sopenharmony_ci } while (node); 
22238c2ecf20Sopenharmony_ci } 22248c2ecf20Sopenharmony_ci 22258c2ecf20Sopenharmony_ci ctx_flush_and_put(ctx, &locked); 22268c2ecf20Sopenharmony_ci 22278c2ecf20Sopenharmony_ci /* relaxed read is enough as only the task itself sets ->in_idle */ 22288c2ecf20Sopenharmony_ci if (unlikely(atomic_read(&tctx->in_idle))) 22298c2ecf20Sopenharmony_ci io_uring_drop_tctx_refs(current); 22308c2ecf20Sopenharmony_ci} 22318c2ecf20Sopenharmony_ci 22328c2ecf20Sopenharmony_cistatic void io_req_task_work_add(struct io_kiocb *req) 22338c2ecf20Sopenharmony_ci{ 22348c2ecf20Sopenharmony_ci struct task_struct *tsk = req->task; 22358c2ecf20Sopenharmony_ci struct io_uring_task *tctx = tsk->io_uring; 22368c2ecf20Sopenharmony_ci enum task_work_notify_mode notify; 22378c2ecf20Sopenharmony_ci struct io_wq_work_node *node; 22388c2ecf20Sopenharmony_ci unsigned long flags; 22398c2ecf20Sopenharmony_ci bool running; 22408c2ecf20Sopenharmony_ci 22418c2ecf20Sopenharmony_ci WARN_ON_ONCE(!tctx); 22428c2ecf20Sopenharmony_ci 22438c2ecf20Sopenharmony_ci spin_lock_irqsave(&tctx->task_lock, flags); 22448c2ecf20Sopenharmony_ci wq_list_add_tail(&req->io_task_work.node, &tctx->task_list); 22458c2ecf20Sopenharmony_ci running = tctx->task_running; 22468c2ecf20Sopenharmony_ci if (!running) 22478c2ecf20Sopenharmony_ci tctx->task_running = true; 22488c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&tctx->task_lock, flags); 22498c2ecf20Sopenharmony_ci 22508c2ecf20Sopenharmony_ci /* task_work already pending, we're done */ 22518c2ecf20Sopenharmony_ci if (running) 22528c2ecf20Sopenharmony_ci return; 22538c2ecf20Sopenharmony_ci 22548c2ecf20Sopenharmony_ci /* 22558c2ecf20Sopenharmony_ci * SQPOLL kernel thread doesn't need notification, just a wakeup. For 22568c2ecf20Sopenharmony_ci * all other cases, use TWA_SIGNAL unconditionally to ensure we're 22578c2ecf20Sopenharmony_ci * processing task_work. There's no reliable way to tell if TWA_RESUME 22588c2ecf20Sopenharmony_ci * will do the job. 
22598c2ecf20Sopenharmony_ci */ 22608c2ecf20Sopenharmony_ci notify = (req->ctx->flags & IORING_SETUP_SQPOLL) ? TWA_NONE : TWA_SIGNAL; 22618c2ecf20Sopenharmony_ci if (!task_work_add(tsk, &tctx->task_work, notify)) { 22628c2ecf20Sopenharmony_ci wake_up_process(tsk); 22638c2ecf20Sopenharmony_ci return; 22648c2ecf20Sopenharmony_ci } 22658c2ecf20Sopenharmony_ci 22668c2ecf20Sopenharmony_ci spin_lock_irqsave(&tctx->task_lock, flags); 22678c2ecf20Sopenharmony_ci tctx->task_running = false; 22688c2ecf20Sopenharmony_ci node = tctx->task_list.first; 22698c2ecf20Sopenharmony_ci INIT_WQ_LIST(&tctx->task_list); 22708c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&tctx->task_lock, flags); 22718c2ecf20Sopenharmony_ci 22728c2ecf20Sopenharmony_ci while (node) { 22738c2ecf20Sopenharmony_ci req = container_of(node, struct io_kiocb, io_task_work.node); 22748c2ecf20Sopenharmony_ci node = node->next; 22758c2ecf20Sopenharmony_ci if (llist_add(&req->io_task_work.fallback_node, 22768c2ecf20Sopenharmony_ci &req->ctx->fallback_llist)) 22778c2ecf20Sopenharmony_ci schedule_delayed_work(&req->ctx->fallback_work, 1); 22788c2ecf20Sopenharmony_ci } 22798c2ecf20Sopenharmony_ci} 22808c2ecf20Sopenharmony_ci 22818c2ecf20Sopenharmony_cistatic void io_req_task_cancel(struct io_kiocb *req, bool *locked) 22828c2ecf20Sopenharmony_ci{ 22838c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 22848c2ecf20Sopenharmony_ci 22858c2ecf20Sopenharmony_ci /* not needed for normal modes, but SQPOLL depends on it */ 22868c2ecf20Sopenharmony_ci io_tw_lock(ctx, locked); 22878c2ecf20Sopenharmony_ci io_req_complete_failed(req, req->result); 22888c2ecf20Sopenharmony_ci} 22898c2ecf20Sopenharmony_ci 22908c2ecf20Sopenharmony_cistatic void io_req_task_submit(struct io_kiocb *req, bool *locked) 22918c2ecf20Sopenharmony_ci{ 22928c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 22938c2ecf20Sopenharmony_ci 22948c2ecf20Sopenharmony_ci io_tw_lock(ctx, locked); 22958c2ecf20Sopenharmony_ci /* req->task == current here, 
checking PF_EXITING is safe */ 22968c2ecf20Sopenharmony_ci if (likely(!(req->task->flags & PF_EXITING))) 22978c2ecf20Sopenharmony_ci __io_queue_sqe(req); 22988c2ecf20Sopenharmony_ci else 22998c2ecf20Sopenharmony_ci io_req_complete_failed(req, -EFAULT); 23008c2ecf20Sopenharmony_ci} 23018c2ecf20Sopenharmony_ci 23028c2ecf20Sopenharmony_cistatic void io_req_task_queue_fail(struct io_kiocb *req, int ret) 23038c2ecf20Sopenharmony_ci{ 23048c2ecf20Sopenharmony_ci req->result = ret; 23058c2ecf20Sopenharmony_ci req->io_task_work.func = io_req_task_cancel; 23068c2ecf20Sopenharmony_ci io_req_task_work_add(req); 23078c2ecf20Sopenharmony_ci} 23088c2ecf20Sopenharmony_ci 23098c2ecf20Sopenharmony_cistatic void io_req_task_queue(struct io_kiocb *req) 23108c2ecf20Sopenharmony_ci{ 23118c2ecf20Sopenharmony_ci req->io_task_work.func = io_req_task_submit; 23128c2ecf20Sopenharmony_ci io_req_task_work_add(req); 23138c2ecf20Sopenharmony_ci} 23148c2ecf20Sopenharmony_ci 23158c2ecf20Sopenharmony_cistatic void io_req_task_queue_reissue(struct io_kiocb *req) 23168c2ecf20Sopenharmony_ci{ 23178c2ecf20Sopenharmony_ci req->io_task_work.func = io_queue_async_work; 23188c2ecf20Sopenharmony_ci io_req_task_work_add(req); 23198c2ecf20Sopenharmony_ci} 23208c2ecf20Sopenharmony_ci 23218c2ecf20Sopenharmony_cistatic inline void io_queue_next(struct io_kiocb *req) 23228c2ecf20Sopenharmony_ci{ 23238c2ecf20Sopenharmony_ci struct io_kiocb *nxt = io_req_find_next(req); 23248c2ecf20Sopenharmony_ci 23258c2ecf20Sopenharmony_ci if (nxt) 23268c2ecf20Sopenharmony_ci io_req_task_queue(nxt); 23278c2ecf20Sopenharmony_ci} 23288c2ecf20Sopenharmony_ci 23298c2ecf20Sopenharmony_cistatic void io_free_req(struct io_kiocb *req) 23308c2ecf20Sopenharmony_ci{ 23318c2ecf20Sopenharmony_ci io_queue_next(req); 23328c2ecf20Sopenharmony_ci __io_free_req(req); 23338c2ecf20Sopenharmony_ci} 23348c2ecf20Sopenharmony_ci 23358c2ecf20Sopenharmony_cistatic void io_free_req_work(struct io_kiocb *req, bool *locked) 23368c2ecf20Sopenharmony_ci{ 
23378c2ecf20Sopenharmony_ci io_free_req(req); 23388c2ecf20Sopenharmony_ci} 23398c2ecf20Sopenharmony_ci 23408c2ecf20Sopenharmony_cistruct req_batch { 23418c2ecf20Sopenharmony_ci struct task_struct *task; 23428c2ecf20Sopenharmony_ci int task_refs; 23438c2ecf20Sopenharmony_ci int ctx_refs; 23448c2ecf20Sopenharmony_ci}; 23458c2ecf20Sopenharmony_ci 23468c2ecf20Sopenharmony_cistatic inline void io_init_req_batch(struct req_batch *rb) 23478c2ecf20Sopenharmony_ci{ 23488c2ecf20Sopenharmony_ci rb->task_refs = 0; 23498c2ecf20Sopenharmony_ci rb->ctx_refs = 0; 23508c2ecf20Sopenharmony_ci rb->task = NULL; 23518c2ecf20Sopenharmony_ci} 23528c2ecf20Sopenharmony_ci 23538c2ecf20Sopenharmony_cistatic void io_req_free_batch_finish(struct io_ring_ctx *ctx, 23548c2ecf20Sopenharmony_ci struct req_batch *rb) 23558c2ecf20Sopenharmony_ci{ 23568c2ecf20Sopenharmony_ci if (rb->ctx_refs) 23578c2ecf20Sopenharmony_ci percpu_ref_put_many(&ctx->refs, rb->ctx_refs); 23588c2ecf20Sopenharmony_ci if (rb->task) 23598c2ecf20Sopenharmony_ci io_put_task(rb->task, rb->task_refs); 23608c2ecf20Sopenharmony_ci} 23618c2ecf20Sopenharmony_ci 23628c2ecf20Sopenharmony_cistatic void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req, 23638c2ecf20Sopenharmony_ci struct io_submit_state *state) 23648c2ecf20Sopenharmony_ci{ 23658c2ecf20Sopenharmony_ci io_queue_next(req); 23668c2ecf20Sopenharmony_ci io_dismantle_req(req); 23678c2ecf20Sopenharmony_ci 23688c2ecf20Sopenharmony_ci if (req->task != rb->task) { 23698c2ecf20Sopenharmony_ci if (rb->task) 23708c2ecf20Sopenharmony_ci io_put_task(rb->task, rb->task_refs); 23718c2ecf20Sopenharmony_ci rb->task = req->task; 23728c2ecf20Sopenharmony_ci rb->task_refs = 0; 23738c2ecf20Sopenharmony_ci } 23748c2ecf20Sopenharmony_ci rb->task_refs++; 23758c2ecf20Sopenharmony_ci rb->ctx_refs++; 23768c2ecf20Sopenharmony_ci 23778c2ecf20Sopenharmony_ci if (state->free_reqs != ARRAY_SIZE(state->reqs)) 23788c2ecf20Sopenharmony_ci state->reqs[state->free_reqs++] = req; 
23798c2ecf20Sopenharmony_ci else 23808c2ecf20Sopenharmony_ci list_add(&req->inflight_entry, &state->free_list); 23818c2ecf20Sopenharmony_ci} 23828c2ecf20Sopenharmony_ci 23838c2ecf20Sopenharmony_cistatic void io_submit_flush_completions(struct io_ring_ctx *ctx) 23848c2ecf20Sopenharmony_ci __must_hold(&ctx->uring_lock) 23858c2ecf20Sopenharmony_ci{ 23868c2ecf20Sopenharmony_ci struct io_submit_state *state = &ctx->submit_state; 23878c2ecf20Sopenharmony_ci int i, nr = state->compl_nr; 23888c2ecf20Sopenharmony_ci struct req_batch rb; 23898c2ecf20Sopenharmony_ci 23908c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 23918c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) { 23928c2ecf20Sopenharmony_ci struct io_kiocb *req = state->compl_reqs[i]; 23938c2ecf20Sopenharmony_ci 23948c2ecf20Sopenharmony_ci __io_fill_cqe(ctx, req->user_data, req->result, 23958c2ecf20Sopenharmony_ci req->compl.cflags); 23968c2ecf20Sopenharmony_ci } 23978c2ecf20Sopenharmony_ci io_commit_cqring(ctx); 23988c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 23998c2ecf20Sopenharmony_ci io_cqring_ev_posted(ctx); 24008c2ecf20Sopenharmony_ci 24018c2ecf20Sopenharmony_ci io_init_req_batch(&rb); 24028c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) { 24038c2ecf20Sopenharmony_ci struct io_kiocb *req = state->compl_reqs[i]; 24048c2ecf20Sopenharmony_ci 24058c2ecf20Sopenharmony_ci if (req_ref_put_and_test(req)) 24068c2ecf20Sopenharmony_ci io_req_free_batch(&rb, req, &ctx->submit_state); 24078c2ecf20Sopenharmony_ci } 24088c2ecf20Sopenharmony_ci 24098c2ecf20Sopenharmony_ci io_req_free_batch_finish(ctx, &rb); 24108c2ecf20Sopenharmony_ci state->compl_nr = 0; 24118c2ecf20Sopenharmony_ci} 24128c2ecf20Sopenharmony_ci 24138c2ecf20Sopenharmony_ci/* 24148c2ecf20Sopenharmony_ci * Drop reference to request, return next in chain (if there is one) if this 24158c2ecf20Sopenharmony_ci * was the last reference to this request. 
24168c2ecf20Sopenharmony_ci */ 24178c2ecf20Sopenharmony_cistatic inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req) 24188c2ecf20Sopenharmony_ci{ 24198c2ecf20Sopenharmony_ci struct io_kiocb *nxt = NULL; 24208c2ecf20Sopenharmony_ci 24218c2ecf20Sopenharmony_ci if (req_ref_put_and_test(req)) { 24228c2ecf20Sopenharmony_ci nxt = io_req_find_next(req); 24238c2ecf20Sopenharmony_ci __io_free_req(req); 24248c2ecf20Sopenharmony_ci } 24258c2ecf20Sopenharmony_ci return nxt; 24268c2ecf20Sopenharmony_ci} 24278c2ecf20Sopenharmony_ci 24288c2ecf20Sopenharmony_cistatic inline void io_put_req(struct io_kiocb *req) 24298c2ecf20Sopenharmony_ci{ 24308c2ecf20Sopenharmony_ci if (req_ref_put_and_test(req)) 24318c2ecf20Sopenharmony_ci io_free_req(req); 24328c2ecf20Sopenharmony_ci} 24338c2ecf20Sopenharmony_ci 24348c2ecf20Sopenharmony_cistatic inline void io_put_req_deferred(struct io_kiocb *req) 24358c2ecf20Sopenharmony_ci{ 24368c2ecf20Sopenharmony_ci if (req_ref_put_and_test(req)) { 24378c2ecf20Sopenharmony_ci req->io_task_work.func = io_free_req_work; 24388c2ecf20Sopenharmony_ci io_req_task_work_add(req); 24398c2ecf20Sopenharmony_ci } 24408c2ecf20Sopenharmony_ci} 24418c2ecf20Sopenharmony_ci 24428c2ecf20Sopenharmony_cistatic unsigned io_cqring_events(struct io_ring_ctx *ctx) 24438c2ecf20Sopenharmony_ci{ 24448c2ecf20Sopenharmony_ci /* See comment at the top of this file */ 24458c2ecf20Sopenharmony_ci smp_rmb(); 24468c2ecf20Sopenharmony_ci return __io_cqring_events(ctx); 24478c2ecf20Sopenharmony_ci} 24488c2ecf20Sopenharmony_ci 24498c2ecf20Sopenharmony_cistatic inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) 24508c2ecf20Sopenharmony_ci{ 24518c2ecf20Sopenharmony_ci struct io_rings *rings = ctx->rings; 24528c2ecf20Sopenharmony_ci 24538c2ecf20Sopenharmony_ci /* make sure SQ entry isn't read before tail */ 24548c2ecf20Sopenharmony_ci return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head; 24558c2ecf20Sopenharmony_ci} 24568c2ecf20Sopenharmony_ci 
24578c2ecf20Sopenharmony_cistatic unsigned int io_put_kbuf(struct io_kiocb *req, struct io_buffer *kbuf) 24588c2ecf20Sopenharmony_ci{ 24598c2ecf20Sopenharmony_ci unsigned int cflags; 24608c2ecf20Sopenharmony_ci 24618c2ecf20Sopenharmony_ci cflags = kbuf->bid << IORING_CQE_BUFFER_SHIFT; 24628c2ecf20Sopenharmony_ci cflags |= IORING_CQE_F_BUFFER; 24638c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_BUFFER_SELECTED; 24648c2ecf20Sopenharmony_ci kfree(kbuf); 24658c2ecf20Sopenharmony_ci return cflags; 24668c2ecf20Sopenharmony_ci} 24678c2ecf20Sopenharmony_ci 24688c2ecf20Sopenharmony_cistatic inline unsigned int io_put_rw_kbuf(struct io_kiocb *req) 24698c2ecf20Sopenharmony_ci{ 24708c2ecf20Sopenharmony_ci struct io_buffer *kbuf; 24718c2ecf20Sopenharmony_ci 24728c2ecf20Sopenharmony_ci if (likely(!(req->flags & REQ_F_BUFFER_SELECTED))) 24738c2ecf20Sopenharmony_ci return 0; 24748c2ecf20Sopenharmony_ci kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; 24758c2ecf20Sopenharmony_ci return io_put_kbuf(req, kbuf); 24768c2ecf20Sopenharmony_ci} 24778c2ecf20Sopenharmony_ci 24788c2ecf20Sopenharmony_cistatic inline bool io_run_task_work(void) 24798c2ecf20Sopenharmony_ci{ 24808c2ecf20Sopenharmony_ci /* 24818c2ecf20Sopenharmony_ci * PF_IO_WORKER never returns to userspace, so check here if we have 24828c2ecf20Sopenharmony_ci * notify work that needs processing. 
24838c2ecf20Sopenharmony_ci */ 24848c2ecf20Sopenharmony_ci if (current->flags & PF_IO_WORKER && 24858c2ecf20Sopenharmony_ci test_thread_flag(TIF_NOTIFY_RESUME)) { 24868c2ecf20Sopenharmony_ci __set_current_state(TASK_RUNNING); 24878c2ecf20Sopenharmony_ci tracehook_notify_resume(NULL); 24888c2ecf20Sopenharmony_ci } 24898c2ecf20Sopenharmony_ci if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) { 24908c2ecf20Sopenharmony_ci __set_current_state(TASK_RUNNING); 24918c2ecf20Sopenharmony_ci tracehook_notify_signal(); 24928c2ecf20Sopenharmony_ci return true; 24938c2ecf20Sopenharmony_ci } 24948c2ecf20Sopenharmony_ci 24958c2ecf20Sopenharmony_ci return false; 24968c2ecf20Sopenharmony_ci} 24978c2ecf20Sopenharmony_ci 24988c2ecf20Sopenharmony_ci/* 24998c2ecf20Sopenharmony_ci * Find and free completed poll iocbs 25008c2ecf20Sopenharmony_ci */ 25018c2ecf20Sopenharmony_cistatic void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, 25028c2ecf20Sopenharmony_ci struct list_head *done) 25038c2ecf20Sopenharmony_ci{ 25048c2ecf20Sopenharmony_ci struct req_batch rb; 25058c2ecf20Sopenharmony_ci struct io_kiocb *req; 25068c2ecf20Sopenharmony_ci 25078c2ecf20Sopenharmony_ci /* order with ->result store in io_complete_rw_iopoll() */ 25088c2ecf20Sopenharmony_ci smp_rmb(); 25098c2ecf20Sopenharmony_ci 25108c2ecf20Sopenharmony_ci io_init_req_batch(&rb); 25118c2ecf20Sopenharmony_ci while (!list_empty(done)) { 25128c2ecf20Sopenharmony_ci struct io_uring_cqe *cqe; 25138c2ecf20Sopenharmony_ci unsigned cflags; 25148c2ecf20Sopenharmony_ci 25158c2ecf20Sopenharmony_ci req = list_first_entry(done, struct io_kiocb, inflight_entry); 25168c2ecf20Sopenharmony_ci list_del(&req->inflight_entry); 25178c2ecf20Sopenharmony_ci cflags = io_put_rw_kbuf(req); 25188c2ecf20Sopenharmony_ci (*nr_events)++; 25198c2ecf20Sopenharmony_ci 25208c2ecf20Sopenharmony_ci cqe = io_get_cqe(ctx); 25218c2ecf20Sopenharmony_ci if (cqe) { 25228c2ecf20Sopenharmony_ci WRITE_ONCE(cqe->user_data, req->user_data); 
25238c2ecf20Sopenharmony_ci WRITE_ONCE(cqe->res, req->result); 25248c2ecf20Sopenharmony_ci WRITE_ONCE(cqe->flags, cflags); 25258c2ecf20Sopenharmony_ci } else { 25268c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 25278c2ecf20Sopenharmony_ci io_cqring_event_overflow(ctx, req->user_data, 25288c2ecf20Sopenharmony_ci req->result, cflags); 25298c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 25308c2ecf20Sopenharmony_ci } 25318c2ecf20Sopenharmony_ci 25328c2ecf20Sopenharmony_ci if (req_ref_put_and_test(req)) 25338c2ecf20Sopenharmony_ci io_req_free_batch(&rb, req, &ctx->submit_state); 25348c2ecf20Sopenharmony_ci } 25358c2ecf20Sopenharmony_ci 25368c2ecf20Sopenharmony_ci if (io_commit_needs_flush(ctx)) { 25378c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 25388c2ecf20Sopenharmony_ci __io_commit_cqring_flush(ctx); 25398c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 25408c2ecf20Sopenharmony_ci } 25418c2ecf20Sopenharmony_ci __io_commit_cqring(ctx); 25428c2ecf20Sopenharmony_ci io_cqring_ev_posted_iopoll(ctx); 25438c2ecf20Sopenharmony_ci io_req_free_batch_finish(ctx, &rb); 25448c2ecf20Sopenharmony_ci} 25458c2ecf20Sopenharmony_ci 25468c2ecf20Sopenharmony_cistatic int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, 25478c2ecf20Sopenharmony_ci long min) 25488c2ecf20Sopenharmony_ci{ 25498c2ecf20Sopenharmony_ci struct io_kiocb *req, *tmp; 25508c2ecf20Sopenharmony_ci LIST_HEAD(done); 25518c2ecf20Sopenharmony_ci bool spin; 25528c2ecf20Sopenharmony_ci 25538c2ecf20Sopenharmony_ci /* 25548c2ecf20Sopenharmony_ci * Only spin for completions if we don't have multiple devices hanging 25558c2ecf20Sopenharmony_ci * off our complete list, and we're under the requested amount. 
25568c2ecf20Sopenharmony_ci */ 25578c2ecf20Sopenharmony_ci spin = !ctx->poll_multi_queue && *nr_events < min; 25588c2ecf20Sopenharmony_ci 25598c2ecf20Sopenharmony_ci list_for_each_entry_safe(req, tmp, &ctx->iopoll_list, inflight_entry) { 25608c2ecf20Sopenharmony_ci struct kiocb *kiocb = &req->rw.kiocb; 25618c2ecf20Sopenharmony_ci int ret; 25628c2ecf20Sopenharmony_ci 25638c2ecf20Sopenharmony_ci /* 25648c2ecf20Sopenharmony_ci * Move completed and retryable entries to our local lists. 25658c2ecf20Sopenharmony_ci * If we find a request that requires polling, break out 25668c2ecf20Sopenharmony_ci * and complete those lists first, if we have entries there. 25678c2ecf20Sopenharmony_ci */ 25688c2ecf20Sopenharmony_ci if (READ_ONCE(req->iopoll_completed)) { 25698c2ecf20Sopenharmony_ci list_move_tail(&req->inflight_entry, &done); 25708c2ecf20Sopenharmony_ci continue; 25718c2ecf20Sopenharmony_ci } 25728c2ecf20Sopenharmony_ci if (!list_empty(&done)) 25738c2ecf20Sopenharmony_ci break; 25748c2ecf20Sopenharmony_ci 25758c2ecf20Sopenharmony_ci ret = kiocb->ki_filp->f_op->iopoll(kiocb, spin); 25768c2ecf20Sopenharmony_ci if (unlikely(ret < 0)) 25778c2ecf20Sopenharmony_ci return ret; 25788c2ecf20Sopenharmony_ci else if (ret) 25798c2ecf20Sopenharmony_ci spin = false; 25808c2ecf20Sopenharmony_ci 25818c2ecf20Sopenharmony_ci /* iopoll may have completed current req */ 25828c2ecf20Sopenharmony_ci if (READ_ONCE(req->iopoll_completed)) 25838c2ecf20Sopenharmony_ci list_move_tail(&req->inflight_entry, &done); 25848c2ecf20Sopenharmony_ci } 25858c2ecf20Sopenharmony_ci 25868c2ecf20Sopenharmony_ci if (!list_empty(&done)) 25878c2ecf20Sopenharmony_ci io_iopoll_complete(ctx, nr_events, &done); 25888c2ecf20Sopenharmony_ci 25898c2ecf20Sopenharmony_ci return 0; 25908c2ecf20Sopenharmony_ci} 25918c2ecf20Sopenharmony_ci 25928c2ecf20Sopenharmony_ci/* 25938c2ecf20Sopenharmony_ci * We can't just wait for polled events to come to us, we have to actively 25948c2ecf20Sopenharmony_ci * find and complete them. 
25958c2ecf20Sopenharmony_ci */ 25968c2ecf20Sopenharmony_cistatic void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) 25978c2ecf20Sopenharmony_ci{ 25988c2ecf20Sopenharmony_ci if (!(ctx->flags & IORING_SETUP_IOPOLL)) 25998c2ecf20Sopenharmony_ci return; 26008c2ecf20Sopenharmony_ci 26018c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 26028c2ecf20Sopenharmony_ci while (!list_empty(&ctx->iopoll_list)) { 26038c2ecf20Sopenharmony_ci unsigned int nr_events = 0; 26048c2ecf20Sopenharmony_ci 26058c2ecf20Sopenharmony_ci io_do_iopoll(ctx, &nr_events, 0); 26068c2ecf20Sopenharmony_ci 26078c2ecf20Sopenharmony_ci /* let it sleep and repeat later if can't complete a request */ 26088c2ecf20Sopenharmony_ci if (nr_events == 0) 26098c2ecf20Sopenharmony_ci break; 26108c2ecf20Sopenharmony_ci /* 26118c2ecf20Sopenharmony_ci * Ensure we allow local-to-the-cpu processing to take place, 26128c2ecf20Sopenharmony_ci * in this case we need to ensure that we reap all events. 26138c2ecf20Sopenharmony_ci * Also let task_work, etc. 
to progress by releasing the mutex 26148c2ecf20Sopenharmony_ci */ 26158c2ecf20Sopenharmony_ci if (need_resched()) { 26168c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 26178c2ecf20Sopenharmony_ci cond_resched(); 26188c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 26198c2ecf20Sopenharmony_ci } 26208c2ecf20Sopenharmony_ci } 26218c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 26228c2ecf20Sopenharmony_ci} 26238c2ecf20Sopenharmony_ci 26248c2ecf20Sopenharmony_cistatic int io_iopoll_check(struct io_ring_ctx *ctx, long min) 26258c2ecf20Sopenharmony_ci{ 26268c2ecf20Sopenharmony_ci unsigned int nr_events = 0; 26278c2ecf20Sopenharmony_ci int ret = 0; 26288c2ecf20Sopenharmony_ci 26298c2ecf20Sopenharmony_ci /* 26308c2ecf20Sopenharmony_ci * We disallow the app entering submit/complete with polling, but we 26318c2ecf20Sopenharmony_ci * still need to lock the ring to prevent racing with polled issue 26328c2ecf20Sopenharmony_ci * that got punted to a workqueue. 26338c2ecf20Sopenharmony_ci */ 26348c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 26358c2ecf20Sopenharmony_ci /* 26368c2ecf20Sopenharmony_ci * Don't enter poll loop if we already have events pending. 26378c2ecf20Sopenharmony_ci * If we do, we can potentially be spinning for commands that 26388c2ecf20Sopenharmony_ci * already triggered a CQE (eg in error). 26398c2ecf20Sopenharmony_ci */ 26408c2ecf20Sopenharmony_ci if (test_bit(0, &ctx->check_cq_overflow)) 26418c2ecf20Sopenharmony_ci __io_cqring_overflow_flush(ctx, false); 26428c2ecf20Sopenharmony_ci if (io_cqring_events(ctx)) 26438c2ecf20Sopenharmony_ci goto out; 26448c2ecf20Sopenharmony_ci do { 26458c2ecf20Sopenharmony_ci /* 26468c2ecf20Sopenharmony_ci * If a submit got punted to a workqueue, we can have the 26478c2ecf20Sopenharmony_ci * application entering polling for a command before it gets 26488c2ecf20Sopenharmony_ci * issued. 
That app will hold the uring_lock for the duration 26498c2ecf20Sopenharmony_ci * of the poll right here, so we need to take a breather every 26508c2ecf20Sopenharmony_ci * now and then to ensure that the issue has a chance to add 26518c2ecf20Sopenharmony_ci * the poll to the issued list. Otherwise we can spin here 26528c2ecf20Sopenharmony_ci * forever, while the workqueue is stuck trying to acquire the 26538c2ecf20Sopenharmony_ci * very same mutex. 26548c2ecf20Sopenharmony_ci */ 26558c2ecf20Sopenharmony_ci if (list_empty(&ctx->iopoll_list)) { 26568c2ecf20Sopenharmony_ci u32 tail = ctx->cached_cq_tail; 26578c2ecf20Sopenharmony_ci 26588c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 26598c2ecf20Sopenharmony_ci io_run_task_work(); 26608c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 26618c2ecf20Sopenharmony_ci 26628c2ecf20Sopenharmony_ci /* some requests don't go through iopoll_list */ 26638c2ecf20Sopenharmony_ci if (tail != ctx->cached_cq_tail || 26648c2ecf20Sopenharmony_ci list_empty(&ctx->iopoll_list)) 26658c2ecf20Sopenharmony_ci break; 26668c2ecf20Sopenharmony_ci } 26678c2ecf20Sopenharmony_ci ret = io_do_iopoll(ctx, &nr_events, min); 26688c2ecf20Sopenharmony_ci 26698c2ecf20Sopenharmony_ci if (task_sigpending(current)) { 26708c2ecf20Sopenharmony_ci ret = -EINTR; 26718c2ecf20Sopenharmony_ci goto out; 26728c2ecf20Sopenharmony_ci } 26738c2ecf20Sopenharmony_ci } while (!ret && nr_events < min && !need_resched()); 26748c2ecf20Sopenharmony_ciout: 26758c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 26768c2ecf20Sopenharmony_ci return ret; 26778c2ecf20Sopenharmony_ci} 26788c2ecf20Sopenharmony_ci 26798c2ecf20Sopenharmony_cistatic void kiocb_end_write(struct io_kiocb *req) 26808c2ecf20Sopenharmony_ci{ 26818c2ecf20Sopenharmony_ci /* 26828c2ecf20Sopenharmony_ci * Tell lockdep we inherited freeze protection from submission 26838c2ecf20Sopenharmony_ci * thread. 
26848c2ecf20Sopenharmony_ci */ 26858c2ecf20Sopenharmony_ci if (req->flags & REQ_F_ISREG) { 26868c2ecf20Sopenharmony_ci struct super_block *sb = file_inode(req->file)->i_sb; 26878c2ecf20Sopenharmony_ci 26888c2ecf20Sopenharmony_ci __sb_writers_acquired(sb, SB_FREEZE_WRITE); 26898c2ecf20Sopenharmony_ci sb_end_write(sb); 26908c2ecf20Sopenharmony_ci } 26918c2ecf20Sopenharmony_ci} 26928c2ecf20Sopenharmony_ci 26938c2ecf20Sopenharmony_ci#ifdef CONFIG_BLOCK 26948c2ecf20Sopenharmony_cistatic bool io_resubmit_prep(struct io_kiocb *req) 26958c2ecf20Sopenharmony_ci{ 26968c2ecf20Sopenharmony_ci struct io_async_rw *rw = req->async_data; 26978c2ecf20Sopenharmony_ci 26988c2ecf20Sopenharmony_ci if (!rw) 26998c2ecf20Sopenharmony_ci return !io_req_prep_async(req); 27008c2ecf20Sopenharmony_ci iov_iter_restore(&rw->iter, &rw->iter_state); 27018c2ecf20Sopenharmony_ci return true; 27028c2ecf20Sopenharmony_ci} 27038c2ecf20Sopenharmony_ci 27048c2ecf20Sopenharmony_cistatic bool io_rw_should_reissue(struct io_kiocb *req) 27058c2ecf20Sopenharmony_ci{ 27068c2ecf20Sopenharmony_ci umode_t mode = file_inode(req->file)->i_mode; 27078c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 27088c2ecf20Sopenharmony_ci 27098c2ecf20Sopenharmony_ci if (!S_ISBLK(mode) && !S_ISREG(mode)) 27108c2ecf20Sopenharmony_ci return false; 27118c2ecf20Sopenharmony_ci if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() && 27128c2ecf20Sopenharmony_ci !(ctx->flags & IORING_SETUP_IOPOLL))) 27138c2ecf20Sopenharmony_ci return false; 27148c2ecf20Sopenharmony_ci /* 27158c2ecf20Sopenharmony_ci * If ref is dying, we might be running poll reap from the exit work. 27168c2ecf20Sopenharmony_ci * Don't attempt to reissue from that path, just let it fail with 27178c2ecf20Sopenharmony_ci * -EAGAIN. 
27188c2ecf20Sopenharmony_ci */ 27198c2ecf20Sopenharmony_ci if (percpu_ref_is_dying(&ctx->refs)) 27208c2ecf20Sopenharmony_ci return false; 27218c2ecf20Sopenharmony_ci /* 27228c2ecf20Sopenharmony_ci * Play it safe and assume not safe to re-import and reissue if we're 27238c2ecf20Sopenharmony_ci * not in the original thread group (or in task context). 27248c2ecf20Sopenharmony_ci */ 27258c2ecf20Sopenharmony_ci if (!same_thread_group(req->task, current) || !in_task()) 27268c2ecf20Sopenharmony_ci return false; 27278c2ecf20Sopenharmony_ci return true; 27288c2ecf20Sopenharmony_ci} 27298c2ecf20Sopenharmony_ci#else 27308c2ecf20Sopenharmony_cistatic bool io_resubmit_prep(struct io_kiocb *req) 27318c2ecf20Sopenharmony_ci{ 27328c2ecf20Sopenharmony_ci return false; 27338c2ecf20Sopenharmony_ci} 27348c2ecf20Sopenharmony_cistatic bool io_rw_should_reissue(struct io_kiocb *req) 27358c2ecf20Sopenharmony_ci{ 27368c2ecf20Sopenharmony_ci return false; 27378c2ecf20Sopenharmony_ci} 27388c2ecf20Sopenharmony_ci#endif 27398c2ecf20Sopenharmony_ci 27408c2ecf20Sopenharmony_ci/* 27418c2ecf20Sopenharmony_ci * Trigger the notifications after having done some IO, and finish the write 27428c2ecf20Sopenharmony_ci * accounting, if any. 
27438c2ecf20Sopenharmony_ci */ 27448c2ecf20Sopenharmony_cistatic void io_req_io_end(struct io_kiocb *req) 27458c2ecf20Sopenharmony_ci{ 27468c2ecf20Sopenharmony_ci struct io_rw *rw = &req->rw; 27478c2ecf20Sopenharmony_ci 27488c2ecf20Sopenharmony_ci if (rw->kiocb.ki_flags & IOCB_WRITE) { 27498c2ecf20Sopenharmony_ci kiocb_end_write(req); 27508c2ecf20Sopenharmony_ci fsnotify_modify(req->file); 27518c2ecf20Sopenharmony_ci } else { 27528c2ecf20Sopenharmony_ci fsnotify_access(req->file); 27538c2ecf20Sopenharmony_ci } 27548c2ecf20Sopenharmony_ci} 27558c2ecf20Sopenharmony_ci 27568c2ecf20Sopenharmony_cistatic bool __io_complete_rw_common(struct io_kiocb *req, long res) 27578c2ecf20Sopenharmony_ci{ 27588c2ecf20Sopenharmony_ci if (res != req->result) { 27598c2ecf20Sopenharmony_ci if ((res == -EAGAIN || res == -EOPNOTSUPP) && 27608c2ecf20Sopenharmony_ci io_rw_should_reissue(req)) { 27618c2ecf20Sopenharmony_ci /* 27628c2ecf20Sopenharmony_ci * Reissue will start accounting again, finish the 27638c2ecf20Sopenharmony_ci * current cycle. 
27648c2ecf20Sopenharmony_ci */ 27658c2ecf20Sopenharmony_ci io_req_io_end(req); 27668c2ecf20Sopenharmony_ci req->flags |= REQ_F_REISSUE; 27678c2ecf20Sopenharmony_ci return true; 27688c2ecf20Sopenharmony_ci } 27698c2ecf20Sopenharmony_ci req_set_fail(req); 27708c2ecf20Sopenharmony_ci req->result = res; 27718c2ecf20Sopenharmony_ci } 27728c2ecf20Sopenharmony_ci return false; 27738c2ecf20Sopenharmony_ci} 27748c2ecf20Sopenharmony_ci 27758c2ecf20Sopenharmony_cistatic inline int io_fixup_rw_res(struct io_kiocb *req, long res) 27768c2ecf20Sopenharmony_ci{ 27778c2ecf20Sopenharmony_ci struct io_async_rw *io = req->async_data; 27788c2ecf20Sopenharmony_ci 27798c2ecf20Sopenharmony_ci /* add previously done IO, if any */ 27808c2ecf20Sopenharmony_ci if (io && io->bytes_done > 0) { 27818c2ecf20Sopenharmony_ci if (res < 0) 27828c2ecf20Sopenharmony_ci res = io->bytes_done; 27838c2ecf20Sopenharmony_ci else 27848c2ecf20Sopenharmony_ci res += io->bytes_done; 27858c2ecf20Sopenharmony_ci } 27868c2ecf20Sopenharmony_ci return res; 27878c2ecf20Sopenharmony_ci} 27888c2ecf20Sopenharmony_ci 27898c2ecf20Sopenharmony_cistatic void io_req_task_complete(struct io_kiocb *req, bool *locked) 27908c2ecf20Sopenharmony_ci{ 27918c2ecf20Sopenharmony_ci unsigned int cflags = io_put_rw_kbuf(req); 27928c2ecf20Sopenharmony_ci int res = req->result; 27938c2ecf20Sopenharmony_ci 27948c2ecf20Sopenharmony_ci if (*locked) { 27958c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 27968c2ecf20Sopenharmony_ci struct io_submit_state *state = &ctx->submit_state; 27978c2ecf20Sopenharmony_ci 27988c2ecf20Sopenharmony_ci io_req_complete_state(req, res, cflags); 27998c2ecf20Sopenharmony_ci state->compl_reqs[state->compl_nr++] = req; 28008c2ecf20Sopenharmony_ci if (state->compl_nr == ARRAY_SIZE(state->compl_reqs)) 28018c2ecf20Sopenharmony_ci io_submit_flush_completions(ctx); 28028c2ecf20Sopenharmony_ci } else { 28038c2ecf20Sopenharmony_ci io_req_complete_post(req, res, cflags); 28048c2ecf20Sopenharmony_ci } 
28058c2ecf20Sopenharmony_ci} 28068c2ecf20Sopenharmony_ci 28078c2ecf20Sopenharmony_cistatic void io_req_rw_complete(struct io_kiocb *req, bool *locked) 28088c2ecf20Sopenharmony_ci{ 28098c2ecf20Sopenharmony_ci io_req_io_end(req); 28108c2ecf20Sopenharmony_ci io_req_task_complete(req, locked); 28118c2ecf20Sopenharmony_ci} 28128c2ecf20Sopenharmony_ci 28138c2ecf20Sopenharmony_cistatic void io_complete_rw(struct kiocb *kiocb, long res, long res2) 28148c2ecf20Sopenharmony_ci{ 28158c2ecf20Sopenharmony_ci struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); 28168c2ecf20Sopenharmony_ci 28178c2ecf20Sopenharmony_ci if (__io_complete_rw_common(req, res)) 28188c2ecf20Sopenharmony_ci return; 28198c2ecf20Sopenharmony_ci req->result = io_fixup_rw_res(req, res); 28208c2ecf20Sopenharmony_ci req->io_task_work.func = io_req_rw_complete; 28218c2ecf20Sopenharmony_ci io_req_task_work_add(req); 28228c2ecf20Sopenharmony_ci} 28238c2ecf20Sopenharmony_ci 28248c2ecf20Sopenharmony_cistatic void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) 28258c2ecf20Sopenharmony_ci{ 28268c2ecf20Sopenharmony_ci struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); 28278c2ecf20Sopenharmony_ci 28288c2ecf20Sopenharmony_ci if (kiocb->ki_flags & IOCB_WRITE) 28298c2ecf20Sopenharmony_ci kiocb_end_write(req); 28308c2ecf20Sopenharmony_ci if (unlikely(res != req->result)) { 28318c2ecf20Sopenharmony_ci if (res == -EAGAIN && io_rw_should_reissue(req)) { 28328c2ecf20Sopenharmony_ci req->flags |= REQ_F_REISSUE; 28338c2ecf20Sopenharmony_ci return; 28348c2ecf20Sopenharmony_ci } 28358c2ecf20Sopenharmony_ci } 28368c2ecf20Sopenharmony_ci 28378c2ecf20Sopenharmony_ci WRITE_ONCE(req->result, res); 28388c2ecf20Sopenharmony_ci /* order with io_iopoll_complete() checking ->result */ 28398c2ecf20Sopenharmony_ci smp_wmb(); 28408c2ecf20Sopenharmony_ci WRITE_ONCE(req->iopoll_completed, 1); 28418c2ecf20Sopenharmony_ci} 28428c2ecf20Sopenharmony_ci 28438c2ecf20Sopenharmony_ci/* 
28448c2ecf20Sopenharmony_ci * After the iocb has been issued, it's safe to be found on the poll list. 28458c2ecf20Sopenharmony_ci * Adding the kiocb to the list AFTER submission ensures that we don't 28468c2ecf20Sopenharmony_ci * find it from a io_do_iopoll() thread before the issuer is done 28478c2ecf20Sopenharmony_ci * accessing the kiocb cookie. 28488c2ecf20Sopenharmony_ci */ 28498c2ecf20Sopenharmony_cistatic void io_iopoll_req_issued(struct io_kiocb *req) 28508c2ecf20Sopenharmony_ci{ 28518c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 28528c2ecf20Sopenharmony_ci const bool in_async = io_wq_current_is_worker(); 28538c2ecf20Sopenharmony_ci 28548c2ecf20Sopenharmony_ci /* workqueue context doesn't hold uring_lock, grab it now */ 28558c2ecf20Sopenharmony_ci if (unlikely(in_async)) 28568c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 28578c2ecf20Sopenharmony_ci 28588c2ecf20Sopenharmony_ci /* 28598c2ecf20Sopenharmony_ci * Track whether we have multiple files in our lists. This will impact 28608c2ecf20Sopenharmony_ci * how we do polling eventually, not spinning if we're on potentially 28618c2ecf20Sopenharmony_ci * different devices. 
28628c2ecf20Sopenharmony_ci */ 28638c2ecf20Sopenharmony_ci if (list_empty(&ctx->iopoll_list)) { 28648c2ecf20Sopenharmony_ci ctx->poll_multi_queue = false; 28658c2ecf20Sopenharmony_ci } else if (!ctx->poll_multi_queue) { 28668c2ecf20Sopenharmony_ci struct io_kiocb *list_req; 28678c2ecf20Sopenharmony_ci unsigned int queue_num0, queue_num1; 28688c2ecf20Sopenharmony_ci 28698c2ecf20Sopenharmony_ci list_req = list_first_entry(&ctx->iopoll_list, struct io_kiocb, 28708c2ecf20Sopenharmony_ci inflight_entry); 28718c2ecf20Sopenharmony_ci 28728c2ecf20Sopenharmony_ci if (list_req->file != req->file) { 28738c2ecf20Sopenharmony_ci ctx->poll_multi_queue = true; 28748c2ecf20Sopenharmony_ci } else { 28758c2ecf20Sopenharmony_ci queue_num0 = blk_qc_t_to_queue_num(list_req->rw.kiocb.ki_cookie); 28768c2ecf20Sopenharmony_ci queue_num1 = blk_qc_t_to_queue_num(req->rw.kiocb.ki_cookie); 28778c2ecf20Sopenharmony_ci if (queue_num0 != queue_num1) 28788c2ecf20Sopenharmony_ci ctx->poll_multi_queue = true; 28798c2ecf20Sopenharmony_ci } 28808c2ecf20Sopenharmony_ci } 28818c2ecf20Sopenharmony_ci 28828c2ecf20Sopenharmony_ci /* 28838c2ecf20Sopenharmony_ci * For fast devices, IO may have already completed. If it has, add 28848c2ecf20Sopenharmony_ci * it to the front so we find it first. 28858c2ecf20Sopenharmony_ci */ 28868c2ecf20Sopenharmony_ci if (READ_ONCE(req->iopoll_completed)) 28878c2ecf20Sopenharmony_ci list_add(&req->inflight_entry, &ctx->iopoll_list); 28888c2ecf20Sopenharmony_ci else 28898c2ecf20Sopenharmony_ci list_add_tail(&req->inflight_entry, &ctx->iopoll_list); 28908c2ecf20Sopenharmony_ci 28918c2ecf20Sopenharmony_ci if (unlikely(in_async)) { 28928c2ecf20Sopenharmony_ci /* 28938c2ecf20Sopenharmony_ci * If IORING_SETUP_SQPOLL is enabled, sqes are either handle 28948c2ecf20Sopenharmony_ci * in sq thread task context or in io worker task context. 
If 28958c2ecf20Sopenharmony_ci * current task context is sq thread, we don't need to check 28968c2ecf20Sopenharmony_ci * whether should wake up sq thread. 28978c2ecf20Sopenharmony_ci */ 28988c2ecf20Sopenharmony_ci if ((ctx->flags & IORING_SETUP_SQPOLL) && 28998c2ecf20Sopenharmony_ci wq_has_sleeper(&ctx->sq_data->wait)) 29008c2ecf20Sopenharmony_ci wake_up(&ctx->sq_data->wait); 29018c2ecf20Sopenharmony_ci 29028c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 29038c2ecf20Sopenharmony_ci } 29048c2ecf20Sopenharmony_ci} 29058c2ecf20Sopenharmony_ci 29068c2ecf20Sopenharmony_cistatic bool io_bdev_nowait(struct block_device *bdev) 29078c2ecf20Sopenharmony_ci{ 29088c2ecf20Sopenharmony_ci return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); 29098c2ecf20Sopenharmony_ci} 29108c2ecf20Sopenharmony_ci 29118c2ecf20Sopenharmony_ci/* 29128c2ecf20Sopenharmony_ci * If we tracked the file through the SCM inflight mechanism, we could support 29138c2ecf20Sopenharmony_ci * any file. For now, just ensure that anything potentially problematic is done 29148c2ecf20Sopenharmony_ci * inline. 
29158c2ecf20Sopenharmony_ci */ 29168c2ecf20Sopenharmony_cistatic bool __io_file_supports_nowait(struct file *file, int rw) 29178c2ecf20Sopenharmony_ci{ 29188c2ecf20Sopenharmony_ci umode_t mode = file_inode(file)->i_mode; 29198c2ecf20Sopenharmony_ci 29208c2ecf20Sopenharmony_ci if (S_ISBLK(mode)) { 29218c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_BLOCK) && 29228c2ecf20Sopenharmony_ci io_bdev_nowait(I_BDEV(file->f_mapping->host))) 29238c2ecf20Sopenharmony_ci return true; 29248c2ecf20Sopenharmony_ci return false; 29258c2ecf20Sopenharmony_ci } 29268c2ecf20Sopenharmony_ci if (S_ISSOCK(mode)) 29278c2ecf20Sopenharmony_ci return true; 29288c2ecf20Sopenharmony_ci if (S_ISREG(mode)) { 29298c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_BLOCK) && 29308c2ecf20Sopenharmony_ci io_bdev_nowait(file->f_inode->i_sb->s_bdev) && 29318c2ecf20Sopenharmony_ci file->f_op != &io_uring_fops) 29328c2ecf20Sopenharmony_ci return true; 29338c2ecf20Sopenharmony_ci return false; 29348c2ecf20Sopenharmony_ci } 29358c2ecf20Sopenharmony_ci 29368c2ecf20Sopenharmony_ci /* any ->read/write should understand O_NONBLOCK */ 29378c2ecf20Sopenharmony_ci if (file->f_flags & O_NONBLOCK) 29388c2ecf20Sopenharmony_ci return true; 29398c2ecf20Sopenharmony_ci 29408c2ecf20Sopenharmony_ci if (!(file->f_mode & FMODE_NOWAIT)) 29418c2ecf20Sopenharmony_ci return false; 29428c2ecf20Sopenharmony_ci 29438c2ecf20Sopenharmony_ci if (rw == READ) 29448c2ecf20Sopenharmony_ci return file->f_op->read_iter != NULL; 29458c2ecf20Sopenharmony_ci 29468c2ecf20Sopenharmony_ci return file->f_op->write_iter != NULL; 29478c2ecf20Sopenharmony_ci} 29488c2ecf20Sopenharmony_ci 29498c2ecf20Sopenharmony_cistatic bool io_file_supports_nowait(struct io_kiocb *req, int rw) 29508c2ecf20Sopenharmony_ci{ 29518c2ecf20Sopenharmony_ci if (rw == READ && (req->flags & REQ_F_NOWAIT_READ)) 29528c2ecf20Sopenharmony_ci return true; 29538c2ecf20Sopenharmony_ci else if (rw == WRITE && (req->flags & REQ_F_NOWAIT_WRITE)) 29548c2ecf20Sopenharmony_ci return true; 
29558c2ecf20Sopenharmony_ci 29568c2ecf20Sopenharmony_ci return __io_file_supports_nowait(req->file, rw); 29578c2ecf20Sopenharmony_ci} 29588c2ecf20Sopenharmony_ci 29598c2ecf20Sopenharmony_cistatic int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, 29608c2ecf20Sopenharmony_ci int rw) 29618c2ecf20Sopenharmony_ci{ 29628c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 29638c2ecf20Sopenharmony_ci struct kiocb *kiocb = &req->rw.kiocb; 29648c2ecf20Sopenharmony_ci struct file *file = req->file; 29658c2ecf20Sopenharmony_ci unsigned ioprio; 29668c2ecf20Sopenharmony_ci int ret; 29678c2ecf20Sopenharmony_ci 29688c2ecf20Sopenharmony_ci if (!io_req_ffs_set(req) && S_ISREG(file_inode(file)->i_mode)) 29698c2ecf20Sopenharmony_ci req->flags |= REQ_F_ISREG; 29708c2ecf20Sopenharmony_ci 29718c2ecf20Sopenharmony_ci kiocb->ki_pos = READ_ONCE(sqe->off); 29728c2ecf20Sopenharmony_ci kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); 29738c2ecf20Sopenharmony_ci kiocb->ki_flags = iocb_flags(kiocb->ki_filp); 29748c2ecf20Sopenharmony_ci ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); 29758c2ecf20Sopenharmony_ci if (unlikely(ret)) 29768c2ecf20Sopenharmony_ci return ret; 29778c2ecf20Sopenharmony_ci 29788c2ecf20Sopenharmony_ci /* 29798c2ecf20Sopenharmony_ci * If the file is marked O_NONBLOCK, still allow retry for it if it 29808c2ecf20Sopenharmony_ci * supports async. Otherwise it's impossible to use O_NONBLOCK files 29818c2ecf20Sopenharmony_ci * reliably. If not, or it IOCB_NOWAIT is set, don't retry. 
29828c2ecf20Sopenharmony_ci */ 29838c2ecf20Sopenharmony_ci if ((kiocb->ki_flags & IOCB_NOWAIT) || 29848c2ecf20Sopenharmony_ci ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req, rw))) 29858c2ecf20Sopenharmony_ci req->flags |= REQ_F_NOWAIT; 29868c2ecf20Sopenharmony_ci 29878c2ecf20Sopenharmony_ci ioprio = READ_ONCE(sqe->ioprio); 29888c2ecf20Sopenharmony_ci if (ioprio) { 29898c2ecf20Sopenharmony_ci ret = ioprio_check_cap(ioprio); 29908c2ecf20Sopenharmony_ci if (ret) 29918c2ecf20Sopenharmony_ci return ret; 29928c2ecf20Sopenharmony_ci 29938c2ecf20Sopenharmony_ci kiocb->ki_ioprio = ioprio; 29948c2ecf20Sopenharmony_ci } else 29958c2ecf20Sopenharmony_ci kiocb->ki_ioprio = get_current_ioprio(); 29968c2ecf20Sopenharmony_ci 29978c2ecf20Sopenharmony_ci if (ctx->flags & IORING_SETUP_IOPOLL) { 29988c2ecf20Sopenharmony_ci if (!(kiocb->ki_flags & IOCB_DIRECT) || 29998c2ecf20Sopenharmony_ci !kiocb->ki_filp->f_op->iopoll) 30008c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 30018c2ecf20Sopenharmony_ci 30028c2ecf20Sopenharmony_ci kiocb->ki_flags |= IOCB_HIPRI; 30038c2ecf20Sopenharmony_ci kiocb->ki_complete = io_complete_rw_iopoll; 30048c2ecf20Sopenharmony_ci req->iopoll_completed = 0; 30058c2ecf20Sopenharmony_ci } else { 30068c2ecf20Sopenharmony_ci if (kiocb->ki_flags & IOCB_HIPRI) 30078c2ecf20Sopenharmony_ci return -EINVAL; 30088c2ecf20Sopenharmony_ci kiocb->ki_complete = io_complete_rw; 30098c2ecf20Sopenharmony_ci } 30108c2ecf20Sopenharmony_ci 30118c2ecf20Sopenharmony_ci /* used for fixed read/write too - just read unconditionally */ 30128c2ecf20Sopenharmony_ci req->buf_index = READ_ONCE(sqe->buf_index); 30138c2ecf20Sopenharmony_ci req->imu = NULL; 30148c2ecf20Sopenharmony_ci 30158c2ecf20Sopenharmony_ci if (req->opcode == IORING_OP_READ_FIXED || 30168c2ecf20Sopenharmony_ci req->opcode == IORING_OP_WRITE_FIXED) { 30178c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 30188c2ecf20Sopenharmony_ci u16 index; 30198c2ecf20Sopenharmony_ci 30208c2ecf20Sopenharmony_ci if 
(unlikely(req->buf_index >= ctx->nr_user_bufs)) 30218c2ecf20Sopenharmony_ci return -EFAULT; 30228c2ecf20Sopenharmony_ci index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); 30238c2ecf20Sopenharmony_ci req->imu = ctx->user_bufs[index]; 30248c2ecf20Sopenharmony_ci io_req_set_rsrc_node(req); 30258c2ecf20Sopenharmony_ci } 30268c2ecf20Sopenharmony_ci 30278c2ecf20Sopenharmony_ci req->rw.addr = READ_ONCE(sqe->addr); 30288c2ecf20Sopenharmony_ci req->rw.len = READ_ONCE(sqe->len); 30298c2ecf20Sopenharmony_ci return 0; 30308c2ecf20Sopenharmony_ci} 30318c2ecf20Sopenharmony_ci 30328c2ecf20Sopenharmony_cistatic inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) 30338c2ecf20Sopenharmony_ci{ 30348c2ecf20Sopenharmony_ci switch (ret) { 30358c2ecf20Sopenharmony_ci case -EIOCBQUEUED: 30368c2ecf20Sopenharmony_ci break; 30378c2ecf20Sopenharmony_ci case -ERESTARTSYS: 30388c2ecf20Sopenharmony_ci case -ERESTARTNOINTR: 30398c2ecf20Sopenharmony_ci case -ERESTARTNOHAND: 30408c2ecf20Sopenharmony_ci case -ERESTART_RESTARTBLOCK: 30418c2ecf20Sopenharmony_ci /* 30428c2ecf20Sopenharmony_ci * We can't just restart the syscall, since previously 30438c2ecf20Sopenharmony_ci * submitted sqes may already be in progress. Just fail this 30448c2ecf20Sopenharmony_ci * IO with EINTR. 
30458c2ecf20Sopenharmony_ci */ 30468c2ecf20Sopenharmony_ci ret = -EINTR; 30478c2ecf20Sopenharmony_ci fallthrough; 30488c2ecf20Sopenharmony_ci default: 30498c2ecf20Sopenharmony_ci kiocb->ki_complete(kiocb, ret, 0); 30508c2ecf20Sopenharmony_ci } 30518c2ecf20Sopenharmony_ci} 30528c2ecf20Sopenharmony_ci 30538c2ecf20Sopenharmony_cistatic inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) 30548c2ecf20Sopenharmony_ci{ 30558c2ecf20Sopenharmony_ci struct kiocb *kiocb = &req->rw.kiocb; 30568c2ecf20Sopenharmony_ci 30578c2ecf20Sopenharmony_ci if (kiocb->ki_pos != -1) 30588c2ecf20Sopenharmony_ci return &kiocb->ki_pos; 30598c2ecf20Sopenharmony_ci 30608c2ecf20Sopenharmony_ci if (!(req->file->f_mode & FMODE_STREAM)) { 30618c2ecf20Sopenharmony_ci req->flags |= REQ_F_CUR_POS; 30628c2ecf20Sopenharmony_ci kiocb->ki_pos = req->file->f_pos; 30638c2ecf20Sopenharmony_ci return &kiocb->ki_pos; 30648c2ecf20Sopenharmony_ci } 30658c2ecf20Sopenharmony_ci 30668c2ecf20Sopenharmony_ci kiocb->ki_pos = 0; 30678c2ecf20Sopenharmony_ci return NULL; 30688c2ecf20Sopenharmony_ci} 30698c2ecf20Sopenharmony_ci 30708c2ecf20Sopenharmony_cistatic void kiocb_done(struct kiocb *kiocb, ssize_t ret, 30718c2ecf20Sopenharmony_ci unsigned int issue_flags) 30728c2ecf20Sopenharmony_ci{ 30738c2ecf20Sopenharmony_ci struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); 30748c2ecf20Sopenharmony_ci 30758c2ecf20Sopenharmony_ci if (req->flags & REQ_F_CUR_POS) 30768c2ecf20Sopenharmony_ci req->file->f_pos = kiocb->ki_pos; 30778c2ecf20Sopenharmony_ci if (ret >= 0 && (kiocb->ki_complete == io_complete_rw)) { 30788c2ecf20Sopenharmony_ci if (!__io_complete_rw_common(req, ret)) { 30798c2ecf20Sopenharmony_ci /* 30808c2ecf20Sopenharmony_ci * Safe to call io_end from here as we're inline 30818c2ecf20Sopenharmony_ci * from the submission path. 
30828c2ecf20Sopenharmony_ci */ 30838c2ecf20Sopenharmony_ci io_req_io_end(req); 30848c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, 30858c2ecf20Sopenharmony_ci io_fixup_rw_res(req, ret), 30868c2ecf20Sopenharmony_ci io_put_rw_kbuf(req)); 30878c2ecf20Sopenharmony_ci } 30888c2ecf20Sopenharmony_ci } else { 30898c2ecf20Sopenharmony_ci io_rw_done(kiocb, ret); 30908c2ecf20Sopenharmony_ci } 30918c2ecf20Sopenharmony_ci 30928c2ecf20Sopenharmony_ci if (req->flags & REQ_F_REISSUE) { 30938c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_REISSUE; 30948c2ecf20Sopenharmony_ci if (io_resubmit_prep(req)) { 30958c2ecf20Sopenharmony_ci io_req_task_queue_reissue(req); 30968c2ecf20Sopenharmony_ci } else { 30978c2ecf20Sopenharmony_ci unsigned int cflags = io_put_rw_kbuf(req); 30988c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 30998c2ecf20Sopenharmony_ci 31008c2ecf20Sopenharmony_ci ret = io_fixup_rw_res(req, ret); 31018c2ecf20Sopenharmony_ci req_set_fail(req); 31028c2ecf20Sopenharmony_ci if (!(issue_flags & IO_URING_F_NONBLOCK)) { 31038c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 31048c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, cflags); 31058c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 31068c2ecf20Sopenharmony_ci } else { 31078c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, cflags); 31088c2ecf20Sopenharmony_ci } 31098c2ecf20Sopenharmony_ci } 31108c2ecf20Sopenharmony_ci } 31118c2ecf20Sopenharmony_ci} 31128c2ecf20Sopenharmony_ci 31138c2ecf20Sopenharmony_cistatic int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, 31148c2ecf20Sopenharmony_ci struct io_mapped_ubuf *imu) 31158c2ecf20Sopenharmony_ci{ 31168c2ecf20Sopenharmony_ci size_t len = req->rw.len; 31178c2ecf20Sopenharmony_ci u64 buf_end, buf_addr = req->rw.addr; 31188c2ecf20Sopenharmony_ci size_t offset; 31198c2ecf20Sopenharmony_ci 31208c2ecf20Sopenharmony_ci if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end))) 
31218c2ecf20Sopenharmony_ci return -EFAULT; 31228c2ecf20Sopenharmony_ci /* not inside the mapped region */ 31238c2ecf20Sopenharmony_ci if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end)) 31248c2ecf20Sopenharmony_ci return -EFAULT; 31258c2ecf20Sopenharmony_ci 31268c2ecf20Sopenharmony_ci /* 31278c2ecf20Sopenharmony_ci * May not be a start of buffer, set size appropriately 31288c2ecf20Sopenharmony_ci * and advance us to the beginning. 31298c2ecf20Sopenharmony_ci */ 31308c2ecf20Sopenharmony_ci offset = buf_addr - imu->ubuf; 31318c2ecf20Sopenharmony_ci iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); 31328c2ecf20Sopenharmony_ci 31338c2ecf20Sopenharmony_ci if (offset) { 31348c2ecf20Sopenharmony_ci /* 31358c2ecf20Sopenharmony_ci * Don't use iov_iter_advance() here, as it's really slow for 31368c2ecf20Sopenharmony_ci * using the latter parts of a big fixed buffer - it iterates 31378c2ecf20Sopenharmony_ci * over each segment manually. We can cheat a bit here, because 31388c2ecf20Sopenharmony_ci * we know that: 31398c2ecf20Sopenharmony_ci * 31408c2ecf20Sopenharmony_ci * 1) it's a BVEC iter, we set it up 31418c2ecf20Sopenharmony_ci * 2) all bvecs are PAGE_SIZE in size, except potentially the 31428c2ecf20Sopenharmony_ci * first and last bvec 31438c2ecf20Sopenharmony_ci * 31448c2ecf20Sopenharmony_ci * So just find our index, and adjust the iterator afterwards. 31458c2ecf20Sopenharmony_ci * If the offset is within the first bvec (or the whole first 31468c2ecf20Sopenharmony_ci * bvec, just use iov_iter_advance(). This makes it easier 31478c2ecf20Sopenharmony_ci * since we can just skip the first segment, which may not 31488c2ecf20Sopenharmony_ci * be PAGE_SIZE aligned. 
31498c2ecf20Sopenharmony_ci */ 31508c2ecf20Sopenharmony_ci const struct bio_vec *bvec = imu->bvec; 31518c2ecf20Sopenharmony_ci 31528c2ecf20Sopenharmony_ci if (offset < bvec->bv_len) { 31538c2ecf20Sopenharmony_ci iov_iter_advance(iter, offset); 31548c2ecf20Sopenharmony_ci } else { 31558c2ecf20Sopenharmony_ci unsigned long seg_skip; 31568c2ecf20Sopenharmony_ci 31578c2ecf20Sopenharmony_ci /* skip first vec */ 31588c2ecf20Sopenharmony_ci offset -= bvec->bv_len; 31598c2ecf20Sopenharmony_ci seg_skip = 1 + (offset >> PAGE_SHIFT); 31608c2ecf20Sopenharmony_ci 31618c2ecf20Sopenharmony_ci iter->bvec = bvec + seg_skip; 31628c2ecf20Sopenharmony_ci iter->nr_segs -= seg_skip; 31638c2ecf20Sopenharmony_ci iter->count -= bvec->bv_len + offset; 31648c2ecf20Sopenharmony_ci iter->iov_offset = offset & ~PAGE_MASK; 31658c2ecf20Sopenharmony_ci } 31668c2ecf20Sopenharmony_ci } 31678c2ecf20Sopenharmony_ci 31688c2ecf20Sopenharmony_ci return 0; 31698c2ecf20Sopenharmony_ci} 31708c2ecf20Sopenharmony_ci 31718c2ecf20Sopenharmony_cistatic int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) 31728c2ecf20Sopenharmony_ci{ 31738c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!req->imu)) 31748c2ecf20Sopenharmony_ci return -EFAULT; 31758c2ecf20Sopenharmony_ci return __io_import_fixed(req, rw, iter, req->imu); 31768c2ecf20Sopenharmony_ci} 31778c2ecf20Sopenharmony_ci 31788c2ecf20Sopenharmony_cistatic void io_ring_submit_unlock(struct io_ring_ctx *ctx, bool needs_lock) 31798c2ecf20Sopenharmony_ci{ 31808c2ecf20Sopenharmony_ci if (needs_lock) 31818c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 31828c2ecf20Sopenharmony_ci} 31838c2ecf20Sopenharmony_ci 31848c2ecf20Sopenharmony_cistatic void io_ring_submit_lock(struct io_ring_ctx *ctx, bool needs_lock) 31858c2ecf20Sopenharmony_ci{ 31868c2ecf20Sopenharmony_ci /* 31878c2ecf20Sopenharmony_ci * "Normal" inline submissions always hold the uring_lock, since we 31888c2ecf20Sopenharmony_ci * grab it from the system call. 
Same is true for the SQPOLL offload. 31898c2ecf20Sopenharmony_ci * The only exception is when we've detached the request and issue it 31908c2ecf20Sopenharmony_ci * from an async worker thread, grab the lock for that case. 31918c2ecf20Sopenharmony_ci */ 31928c2ecf20Sopenharmony_ci if (needs_lock) 31938c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 31948c2ecf20Sopenharmony_ci} 31958c2ecf20Sopenharmony_ci 31968c2ecf20Sopenharmony_cistatic struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len, 31978c2ecf20Sopenharmony_ci int bgid, struct io_buffer *kbuf, 31988c2ecf20Sopenharmony_ci bool needs_lock) 31998c2ecf20Sopenharmony_ci{ 32008c2ecf20Sopenharmony_ci struct io_buffer *head; 32018c2ecf20Sopenharmony_ci 32028c2ecf20Sopenharmony_ci if (req->flags & REQ_F_BUFFER_SELECTED) 32038c2ecf20Sopenharmony_ci return kbuf; 32048c2ecf20Sopenharmony_ci 32058c2ecf20Sopenharmony_ci io_ring_submit_lock(req->ctx, needs_lock); 32068c2ecf20Sopenharmony_ci 32078c2ecf20Sopenharmony_ci lockdep_assert_held(&req->ctx->uring_lock); 32088c2ecf20Sopenharmony_ci 32098c2ecf20Sopenharmony_ci head = xa_load(&req->ctx->io_buffers, bgid); 32108c2ecf20Sopenharmony_ci if (head) { 32118c2ecf20Sopenharmony_ci if (!list_empty(&head->list)) { 32128c2ecf20Sopenharmony_ci kbuf = list_last_entry(&head->list, struct io_buffer, 32138c2ecf20Sopenharmony_ci list); 32148c2ecf20Sopenharmony_ci list_del(&kbuf->list); 32158c2ecf20Sopenharmony_ci } else { 32168c2ecf20Sopenharmony_ci kbuf = head; 32178c2ecf20Sopenharmony_ci xa_erase(&req->ctx->io_buffers, bgid); 32188c2ecf20Sopenharmony_ci } 32198c2ecf20Sopenharmony_ci if (*len > kbuf->len) 32208c2ecf20Sopenharmony_ci *len = kbuf->len; 32218c2ecf20Sopenharmony_ci } else { 32228c2ecf20Sopenharmony_ci kbuf = ERR_PTR(-ENOBUFS); 32238c2ecf20Sopenharmony_ci } 32248c2ecf20Sopenharmony_ci 32258c2ecf20Sopenharmony_ci io_ring_submit_unlock(req->ctx, needs_lock); 32268c2ecf20Sopenharmony_ci 32278c2ecf20Sopenharmony_ci return kbuf; 32288c2ecf20Sopenharmony_ci} 
32298c2ecf20Sopenharmony_ci 32308c2ecf20Sopenharmony_cistatic void __user *io_rw_buffer_select(struct io_kiocb *req, size_t *len, 32318c2ecf20Sopenharmony_ci bool needs_lock) 32328c2ecf20Sopenharmony_ci{ 32338c2ecf20Sopenharmony_ci struct io_buffer *kbuf; 32348c2ecf20Sopenharmony_ci u16 bgid; 32358c2ecf20Sopenharmony_ci 32368c2ecf20Sopenharmony_ci kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; 32378c2ecf20Sopenharmony_ci bgid = req->buf_index; 32388c2ecf20Sopenharmony_ci kbuf = io_buffer_select(req, len, bgid, kbuf, needs_lock); 32398c2ecf20Sopenharmony_ci if (IS_ERR(kbuf)) 32408c2ecf20Sopenharmony_ci return kbuf; 32418c2ecf20Sopenharmony_ci req->rw.addr = (u64) (unsigned long) kbuf; 32428c2ecf20Sopenharmony_ci req->flags |= REQ_F_BUFFER_SELECTED; 32438c2ecf20Sopenharmony_ci return u64_to_user_ptr(kbuf->addr); 32448c2ecf20Sopenharmony_ci} 32458c2ecf20Sopenharmony_ci 32468c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 32478c2ecf20Sopenharmony_cistatic ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov, 32488c2ecf20Sopenharmony_ci bool needs_lock) 32498c2ecf20Sopenharmony_ci{ 32508c2ecf20Sopenharmony_ci struct compat_iovec __user *uiov; 32518c2ecf20Sopenharmony_ci compat_ssize_t clen; 32528c2ecf20Sopenharmony_ci void __user *buf; 32538c2ecf20Sopenharmony_ci ssize_t len; 32548c2ecf20Sopenharmony_ci 32558c2ecf20Sopenharmony_ci uiov = u64_to_user_ptr(req->rw.addr); 32568c2ecf20Sopenharmony_ci if (!access_ok(uiov, sizeof(*uiov))) 32578c2ecf20Sopenharmony_ci return -EFAULT; 32588c2ecf20Sopenharmony_ci if (__get_user(clen, &uiov->iov_len)) 32598c2ecf20Sopenharmony_ci return -EFAULT; 32608c2ecf20Sopenharmony_ci if (clen < 0) 32618c2ecf20Sopenharmony_ci return -EINVAL; 32628c2ecf20Sopenharmony_ci 32638c2ecf20Sopenharmony_ci len = clen; 32648c2ecf20Sopenharmony_ci buf = io_rw_buffer_select(req, &len, needs_lock); 32658c2ecf20Sopenharmony_ci if (IS_ERR(buf)) 32668c2ecf20Sopenharmony_ci return PTR_ERR(buf); 32678c2ecf20Sopenharmony_ci iov[0].iov_base = buf; 
32688c2ecf20Sopenharmony_ci iov[0].iov_len = (compat_size_t) len; 32698c2ecf20Sopenharmony_ci return 0; 32708c2ecf20Sopenharmony_ci} 32718c2ecf20Sopenharmony_ci#endif 32728c2ecf20Sopenharmony_ci 32738c2ecf20Sopenharmony_cistatic ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, 32748c2ecf20Sopenharmony_ci bool needs_lock) 32758c2ecf20Sopenharmony_ci{ 32768c2ecf20Sopenharmony_ci struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr); 32778c2ecf20Sopenharmony_ci void __user *buf; 32788c2ecf20Sopenharmony_ci ssize_t len; 32798c2ecf20Sopenharmony_ci 32808c2ecf20Sopenharmony_ci if (copy_from_user(iov, uiov, sizeof(*uiov))) 32818c2ecf20Sopenharmony_ci return -EFAULT; 32828c2ecf20Sopenharmony_ci 32838c2ecf20Sopenharmony_ci len = iov[0].iov_len; 32848c2ecf20Sopenharmony_ci if (len < 0) 32858c2ecf20Sopenharmony_ci return -EINVAL; 32868c2ecf20Sopenharmony_ci buf = io_rw_buffer_select(req, &len, needs_lock); 32878c2ecf20Sopenharmony_ci if (IS_ERR(buf)) 32888c2ecf20Sopenharmony_ci return PTR_ERR(buf); 32898c2ecf20Sopenharmony_ci iov[0].iov_base = buf; 32908c2ecf20Sopenharmony_ci iov[0].iov_len = len; 32918c2ecf20Sopenharmony_ci return 0; 32928c2ecf20Sopenharmony_ci} 32938c2ecf20Sopenharmony_ci 32948c2ecf20Sopenharmony_cistatic ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, 32958c2ecf20Sopenharmony_ci bool needs_lock) 32968c2ecf20Sopenharmony_ci{ 32978c2ecf20Sopenharmony_ci if (req->flags & REQ_F_BUFFER_SELECTED) { 32988c2ecf20Sopenharmony_ci struct io_buffer *kbuf; 32998c2ecf20Sopenharmony_ci 33008c2ecf20Sopenharmony_ci kbuf = (struct io_buffer *) (unsigned long) req->rw.addr; 33018c2ecf20Sopenharmony_ci iov[0].iov_base = u64_to_user_ptr(kbuf->addr); 33028c2ecf20Sopenharmony_ci iov[0].iov_len = kbuf->len; 33038c2ecf20Sopenharmony_ci return 0; 33048c2ecf20Sopenharmony_ci } 33058c2ecf20Sopenharmony_ci if (req->rw.len != 1) 33068c2ecf20Sopenharmony_ci return -EINVAL; 33078c2ecf20Sopenharmony_ci 33088c2ecf20Sopenharmony_ci#ifdef 
CONFIG_COMPAT 33098c2ecf20Sopenharmony_ci if (req->ctx->compat) 33108c2ecf20Sopenharmony_ci return io_compat_import(req, iov, needs_lock); 33118c2ecf20Sopenharmony_ci#endif 33128c2ecf20Sopenharmony_ci 33138c2ecf20Sopenharmony_ci return __io_iov_buffer_select(req, iov, needs_lock); 33148c2ecf20Sopenharmony_ci} 33158c2ecf20Sopenharmony_ci 33168c2ecf20Sopenharmony_cistatic int io_import_iovec(int rw, struct io_kiocb *req, struct iovec **iovec, 33178c2ecf20Sopenharmony_ci struct iov_iter *iter, bool needs_lock) 33188c2ecf20Sopenharmony_ci{ 33198c2ecf20Sopenharmony_ci void __user *buf = u64_to_user_ptr(req->rw.addr); 33208c2ecf20Sopenharmony_ci size_t sqe_len = req->rw.len; 33218c2ecf20Sopenharmony_ci u8 opcode = req->opcode; 33228c2ecf20Sopenharmony_ci ssize_t ret; 33238c2ecf20Sopenharmony_ci 33248c2ecf20Sopenharmony_ci if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { 33258c2ecf20Sopenharmony_ci *iovec = NULL; 33268c2ecf20Sopenharmony_ci return io_import_fixed(req, rw, iter); 33278c2ecf20Sopenharmony_ci } 33288c2ecf20Sopenharmony_ci 33298c2ecf20Sopenharmony_ci /* buffer index only valid with fixed read/write, or buffer select */ 33308c2ecf20Sopenharmony_ci if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT)) 33318c2ecf20Sopenharmony_ci return -EINVAL; 33328c2ecf20Sopenharmony_ci 33338c2ecf20Sopenharmony_ci if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) { 33348c2ecf20Sopenharmony_ci if (req->flags & REQ_F_BUFFER_SELECT) { 33358c2ecf20Sopenharmony_ci buf = io_rw_buffer_select(req, &sqe_len, needs_lock); 33368c2ecf20Sopenharmony_ci if (IS_ERR(buf)) 33378c2ecf20Sopenharmony_ci return PTR_ERR(buf); 33388c2ecf20Sopenharmony_ci req->rw.len = sqe_len; 33398c2ecf20Sopenharmony_ci } 33408c2ecf20Sopenharmony_ci 33418c2ecf20Sopenharmony_ci ret = import_single_range(rw, buf, sqe_len, *iovec, iter); 33428c2ecf20Sopenharmony_ci *iovec = NULL; 33438c2ecf20Sopenharmony_ci return ret; 33448c2ecf20Sopenharmony_ci } 33458c2ecf20Sopenharmony_ci 
33468c2ecf20Sopenharmony_ci if (req->flags & REQ_F_BUFFER_SELECT) { 33478c2ecf20Sopenharmony_ci ret = io_iov_buffer_select(req, *iovec, needs_lock); 33488c2ecf20Sopenharmony_ci if (!ret) 33498c2ecf20Sopenharmony_ci iov_iter_init(iter, rw, *iovec, 1, (*iovec)->iov_len); 33508c2ecf20Sopenharmony_ci *iovec = NULL; 33518c2ecf20Sopenharmony_ci return ret; 33528c2ecf20Sopenharmony_ci } 33538c2ecf20Sopenharmony_ci 33548c2ecf20Sopenharmony_ci return __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter, 33558c2ecf20Sopenharmony_ci req->ctx->compat); 33568c2ecf20Sopenharmony_ci} 33578c2ecf20Sopenharmony_ci 33588c2ecf20Sopenharmony_cistatic inline loff_t *io_kiocb_ppos(struct kiocb *kiocb) 33598c2ecf20Sopenharmony_ci{ 33608c2ecf20Sopenharmony_ci return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos; 33618c2ecf20Sopenharmony_ci} 33628c2ecf20Sopenharmony_ci 33638c2ecf20Sopenharmony_ci/* 33648c2ecf20Sopenharmony_ci * For files that don't have ->read_iter() and ->write_iter(), handle them 33658c2ecf20Sopenharmony_ci * by looping over ->read() or ->write() manually. 33668c2ecf20Sopenharmony_ci */ 33678c2ecf20Sopenharmony_cistatic ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) 33688c2ecf20Sopenharmony_ci{ 33698c2ecf20Sopenharmony_ci struct kiocb *kiocb = &req->rw.kiocb; 33708c2ecf20Sopenharmony_ci struct file *file = req->file; 33718c2ecf20Sopenharmony_ci ssize_t ret = 0; 33728c2ecf20Sopenharmony_ci loff_t *ppos; 33738c2ecf20Sopenharmony_ci 33748c2ecf20Sopenharmony_ci /* 33758c2ecf20Sopenharmony_ci * Don't support polled IO through this interface, and we can't 33768c2ecf20Sopenharmony_ci * support non-blocking either. For the latter, this just causes 33778c2ecf20Sopenharmony_ci * the kiocb to be handled from an async context. 
33788c2ecf20Sopenharmony_ci */ 33798c2ecf20Sopenharmony_ci if (kiocb->ki_flags & IOCB_HIPRI) 33808c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 33818c2ecf20Sopenharmony_ci if (kiocb->ki_flags & IOCB_NOWAIT) 33828c2ecf20Sopenharmony_ci return -EAGAIN; 33838c2ecf20Sopenharmony_ci 33848c2ecf20Sopenharmony_ci ppos = io_kiocb_ppos(kiocb); 33858c2ecf20Sopenharmony_ci 33868c2ecf20Sopenharmony_ci while (iov_iter_count(iter)) { 33878c2ecf20Sopenharmony_ci struct iovec iovec; 33888c2ecf20Sopenharmony_ci ssize_t nr; 33898c2ecf20Sopenharmony_ci 33908c2ecf20Sopenharmony_ci if (!iov_iter_is_bvec(iter)) { 33918c2ecf20Sopenharmony_ci iovec = iov_iter_iovec(iter); 33928c2ecf20Sopenharmony_ci } else { 33938c2ecf20Sopenharmony_ci iovec.iov_base = u64_to_user_ptr(req->rw.addr); 33948c2ecf20Sopenharmony_ci iovec.iov_len = req->rw.len; 33958c2ecf20Sopenharmony_ci } 33968c2ecf20Sopenharmony_ci 33978c2ecf20Sopenharmony_ci if (rw == READ) { 33988c2ecf20Sopenharmony_ci nr = file->f_op->read(file, iovec.iov_base, 33998c2ecf20Sopenharmony_ci iovec.iov_len, ppos); 34008c2ecf20Sopenharmony_ci } else { 34018c2ecf20Sopenharmony_ci nr = file->f_op->write(file, iovec.iov_base, 34028c2ecf20Sopenharmony_ci iovec.iov_len, ppos); 34038c2ecf20Sopenharmony_ci } 34048c2ecf20Sopenharmony_ci 34058c2ecf20Sopenharmony_ci if (nr < 0) { 34068c2ecf20Sopenharmony_ci if (!ret) 34078c2ecf20Sopenharmony_ci ret = nr; 34088c2ecf20Sopenharmony_ci break; 34098c2ecf20Sopenharmony_ci } 34108c2ecf20Sopenharmony_ci ret += nr; 34118c2ecf20Sopenharmony_ci if (!iov_iter_is_bvec(iter)) { 34128c2ecf20Sopenharmony_ci iov_iter_advance(iter, nr); 34138c2ecf20Sopenharmony_ci } else { 34148c2ecf20Sopenharmony_ci req->rw.addr += nr; 34158c2ecf20Sopenharmony_ci req->rw.len -= nr; 34168c2ecf20Sopenharmony_ci if (!req->rw.len) 34178c2ecf20Sopenharmony_ci break; 34188c2ecf20Sopenharmony_ci } 34198c2ecf20Sopenharmony_ci if (nr != iovec.iov_len) 34208c2ecf20Sopenharmony_ci break; 34218c2ecf20Sopenharmony_ci } 34228c2ecf20Sopenharmony_ci 
34238c2ecf20Sopenharmony_ci return ret; 34248c2ecf20Sopenharmony_ci} 34258c2ecf20Sopenharmony_ci 34268c2ecf20Sopenharmony_cistatic void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec, 34278c2ecf20Sopenharmony_ci const struct iovec *fast_iov, struct iov_iter *iter) 34288c2ecf20Sopenharmony_ci{ 34298c2ecf20Sopenharmony_ci struct io_async_rw *rw = req->async_data; 34308c2ecf20Sopenharmony_ci 34318c2ecf20Sopenharmony_ci memcpy(&rw->iter, iter, sizeof(*iter)); 34328c2ecf20Sopenharmony_ci rw->free_iovec = iovec; 34338c2ecf20Sopenharmony_ci rw->bytes_done = 0; 34348c2ecf20Sopenharmony_ci /* can only be fixed buffers, no need to do anything */ 34358c2ecf20Sopenharmony_ci if (iov_iter_is_bvec(iter)) 34368c2ecf20Sopenharmony_ci return; 34378c2ecf20Sopenharmony_ci if (!iovec) { 34388c2ecf20Sopenharmony_ci unsigned iov_off = 0; 34398c2ecf20Sopenharmony_ci 34408c2ecf20Sopenharmony_ci rw->iter.iov = rw->fast_iov; 34418c2ecf20Sopenharmony_ci if (iter->iov != fast_iov) { 34428c2ecf20Sopenharmony_ci iov_off = iter->iov - fast_iov; 34438c2ecf20Sopenharmony_ci rw->iter.iov += iov_off; 34448c2ecf20Sopenharmony_ci } 34458c2ecf20Sopenharmony_ci if (rw->fast_iov != fast_iov) 34468c2ecf20Sopenharmony_ci memcpy(rw->fast_iov + iov_off, fast_iov + iov_off, 34478c2ecf20Sopenharmony_ci sizeof(struct iovec) * iter->nr_segs); 34488c2ecf20Sopenharmony_ci } else { 34498c2ecf20Sopenharmony_ci req->flags |= REQ_F_NEED_CLEANUP; 34508c2ecf20Sopenharmony_ci } 34518c2ecf20Sopenharmony_ci} 34528c2ecf20Sopenharmony_ci 34538c2ecf20Sopenharmony_cistatic inline int io_alloc_async_data(struct io_kiocb *req) 34548c2ecf20Sopenharmony_ci{ 34558c2ecf20Sopenharmony_ci WARN_ON_ONCE(!io_op_defs[req->opcode].async_size); 34568c2ecf20Sopenharmony_ci req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL); 34578c2ecf20Sopenharmony_ci return req->async_data == NULL; 34588c2ecf20Sopenharmony_ci} 34598c2ecf20Sopenharmony_ci 34608c2ecf20Sopenharmony_cistatic int io_setup_async_rw(struct 
io_kiocb *req, const struct iovec *iovec, 34618c2ecf20Sopenharmony_ci const struct iovec *fast_iov, 34628c2ecf20Sopenharmony_ci struct iov_iter *iter, bool force) 34638c2ecf20Sopenharmony_ci{ 34648c2ecf20Sopenharmony_ci if (!force && !io_op_defs[req->opcode].needs_async_setup) 34658c2ecf20Sopenharmony_ci return 0; 34668c2ecf20Sopenharmony_ci if (!req->async_data) { 34678c2ecf20Sopenharmony_ci struct io_async_rw *iorw; 34688c2ecf20Sopenharmony_ci 34698c2ecf20Sopenharmony_ci if (io_alloc_async_data(req)) { 34708c2ecf20Sopenharmony_ci kfree(iovec); 34718c2ecf20Sopenharmony_ci return -ENOMEM; 34728c2ecf20Sopenharmony_ci } 34738c2ecf20Sopenharmony_ci 34748c2ecf20Sopenharmony_ci io_req_map_rw(req, iovec, fast_iov, iter); 34758c2ecf20Sopenharmony_ci iorw = req->async_data; 34768c2ecf20Sopenharmony_ci /* we've copied and mapped the iter, ensure state is saved */ 34778c2ecf20Sopenharmony_ci iov_iter_save_state(&iorw->iter, &iorw->iter_state); 34788c2ecf20Sopenharmony_ci } 34798c2ecf20Sopenharmony_ci return 0; 34808c2ecf20Sopenharmony_ci} 34818c2ecf20Sopenharmony_ci 34828c2ecf20Sopenharmony_cistatic inline int io_rw_prep_async(struct io_kiocb *req, int rw) 34838c2ecf20Sopenharmony_ci{ 34848c2ecf20Sopenharmony_ci struct io_async_rw *iorw = req->async_data; 34858c2ecf20Sopenharmony_ci struct iovec *iov = iorw->fast_iov; 34868c2ecf20Sopenharmony_ci int ret; 34878c2ecf20Sopenharmony_ci 34888c2ecf20Sopenharmony_ci iorw->bytes_done = 0; 34898c2ecf20Sopenharmony_ci iorw->free_iovec = NULL; 34908c2ecf20Sopenharmony_ci 34918c2ecf20Sopenharmony_ci ret = io_import_iovec(rw, req, &iov, &iorw->iter, false); 34928c2ecf20Sopenharmony_ci if (unlikely(ret < 0)) 34938c2ecf20Sopenharmony_ci return ret; 34948c2ecf20Sopenharmony_ci 34958c2ecf20Sopenharmony_ci if (iov) { 34968c2ecf20Sopenharmony_ci iorw->free_iovec = iov; 34978c2ecf20Sopenharmony_ci req->flags |= REQ_F_NEED_CLEANUP; 34988c2ecf20Sopenharmony_ci } 34998c2ecf20Sopenharmony_ci iov_iter_save_state(&iorw->iter, &iorw->iter_state); 
35008c2ecf20Sopenharmony_ci return 0; 35018c2ecf20Sopenharmony_ci} 35028c2ecf20Sopenharmony_ci 35038c2ecf20Sopenharmony_cistatic int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 35048c2ecf20Sopenharmony_ci{ 35058c2ecf20Sopenharmony_ci if (unlikely(!(req->file->f_mode & FMODE_READ))) 35068c2ecf20Sopenharmony_ci return -EBADF; 35078c2ecf20Sopenharmony_ci return io_prep_rw(req, sqe, READ); 35088c2ecf20Sopenharmony_ci} 35098c2ecf20Sopenharmony_ci 35108c2ecf20Sopenharmony_ci/* 35118c2ecf20Sopenharmony_ci * This is our waitqueue callback handler, registered through lock_page_async() 35128c2ecf20Sopenharmony_ci * when we initially tried to do the IO with the iocb armed our waitqueue. 35138c2ecf20Sopenharmony_ci * This gets called when the page is unlocked, and we generally expect that to 35148c2ecf20Sopenharmony_ci * happen when the page IO is completed and the page is now uptodate. This will 35158c2ecf20Sopenharmony_ci * queue a task_work based retry of the operation, attempting to copy the data 35168c2ecf20Sopenharmony_ci * again. If the latter fails because the page was NOT uptodate, then we will 35178c2ecf20Sopenharmony_ci * do a thread based blocking retry of the operation. That's the unexpected 35188c2ecf20Sopenharmony_ci * slow path. 
35198c2ecf20Sopenharmony_ci */ 35208c2ecf20Sopenharmony_cistatic int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, 35218c2ecf20Sopenharmony_ci int sync, void *arg) 35228c2ecf20Sopenharmony_ci{ 35238c2ecf20Sopenharmony_ci struct wait_page_queue *wpq; 35248c2ecf20Sopenharmony_ci struct io_kiocb *req = wait->private; 35258c2ecf20Sopenharmony_ci struct wait_page_key *key = arg; 35268c2ecf20Sopenharmony_ci 35278c2ecf20Sopenharmony_ci wpq = container_of(wait, struct wait_page_queue, wait); 35288c2ecf20Sopenharmony_ci 35298c2ecf20Sopenharmony_ci if (!wake_page_match(wpq, key)) 35308c2ecf20Sopenharmony_ci return 0; 35318c2ecf20Sopenharmony_ci 35328c2ecf20Sopenharmony_ci req->rw.kiocb.ki_flags &= ~IOCB_WAITQ; 35338c2ecf20Sopenharmony_ci list_del_init(&wait->entry); 35348c2ecf20Sopenharmony_ci io_req_task_queue(req); 35358c2ecf20Sopenharmony_ci return 1; 35368c2ecf20Sopenharmony_ci} 35378c2ecf20Sopenharmony_ci 35388c2ecf20Sopenharmony_ci/* 35398c2ecf20Sopenharmony_ci * This controls whether a given IO request should be armed for async page 35408c2ecf20Sopenharmony_ci * based retry. If we return false here, the request is handed to the async 35418c2ecf20Sopenharmony_ci * worker threads for retry. If we're doing buffered reads on a regular file, 35428c2ecf20Sopenharmony_ci * we prepare a private wait_page_queue entry and retry the operation. This 35438c2ecf20Sopenharmony_ci * will either succeed because the page is now uptodate and unlocked, or it 35448c2ecf20Sopenharmony_ci * will register a callback when the page is unlocked at IO completion. Through 35458c2ecf20Sopenharmony_ci * that callback, io_uring uses task_work to setup a retry of the operation. 35468c2ecf20Sopenharmony_ci * That retry will attempt the buffered read again. The retry will generally 35478c2ecf20Sopenharmony_ci * succeed, or in rare cases where it fails, we then fall back to using the 35488c2ecf20Sopenharmony_ci * async worker threads for a blocking retry. 
35498c2ecf20Sopenharmony_ci */ 35508c2ecf20Sopenharmony_cistatic bool io_rw_should_retry(struct io_kiocb *req) 35518c2ecf20Sopenharmony_ci{ 35528c2ecf20Sopenharmony_ci struct io_async_rw *rw = req->async_data; 35538c2ecf20Sopenharmony_ci struct wait_page_queue *wait = &rw->wpq; 35548c2ecf20Sopenharmony_ci struct kiocb *kiocb = &req->rw.kiocb; 35558c2ecf20Sopenharmony_ci 35568c2ecf20Sopenharmony_ci /* never retry for NOWAIT, we just complete with -EAGAIN */ 35578c2ecf20Sopenharmony_ci if (req->flags & REQ_F_NOWAIT) 35588c2ecf20Sopenharmony_ci return false; 35598c2ecf20Sopenharmony_ci 35608c2ecf20Sopenharmony_ci /* Only for buffered IO */ 35618c2ecf20Sopenharmony_ci if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) 35628c2ecf20Sopenharmony_ci return false; 35638c2ecf20Sopenharmony_ci 35648c2ecf20Sopenharmony_ci /* 35658c2ecf20Sopenharmony_ci * just use poll if we can, and don't attempt if the fs doesn't 35668c2ecf20Sopenharmony_ci * support callback based unlocks 35678c2ecf20Sopenharmony_ci */ 35688c2ecf20Sopenharmony_ci if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) 35698c2ecf20Sopenharmony_ci return false; 35708c2ecf20Sopenharmony_ci 35718c2ecf20Sopenharmony_ci wait->wait.func = io_async_buf_func; 35728c2ecf20Sopenharmony_ci wait->wait.private = req; 35738c2ecf20Sopenharmony_ci wait->wait.flags = 0; 35748c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&wait->wait.entry); 35758c2ecf20Sopenharmony_ci kiocb->ki_flags |= IOCB_WAITQ; 35768c2ecf20Sopenharmony_ci kiocb->ki_flags &= ~IOCB_NOWAIT; 35778c2ecf20Sopenharmony_ci kiocb->ki_waitq = wait; 35788c2ecf20Sopenharmony_ci return true; 35798c2ecf20Sopenharmony_ci} 35808c2ecf20Sopenharmony_ci 35818c2ecf20Sopenharmony_cistatic inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) 35828c2ecf20Sopenharmony_ci{ 35838c2ecf20Sopenharmony_ci if (req->file->f_op->read_iter) 35848c2ecf20Sopenharmony_ci return call_read_iter(req->file, &req->rw.kiocb, iter); 35858c2ecf20Sopenharmony_ci else if 
(req->file->f_op->read) 35868c2ecf20Sopenharmony_ci return loop_rw_iter(READ, req, iter); 35878c2ecf20Sopenharmony_ci else 35888c2ecf20Sopenharmony_ci return -EINVAL; 35898c2ecf20Sopenharmony_ci} 35908c2ecf20Sopenharmony_ci 35918c2ecf20Sopenharmony_cistatic bool need_read_all(struct io_kiocb *req) 35928c2ecf20Sopenharmony_ci{ 35938c2ecf20Sopenharmony_ci return req->flags & REQ_F_ISREG || 35948c2ecf20Sopenharmony_ci S_ISBLK(file_inode(req->file)->i_mode); 35958c2ecf20Sopenharmony_ci} 35968c2ecf20Sopenharmony_ci 35978c2ecf20Sopenharmony_cistatic int io_read(struct io_kiocb *req, unsigned int issue_flags) 35988c2ecf20Sopenharmony_ci{ 35998c2ecf20Sopenharmony_ci struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; 36008c2ecf20Sopenharmony_ci struct kiocb *kiocb = &req->rw.kiocb; 36018c2ecf20Sopenharmony_ci struct iov_iter __iter, *iter = &__iter; 36028c2ecf20Sopenharmony_ci struct io_async_rw *rw = req->async_data; 36038c2ecf20Sopenharmony_ci bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 36048c2ecf20Sopenharmony_ci struct iov_iter_state __state, *state; 36058c2ecf20Sopenharmony_ci ssize_t ret, ret2; 36068c2ecf20Sopenharmony_ci loff_t *ppos; 36078c2ecf20Sopenharmony_ci 36088c2ecf20Sopenharmony_ci if (rw) { 36098c2ecf20Sopenharmony_ci iter = &rw->iter; 36108c2ecf20Sopenharmony_ci state = &rw->iter_state; 36118c2ecf20Sopenharmony_ci /* 36128c2ecf20Sopenharmony_ci * We come here from an earlier attempt, restore our state to 36138c2ecf20Sopenharmony_ci * match in case it doesn't. It's cheap enough that we don't 36148c2ecf20Sopenharmony_ci * need to make this conditional. 
36158c2ecf20Sopenharmony_ci */ 36168c2ecf20Sopenharmony_ci iov_iter_restore(iter, state); 36178c2ecf20Sopenharmony_ci iovec = NULL; 36188c2ecf20Sopenharmony_ci } else { 36198c2ecf20Sopenharmony_ci ret = io_import_iovec(READ, req, &iovec, iter, !force_nonblock); 36208c2ecf20Sopenharmony_ci if (ret < 0) 36218c2ecf20Sopenharmony_ci return ret; 36228c2ecf20Sopenharmony_ci state = &__state; 36238c2ecf20Sopenharmony_ci iov_iter_save_state(iter, state); 36248c2ecf20Sopenharmony_ci } 36258c2ecf20Sopenharmony_ci req->result = iov_iter_count(iter); 36268c2ecf20Sopenharmony_ci 36278c2ecf20Sopenharmony_ci /* Ensure we clear previously set non-block flag */ 36288c2ecf20Sopenharmony_ci if (!force_nonblock) 36298c2ecf20Sopenharmony_ci kiocb->ki_flags &= ~IOCB_NOWAIT; 36308c2ecf20Sopenharmony_ci else 36318c2ecf20Sopenharmony_ci kiocb->ki_flags |= IOCB_NOWAIT; 36328c2ecf20Sopenharmony_ci 36338c2ecf20Sopenharmony_ci /* If the file doesn't support async, just async punt */ 36348c2ecf20Sopenharmony_ci if (force_nonblock && !io_file_supports_nowait(req, READ)) { 36358c2ecf20Sopenharmony_ci ret = io_setup_async_rw(req, iovec, inline_vecs, iter, true); 36368c2ecf20Sopenharmony_ci return ret ?: -EAGAIN; 36378c2ecf20Sopenharmony_ci } 36388c2ecf20Sopenharmony_ci 36398c2ecf20Sopenharmony_ci ppos = io_kiocb_update_pos(req); 36408c2ecf20Sopenharmony_ci 36418c2ecf20Sopenharmony_ci ret = rw_verify_area(READ, req->file, ppos, req->result); 36428c2ecf20Sopenharmony_ci if (unlikely(ret)) { 36438c2ecf20Sopenharmony_ci kfree(iovec); 36448c2ecf20Sopenharmony_ci return ret; 36458c2ecf20Sopenharmony_ci } 36468c2ecf20Sopenharmony_ci 36478c2ecf20Sopenharmony_ci ret = io_iter_do_read(req, iter); 36488c2ecf20Sopenharmony_ci 36498c2ecf20Sopenharmony_ci if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) { 36508c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_REISSUE; 36518c2ecf20Sopenharmony_ci /* IOPOLL retry should happen for io-wq threads */ 36528c2ecf20Sopenharmony_ci if (!force_nonblock && !(req->ctx->flags 
& IORING_SETUP_IOPOLL)) 36538c2ecf20Sopenharmony_ci goto done; 36548c2ecf20Sopenharmony_ci /* no retry on NONBLOCK nor RWF_NOWAIT */ 36558c2ecf20Sopenharmony_ci if (req->flags & REQ_F_NOWAIT) 36568c2ecf20Sopenharmony_ci goto done; 36578c2ecf20Sopenharmony_ci ret = 0; 36588c2ecf20Sopenharmony_ci } else if (ret == -EIOCBQUEUED) { 36598c2ecf20Sopenharmony_ci goto out_free; 36608c2ecf20Sopenharmony_ci } else if (ret <= 0 || ret == req->result || !force_nonblock || 36618c2ecf20Sopenharmony_ci (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { 36628c2ecf20Sopenharmony_ci /* read all, failed, already did sync or don't want to retry */ 36638c2ecf20Sopenharmony_ci goto done; 36648c2ecf20Sopenharmony_ci } 36658c2ecf20Sopenharmony_ci 36668c2ecf20Sopenharmony_ci /* 36678c2ecf20Sopenharmony_ci * Don't depend on the iter state matching what was consumed, or being 36688c2ecf20Sopenharmony_ci * untouched in case of error. Restore it and we'll advance it 36698c2ecf20Sopenharmony_ci * manually if we need to. 36708c2ecf20Sopenharmony_ci */ 36718c2ecf20Sopenharmony_ci iov_iter_restore(iter, state); 36728c2ecf20Sopenharmony_ci 36738c2ecf20Sopenharmony_ci ret2 = io_setup_async_rw(req, iovec, inline_vecs, iter, true); 36748c2ecf20Sopenharmony_ci if (ret2) 36758c2ecf20Sopenharmony_ci return ret2; 36768c2ecf20Sopenharmony_ci 36778c2ecf20Sopenharmony_ci iovec = NULL; 36788c2ecf20Sopenharmony_ci rw = req->async_data; 36798c2ecf20Sopenharmony_ci /* 36808c2ecf20Sopenharmony_ci * Now use our persistent iterator and state, if we aren't already. 36818c2ecf20Sopenharmony_ci * We've restored and mapped the iter to match. 
36828c2ecf20Sopenharmony_ci */ 36838c2ecf20Sopenharmony_ci if (iter != &rw->iter) { 36848c2ecf20Sopenharmony_ci iter = &rw->iter; 36858c2ecf20Sopenharmony_ci state = &rw->iter_state; 36868c2ecf20Sopenharmony_ci } 36878c2ecf20Sopenharmony_ci 36888c2ecf20Sopenharmony_ci do { 36898c2ecf20Sopenharmony_ci /* 36908c2ecf20Sopenharmony_ci * We end up here because of a partial read, either from 36918c2ecf20Sopenharmony_ci * above or inside this loop. Advance the iter by the bytes 36928c2ecf20Sopenharmony_ci * that were consumed. 36938c2ecf20Sopenharmony_ci */ 36948c2ecf20Sopenharmony_ci iov_iter_advance(iter, ret); 36958c2ecf20Sopenharmony_ci if (!iov_iter_count(iter)) 36968c2ecf20Sopenharmony_ci break; 36978c2ecf20Sopenharmony_ci rw->bytes_done += ret; 36988c2ecf20Sopenharmony_ci iov_iter_save_state(iter, state); 36998c2ecf20Sopenharmony_ci 37008c2ecf20Sopenharmony_ci /* if we can retry, do so with the callbacks armed */ 37018c2ecf20Sopenharmony_ci if (!io_rw_should_retry(req)) { 37028c2ecf20Sopenharmony_ci kiocb->ki_flags &= ~IOCB_WAITQ; 37038c2ecf20Sopenharmony_ci return -EAGAIN; 37048c2ecf20Sopenharmony_ci } 37058c2ecf20Sopenharmony_ci 37068c2ecf20Sopenharmony_ci req->result = iov_iter_count(iter); 37078c2ecf20Sopenharmony_ci /* 37088c2ecf20Sopenharmony_ci * Now retry read with the IOCB_WAITQ parts set in the iocb. If 37098c2ecf20Sopenharmony_ci * we get -EIOCBQUEUED, then we'll get a notification when the 37108c2ecf20Sopenharmony_ci * desired page gets unlocked. We can also get a partial read 37118c2ecf20Sopenharmony_ci * here, and if we do, then just retry at the new offset. 37128c2ecf20Sopenharmony_ci */ 37138c2ecf20Sopenharmony_ci ret = io_iter_do_read(req, iter); 37148c2ecf20Sopenharmony_ci if (ret == -EIOCBQUEUED) 37158c2ecf20Sopenharmony_ci return 0; 37168c2ecf20Sopenharmony_ci /* we got some bytes, but not all. retry. 
*/ 37178c2ecf20Sopenharmony_ci kiocb->ki_flags &= ~IOCB_WAITQ; 37188c2ecf20Sopenharmony_ci iov_iter_restore(iter, state); 37198c2ecf20Sopenharmony_ci } while (ret > 0); 37208c2ecf20Sopenharmony_cidone: 37218c2ecf20Sopenharmony_ci kiocb_done(kiocb, ret, issue_flags); 37228c2ecf20Sopenharmony_ciout_free: 37238c2ecf20Sopenharmony_ci /* it's faster to check here then delegate to kfree */ 37248c2ecf20Sopenharmony_ci if (iovec) 37258c2ecf20Sopenharmony_ci kfree(iovec); 37268c2ecf20Sopenharmony_ci return 0; 37278c2ecf20Sopenharmony_ci} 37288c2ecf20Sopenharmony_ci 37298c2ecf20Sopenharmony_cistatic int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 37308c2ecf20Sopenharmony_ci{ 37318c2ecf20Sopenharmony_ci if (unlikely(!(req->file->f_mode & FMODE_WRITE))) 37328c2ecf20Sopenharmony_ci return -EBADF; 37338c2ecf20Sopenharmony_ci return io_prep_rw(req, sqe, WRITE); 37348c2ecf20Sopenharmony_ci} 37358c2ecf20Sopenharmony_ci 37368c2ecf20Sopenharmony_cistatic int io_write(struct io_kiocb *req, unsigned int issue_flags) 37378c2ecf20Sopenharmony_ci{ 37388c2ecf20Sopenharmony_ci struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; 37398c2ecf20Sopenharmony_ci struct kiocb *kiocb = &req->rw.kiocb; 37408c2ecf20Sopenharmony_ci struct iov_iter __iter, *iter = &__iter; 37418c2ecf20Sopenharmony_ci struct io_async_rw *rw = req->async_data; 37428c2ecf20Sopenharmony_ci bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 37438c2ecf20Sopenharmony_ci struct iov_iter_state __state, *state; 37448c2ecf20Sopenharmony_ci ssize_t ret, ret2; 37458c2ecf20Sopenharmony_ci loff_t *ppos; 37468c2ecf20Sopenharmony_ci 37478c2ecf20Sopenharmony_ci if (rw) { 37488c2ecf20Sopenharmony_ci iter = &rw->iter; 37498c2ecf20Sopenharmony_ci state = &rw->iter_state; 37508c2ecf20Sopenharmony_ci iov_iter_restore(iter, state); 37518c2ecf20Sopenharmony_ci iovec = NULL; 37528c2ecf20Sopenharmony_ci } else { 37538c2ecf20Sopenharmony_ci ret = io_import_iovec(WRITE, req, &iovec, iter, !force_nonblock); 
37548c2ecf20Sopenharmony_ci if (ret < 0) 37558c2ecf20Sopenharmony_ci return ret; 37568c2ecf20Sopenharmony_ci state = &__state; 37578c2ecf20Sopenharmony_ci iov_iter_save_state(iter, state); 37588c2ecf20Sopenharmony_ci } 37598c2ecf20Sopenharmony_ci req->result = iov_iter_count(iter); 37608c2ecf20Sopenharmony_ci 37618c2ecf20Sopenharmony_ci /* Ensure we clear previously set non-block flag */ 37628c2ecf20Sopenharmony_ci if (!force_nonblock) 37638c2ecf20Sopenharmony_ci kiocb->ki_flags &= ~IOCB_NOWAIT; 37648c2ecf20Sopenharmony_ci else 37658c2ecf20Sopenharmony_ci kiocb->ki_flags |= IOCB_NOWAIT; 37668c2ecf20Sopenharmony_ci 37678c2ecf20Sopenharmony_ci /* If the file doesn't support async, just async punt */ 37688c2ecf20Sopenharmony_ci if (force_nonblock && !io_file_supports_nowait(req, WRITE)) 37698c2ecf20Sopenharmony_ci goto copy_iov; 37708c2ecf20Sopenharmony_ci 37718c2ecf20Sopenharmony_ci /* file path doesn't support NOWAIT for non-direct_IO */ 37728c2ecf20Sopenharmony_ci if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) && 37738c2ecf20Sopenharmony_ci (req->flags & REQ_F_ISREG)) 37748c2ecf20Sopenharmony_ci goto copy_iov; 37758c2ecf20Sopenharmony_ci 37768c2ecf20Sopenharmony_ci ppos = io_kiocb_update_pos(req); 37778c2ecf20Sopenharmony_ci 37788c2ecf20Sopenharmony_ci ret = rw_verify_area(WRITE, req->file, ppos, req->result); 37798c2ecf20Sopenharmony_ci if (unlikely(ret)) 37808c2ecf20Sopenharmony_ci goto out_free; 37818c2ecf20Sopenharmony_ci 37828c2ecf20Sopenharmony_ci /* 37838c2ecf20Sopenharmony_ci * Open-code file_start_write here to grab freeze protection, 37848c2ecf20Sopenharmony_ci * which will be released by another thread in 37858c2ecf20Sopenharmony_ci * io_complete_rw(). Fool lockdep by telling it the lock got 37868c2ecf20Sopenharmony_ci * released so that it doesn't complain about the held lock when 37878c2ecf20Sopenharmony_ci * we return to userspace. 
37888c2ecf20Sopenharmony_ci */ 37898c2ecf20Sopenharmony_ci if (req->flags & REQ_F_ISREG) { 37908c2ecf20Sopenharmony_ci sb_start_write(file_inode(req->file)->i_sb); 37918c2ecf20Sopenharmony_ci __sb_writers_release(file_inode(req->file)->i_sb, 37928c2ecf20Sopenharmony_ci SB_FREEZE_WRITE); 37938c2ecf20Sopenharmony_ci } 37948c2ecf20Sopenharmony_ci kiocb->ki_flags |= IOCB_WRITE; 37958c2ecf20Sopenharmony_ci 37968c2ecf20Sopenharmony_ci if (req->file->f_op->write_iter) 37978c2ecf20Sopenharmony_ci ret2 = call_write_iter(req->file, kiocb, iter); 37988c2ecf20Sopenharmony_ci else if (req->file->f_op->write) 37998c2ecf20Sopenharmony_ci ret2 = loop_rw_iter(WRITE, req, iter); 38008c2ecf20Sopenharmony_ci else 38018c2ecf20Sopenharmony_ci ret2 = -EINVAL; 38028c2ecf20Sopenharmony_ci 38038c2ecf20Sopenharmony_ci if (req->flags & REQ_F_REISSUE) { 38048c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_REISSUE; 38058c2ecf20Sopenharmony_ci ret2 = -EAGAIN; 38068c2ecf20Sopenharmony_ci } 38078c2ecf20Sopenharmony_ci 38088c2ecf20Sopenharmony_ci /* 38098c2ecf20Sopenharmony_ci * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just 38108c2ecf20Sopenharmony_ci * retry them without IOCB_NOWAIT. 
38118c2ecf20Sopenharmony_ci */ 38128c2ecf20Sopenharmony_ci if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT)) 38138c2ecf20Sopenharmony_ci ret2 = -EAGAIN; 38148c2ecf20Sopenharmony_ci /* no retry on NONBLOCK nor RWF_NOWAIT */ 38158c2ecf20Sopenharmony_ci if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT)) 38168c2ecf20Sopenharmony_ci goto done; 38178c2ecf20Sopenharmony_ci if (!force_nonblock || ret2 != -EAGAIN) { 38188c2ecf20Sopenharmony_ci /* IOPOLL retry should happen for io-wq threads */ 38198c2ecf20Sopenharmony_ci if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN) 38208c2ecf20Sopenharmony_ci goto copy_iov; 38218c2ecf20Sopenharmony_cidone: 38228c2ecf20Sopenharmony_ci kiocb_done(kiocb, ret2, issue_flags); 38238c2ecf20Sopenharmony_ci } else { 38248c2ecf20Sopenharmony_cicopy_iov: 38258c2ecf20Sopenharmony_ci iov_iter_restore(iter, state); 38268c2ecf20Sopenharmony_ci ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false); 38278c2ecf20Sopenharmony_ci if (!ret) { 38288c2ecf20Sopenharmony_ci if (kiocb->ki_flags & IOCB_WRITE) 38298c2ecf20Sopenharmony_ci kiocb_end_write(req); 38308c2ecf20Sopenharmony_ci return -EAGAIN; 38318c2ecf20Sopenharmony_ci } 38328c2ecf20Sopenharmony_ci return ret; 38338c2ecf20Sopenharmony_ci } 38348c2ecf20Sopenharmony_ciout_free: 38358c2ecf20Sopenharmony_ci /* it's reportedly faster than delegating the null check to kfree() */ 38368c2ecf20Sopenharmony_ci if (iovec) 38378c2ecf20Sopenharmony_ci kfree(iovec); 38388c2ecf20Sopenharmony_ci return ret; 38398c2ecf20Sopenharmony_ci} 38408c2ecf20Sopenharmony_ci 38418c2ecf20Sopenharmony_cistatic int io_renameat_prep(struct io_kiocb *req, 38428c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 38438c2ecf20Sopenharmony_ci{ 38448c2ecf20Sopenharmony_ci struct io_rename *ren = &req->rename; 38458c2ecf20Sopenharmony_ci const char __user *oldf, *newf; 38468c2ecf20Sopenharmony_ci 38478c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 38488c2ecf20Sopenharmony_ci 
return -EINVAL; 38498c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) 38508c2ecf20Sopenharmony_ci return -EINVAL; 38518c2ecf20Sopenharmony_ci if (unlikely(req->flags & REQ_F_FIXED_FILE)) 38528c2ecf20Sopenharmony_ci return -EBADF; 38538c2ecf20Sopenharmony_ci 38548c2ecf20Sopenharmony_ci ren->old_dfd = READ_ONCE(sqe->fd); 38558c2ecf20Sopenharmony_ci oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); 38568c2ecf20Sopenharmony_ci newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 38578c2ecf20Sopenharmony_ci ren->new_dfd = READ_ONCE(sqe->len); 38588c2ecf20Sopenharmony_ci ren->flags = READ_ONCE(sqe->rename_flags); 38598c2ecf20Sopenharmony_ci 38608c2ecf20Sopenharmony_ci ren->oldpath = getname(oldf); 38618c2ecf20Sopenharmony_ci if (IS_ERR(ren->oldpath)) 38628c2ecf20Sopenharmony_ci return PTR_ERR(ren->oldpath); 38638c2ecf20Sopenharmony_ci 38648c2ecf20Sopenharmony_ci ren->newpath = getname(newf); 38658c2ecf20Sopenharmony_ci if (IS_ERR(ren->newpath)) { 38668c2ecf20Sopenharmony_ci putname(ren->oldpath); 38678c2ecf20Sopenharmony_ci return PTR_ERR(ren->newpath); 38688c2ecf20Sopenharmony_ci } 38698c2ecf20Sopenharmony_ci 38708c2ecf20Sopenharmony_ci req->flags |= REQ_F_NEED_CLEANUP; 38718c2ecf20Sopenharmony_ci return 0; 38728c2ecf20Sopenharmony_ci} 38738c2ecf20Sopenharmony_ci 38748c2ecf20Sopenharmony_cistatic int io_renameat(struct io_kiocb *req, unsigned int issue_flags) 38758c2ecf20Sopenharmony_ci{ 38768c2ecf20Sopenharmony_ci struct io_rename *ren = &req->rename; 38778c2ecf20Sopenharmony_ci int ret; 38788c2ecf20Sopenharmony_ci 38798c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 38808c2ecf20Sopenharmony_ci return -EAGAIN; 38818c2ecf20Sopenharmony_ci 38828c2ecf20Sopenharmony_ci ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd, 38838c2ecf20Sopenharmony_ci ren->newpath, ren->flags); 38848c2ecf20Sopenharmony_ci 38858c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_NEED_CLEANUP; 38868c2ecf20Sopenharmony_ci if (ret < 0) 38878c2ecf20Sopenharmony_ci 
req_set_fail(req); 38888c2ecf20Sopenharmony_ci io_req_complete(req, ret); 38898c2ecf20Sopenharmony_ci return 0; 38908c2ecf20Sopenharmony_ci} 38918c2ecf20Sopenharmony_ci 38928c2ecf20Sopenharmony_cistatic int io_unlinkat_prep(struct io_kiocb *req, 38938c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 38948c2ecf20Sopenharmony_ci{ 38958c2ecf20Sopenharmony_ci struct io_unlink *un = &req->unlink; 38968c2ecf20Sopenharmony_ci const char __user *fname; 38978c2ecf20Sopenharmony_ci 38988c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 38998c2ecf20Sopenharmony_ci return -EINVAL; 39008c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index || 39018c2ecf20Sopenharmony_ci sqe->splice_fd_in) 39028c2ecf20Sopenharmony_ci return -EINVAL; 39038c2ecf20Sopenharmony_ci if (unlikely(req->flags & REQ_F_FIXED_FILE)) 39048c2ecf20Sopenharmony_ci return -EBADF; 39058c2ecf20Sopenharmony_ci 39068c2ecf20Sopenharmony_ci un->dfd = READ_ONCE(sqe->fd); 39078c2ecf20Sopenharmony_ci 39088c2ecf20Sopenharmony_ci un->flags = READ_ONCE(sqe->unlink_flags); 39098c2ecf20Sopenharmony_ci if (un->flags & ~AT_REMOVEDIR) 39108c2ecf20Sopenharmony_ci return -EINVAL; 39118c2ecf20Sopenharmony_ci 39128c2ecf20Sopenharmony_ci fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); 39138c2ecf20Sopenharmony_ci un->filename = getname(fname); 39148c2ecf20Sopenharmony_ci if (IS_ERR(un->filename)) 39158c2ecf20Sopenharmony_ci return PTR_ERR(un->filename); 39168c2ecf20Sopenharmony_ci 39178c2ecf20Sopenharmony_ci req->flags |= REQ_F_NEED_CLEANUP; 39188c2ecf20Sopenharmony_ci return 0; 39198c2ecf20Sopenharmony_ci} 39208c2ecf20Sopenharmony_ci 39218c2ecf20Sopenharmony_cistatic int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags) 39228c2ecf20Sopenharmony_ci{ 39238c2ecf20Sopenharmony_ci struct io_unlink *un = &req->unlink; 39248c2ecf20Sopenharmony_ci int ret; 39258c2ecf20Sopenharmony_ci 39268c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 
39278c2ecf20Sopenharmony_ci return -EAGAIN; 39288c2ecf20Sopenharmony_ci 39298c2ecf20Sopenharmony_ci if (un->flags & AT_REMOVEDIR) 39308c2ecf20Sopenharmony_ci ret = do_rmdir(un->dfd, un->filename); 39318c2ecf20Sopenharmony_ci else 39328c2ecf20Sopenharmony_ci ret = do_unlinkat(un->dfd, un->filename); 39338c2ecf20Sopenharmony_ci 39348c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_NEED_CLEANUP; 39358c2ecf20Sopenharmony_ci if (ret < 0) 39368c2ecf20Sopenharmony_ci req_set_fail(req); 39378c2ecf20Sopenharmony_ci io_req_complete(req, ret); 39388c2ecf20Sopenharmony_ci return 0; 39398c2ecf20Sopenharmony_ci} 39408c2ecf20Sopenharmony_ci 39418c2ecf20Sopenharmony_cistatic int io_shutdown_prep(struct io_kiocb *req, 39428c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 39438c2ecf20Sopenharmony_ci{ 39448c2ecf20Sopenharmony_ci#if defined(CONFIG_NET) 39458c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 39468c2ecf20Sopenharmony_ci return -EINVAL; 39478c2ecf20Sopenharmony_ci if (unlikely(sqe->ioprio || sqe->off || sqe->addr || sqe->rw_flags || 39488c2ecf20Sopenharmony_ci sqe->buf_index || sqe->splice_fd_in)) 39498c2ecf20Sopenharmony_ci return -EINVAL; 39508c2ecf20Sopenharmony_ci 39518c2ecf20Sopenharmony_ci req->shutdown.how = READ_ONCE(sqe->len); 39528c2ecf20Sopenharmony_ci return 0; 39538c2ecf20Sopenharmony_ci#else 39548c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 39558c2ecf20Sopenharmony_ci#endif 39568c2ecf20Sopenharmony_ci} 39578c2ecf20Sopenharmony_ci 39588c2ecf20Sopenharmony_cistatic int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) 39598c2ecf20Sopenharmony_ci{ 39608c2ecf20Sopenharmony_ci#if defined(CONFIG_NET) 39618c2ecf20Sopenharmony_ci struct socket *sock; 39628c2ecf20Sopenharmony_ci int ret; 39638c2ecf20Sopenharmony_ci 39648c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 39658c2ecf20Sopenharmony_ci return -EAGAIN; 39668c2ecf20Sopenharmony_ci 39678c2ecf20Sopenharmony_ci sock = sock_from_file(req->file, &ret); 
39688c2ecf20Sopenharmony_ci if (unlikely(!sock)) 39698c2ecf20Sopenharmony_ci return ret; 39708c2ecf20Sopenharmony_ci 39718c2ecf20Sopenharmony_ci ret = __sys_shutdown_sock(sock, req->shutdown.how); 39728c2ecf20Sopenharmony_ci if (ret < 0) 39738c2ecf20Sopenharmony_ci req_set_fail(req); 39748c2ecf20Sopenharmony_ci io_req_complete(req, ret); 39758c2ecf20Sopenharmony_ci return 0; 39768c2ecf20Sopenharmony_ci#else 39778c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 39788c2ecf20Sopenharmony_ci#endif 39798c2ecf20Sopenharmony_ci} 39808c2ecf20Sopenharmony_ci 39818c2ecf20Sopenharmony_cistatic int __io_splice_prep(struct io_kiocb *req, 39828c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 39838c2ecf20Sopenharmony_ci{ 39848c2ecf20Sopenharmony_ci struct io_splice *sp = &req->splice; 39858c2ecf20Sopenharmony_ci unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL; 39868c2ecf20Sopenharmony_ci 39878c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 39888c2ecf20Sopenharmony_ci return -EINVAL; 39898c2ecf20Sopenharmony_ci 39908c2ecf20Sopenharmony_ci sp->len = READ_ONCE(sqe->len); 39918c2ecf20Sopenharmony_ci sp->flags = READ_ONCE(sqe->splice_flags); 39928c2ecf20Sopenharmony_ci if (unlikely(sp->flags & ~valid_flags)) 39938c2ecf20Sopenharmony_ci return -EINVAL; 39948c2ecf20Sopenharmony_ci sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); 39958c2ecf20Sopenharmony_ci return 0; 39968c2ecf20Sopenharmony_ci} 39978c2ecf20Sopenharmony_ci 39988c2ecf20Sopenharmony_cistatic int io_tee_prep(struct io_kiocb *req, 39998c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 40008c2ecf20Sopenharmony_ci{ 40018c2ecf20Sopenharmony_ci if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off)) 40028c2ecf20Sopenharmony_ci return -EINVAL; 40038c2ecf20Sopenharmony_ci return __io_splice_prep(req, sqe); 40048c2ecf20Sopenharmony_ci} 40058c2ecf20Sopenharmony_ci 40068c2ecf20Sopenharmony_cistatic int io_tee(struct io_kiocb *req, unsigned int issue_flags) 40078c2ecf20Sopenharmony_ci{ 
40088c2ecf20Sopenharmony_ci struct io_splice *sp = &req->splice; 40098c2ecf20Sopenharmony_ci struct file *out = sp->file_out; 40108c2ecf20Sopenharmony_ci unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; 40118c2ecf20Sopenharmony_ci struct file *in; 40128c2ecf20Sopenharmony_ci long ret = 0; 40138c2ecf20Sopenharmony_ci 40148c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 40158c2ecf20Sopenharmony_ci return -EAGAIN; 40168c2ecf20Sopenharmony_ci 40178c2ecf20Sopenharmony_ci in = io_file_get(req->ctx, req, sp->splice_fd_in, 40188c2ecf20Sopenharmony_ci (sp->flags & SPLICE_F_FD_IN_FIXED), issue_flags); 40198c2ecf20Sopenharmony_ci if (!in) { 40208c2ecf20Sopenharmony_ci ret = -EBADF; 40218c2ecf20Sopenharmony_ci goto done; 40228c2ecf20Sopenharmony_ci } 40238c2ecf20Sopenharmony_ci 40248c2ecf20Sopenharmony_ci if (sp->len) 40258c2ecf20Sopenharmony_ci ret = do_tee(in, out, sp->len, flags); 40268c2ecf20Sopenharmony_ci 40278c2ecf20Sopenharmony_ci if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) 40288c2ecf20Sopenharmony_ci io_put_file(in); 40298c2ecf20Sopenharmony_cidone: 40308c2ecf20Sopenharmony_ci if (ret != sp->len) 40318c2ecf20Sopenharmony_ci req_set_fail(req); 40328c2ecf20Sopenharmony_ci io_req_complete(req, ret); 40338c2ecf20Sopenharmony_ci return 0; 40348c2ecf20Sopenharmony_ci} 40358c2ecf20Sopenharmony_ci 40368c2ecf20Sopenharmony_cistatic int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 40378c2ecf20Sopenharmony_ci{ 40388c2ecf20Sopenharmony_ci struct io_splice *sp = &req->splice; 40398c2ecf20Sopenharmony_ci 40408c2ecf20Sopenharmony_ci sp->off_in = READ_ONCE(sqe->splice_off_in); 40418c2ecf20Sopenharmony_ci sp->off_out = READ_ONCE(sqe->off); 40428c2ecf20Sopenharmony_ci return __io_splice_prep(req, sqe); 40438c2ecf20Sopenharmony_ci} 40448c2ecf20Sopenharmony_ci 40458c2ecf20Sopenharmony_cistatic int io_splice(struct io_kiocb *req, unsigned int issue_flags) 40468c2ecf20Sopenharmony_ci{ 40478c2ecf20Sopenharmony_ci struct io_splice *sp = &req->splice; 
40488c2ecf20Sopenharmony_ci struct file *out = sp->file_out; 40498c2ecf20Sopenharmony_ci unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; 40508c2ecf20Sopenharmony_ci loff_t *poff_in, *poff_out; 40518c2ecf20Sopenharmony_ci struct file *in; 40528c2ecf20Sopenharmony_ci long ret = 0; 40538c2ecf20Sopenharmony_ci 40548c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 40558c2ecf20Sopenharmony_ci return -EAGAIN; 40568c2ecf20Sopenharmony_ci 40578c2ecf20Sopenharmony_ci in = io_file_get(req->ctx, req, sp->splice_fd_in, 40588c2ecf20Sopenharmony_ci (sp->flags & SPLICE_F_FD_IN_FIXED), issue_flags); 40598c2ecf20Sopenharmony_ci if (!in) { 40608c2ecf20Sopenharmony_ci ret = -EBADF; 40618c2ecf20Sopenharmony_ci goto done; 40628c2ecf20Sopenharmony_ci } 40638c2ecf20Sopenharmony_ci 40648c2ecf20Sopenharmony_ci poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; 40658c2ecf20Sopenharmony_ci poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; 40668c2ecf20Sopenharmony_ci 40678c2ecf20Sopenharmony_ci if (sp->len) 40688c2ecf20Sopenharmony_ci ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); 40698c2ecf20Sopenharmony_ci 40708c2ecf20Sopenharmony_ci if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) 40718c2ecf20Sopenharmony_ci io_put_file(in); 40728c2ecf20Sopenharmony_cidone: 40738c2ecf20Sopenharmony_ci if (ret != sp->len) 40748c2ecf20Sopenharmony_ci req_set_fail(req); 40758c2ecf20Sopenharmony_ci io_req_complete(req, ret); 40768c2ecf20Sopenharmony_ci return 0; 40778c2ecf20Sopenharmony_ci} 40788c2ecf20Sopenharmony_ci 40798c2ecf20Sopenharmony_ci/* 40808c2ecf20Sopenharmony_ci * IORING_OP_NOP just posts a completion event, nothing else. 
40818c2ecf20Sopenharmony_ci */ 40828c2ecf20Sopenharmony_cistatic int io_nop(struct io_kiocb *req, unsigned int issue_flags) 40838c2ecf20Sopenharmony_ci{ 40848c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 40858c2ecf20Sopenharmony_ci 40868c2ecf20Sopenharmony_ci if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) 40878c2ecf20Sopenharmony_ci return -EINVAL; 40888c2ecf20Sopenharmony_ci 40898c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, 0, 0); 40908c2ecf20Sopenharmony_ci return 0; 40918c2ecf20Sopenharmony_ci} 40928c2ecf20Sopenharmony_ci 40938c2ecf20Sopenharmony_cistatic int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 40948c2ecf20Sopenharmony_ci{ 40958c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 40968c2ecf20Sopenharmony_ci 40978c2ecf20Sopenharmony_ci if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) 40988c2ecf20Sopenharmony_ci return -EINVAL; 40998c2ecf20Sopenharmony_ci if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || 41008c2ecf20Sopenharmony_ci sqe->splice_fd_in)) 41018c2ecf20Sopenharmony_ci return -EINVAL; 41028c2ecf20Sopenharmony_ci 41038c2ecf20Sopenharmony_ci req->sync.flags = READ_ONCE(sqe->fsync_flags); 41048c2ecf20Sopenharmony_ci if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC)) 41058c2ecf20Sopenharmony_ci return -EINVAL; 41068c2ecf20Sopenharmony_ci 41078c2ecf20Sopenharmony_ci req->sync.off = READ_ONCE(sqe->off); 41088c2ecf20Sopenharmony_ci req->sync.len = READ_ONCE(sqe->len); 41098c2ecf20Sopenharmony_ci return 0; 41108c2ecf20Sopenharmony_ci} 41118c2ecf20Sopenharmony_ci 41128c2ecf20Sopenharmony_cistatic int io_fsync(struct io_kiocb *req, unsigned int issue_flags) 41138c2ecf20Sopenharmony_ci{ 41148c2ecf20Sopenharmony_ci loff_t end = req->sync.off + req->sync.len; 41158c2ecf20Sopenharmony_ci int ret; 41168c2ecf20Sopenharmony_ci 41178c2ecf20Sopenharmony_ci /* fsync always requires a blocking context */ 41188c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 41198c2ecf20Sopenharmony_ci 
return -EAGAIN; 41208c2ecf20Sopenharmony_ci 41218c2ecf20Sopenharmony_ci ret = vfs_fsync_range(req->file, req->sync.off, 41228c2ecf20Sopenharmony_ci end > 0 ? end : LLONG_MAX, 41238c2ecf20Sopenharmony_ci req->sync.flags & IORING_FSYNC_DATASYNC); 41248c2ecf20Sopenharmony_ci if (ret < 0) 41258c2ecf20Sopenharmony_ci req_set_fail(req); 41268c2ecf20Sopenharmony_ci io_req_complete(req, ret); 41278c2ecf20Sopenharmony_ci return 0; 41288c2ecf20Sopenharmony_ci} 41298c2ecf20Sopenharmony_ci 41308c2ecf20Sopenharmony_cistatic int io_fallocate_prep(struct io_kiocb *req, 41318c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 41328c2ecf20Sopenharmony_ci{ 41338c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->buf_index || sqe->rw_flags || 41348c2ecf20Sopenharmony_ci sqe->splice_fd_in) 41358c2ecf20Sopenharmony_ci return -EINVAL; 41368c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 41378c2ecf20Sopenharmony_ci return -EINVAL; 41388c2ecf20Sopenharmony_ci 41398c2ecf20Sopenharmony_ci req->sync.off = READ_ONCE(sqe->off); 41408c2ecf20Sopenharmony_ci req->sync.len = READ_ONCE(sqe->addr); 41418c2ecf20Sopenharmony_ci req->sync.mode = READ_ONCE(sqe->len); 41428c2ecf20Sopenharmony_ci return 0; 41438c2ecf20Sopenharmony_ci} 41448c2ecf20Sopenharmony_ci 41458c2ecf20Sopenharmony_cistatic int io_fallocate(struct io_kiocb *req, unsigned int issue_flags) 41468c2ecf20Sopenharmony_ci{ 41478c2ecf20Sopenharmony_ci int ret; 41488c2ecf20Sopenharmony_ci 41498c2ecf20Sopenharmony_ci /* fallocate always requiring blocking context */ 41508c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 41518c2ecf20Sopenharmony_ci return -EAGAIN; 41528c2ecf20Sopenharmony_ci ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, 41538c2ecf20Sopenharmony_ci req->sync.len); 41548c2ecf20Sopenharmony_ci if (ret < 0) 41558c2ecf20Sopenharmony_ci req_set_fail(req); 41568c2ecf20Sopenharmony_ci else 41578c2ecf20Sopenharmony_ci fsnotify_modify(req->file); 41588c2ecf20Sopenharmony_ci 
io_req_complete(req, ret); 41598c2ecf20Sopenharmony_ci return 0; 41608c2ecf20Sopenharmony_ci} 41618c2ecf20Sopenharmony_ci 41628c2ecf20Sopenharmony_cistatic int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 41638c2ecf20Sopenharmony_ci{ 41648c2ecf20Sopenharmony_ci const char __user *fname; 41658c2ecf20Sopenharmony_ci int ret; 41668c2ecf20Sopenharmony_ci 41678c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 41688c2ecf20Sopenharmony_ci return -EINVAL; 41698c2ecf20Sopenharmony_ci if (unlikely(sqe->ioprio || sqe->buf_index)) 41708c2ecf20Sopenharmony_ci return -EINVAL; 41718c2ecf20Sopenharmony_ci if (unlikely(req->flags & REQ_F_FIXED_FILE)) 41728c2ecf20Sopenharmony_ci return -EBADF; 41738c2ecf20Sopenharmony_ci 41748c2ecf20Sopenharmony_ci /* open.how should be already initialised */ 41758c2ecf20Sopenharmony_ci if (!(req->open.how.flags & O_PATH) && force_o_largefile()) 41768c2ecf20Sopenharmony_ci req->open.how.flags |= O_LARGEFILE; 41778c2ecf20Sopenharmony_ci 41788c2ecf20Sopenharmony_ci req->open.dfd = READ_ONCE(sqe->fd); 41798c2ecf20Sopenharmony_ci fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); 41808c2ecf20Sopenharmony_ci req->open.filename = getname(fname); 41818c2ecf20Sopenharmony_ci if (IS_ERR(req->open.filename)) { 41828c2ecf20Sopenharmony_ci ret = PTR_ERR(req->open.filename); 41838c2ecf20Sopenharmony_ci req->open.filename = NULL; 41848c2ecf20Sopenharmony_ci return ret; 41858c2ecf20Sopenharmony_ci } 41868c2ecf20Sopenharmony_ci 41878c2ecf20Sopenharmony_ci req->open.file_slot = READ_ONCE(sqe->file_index); 41888c2ecf20Sopenharmony_ci if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC)) 41898c2ecf20Sopenharmony_ci return -EINVAL; 41908c2ecf20Sopenharmony_ci 41918c2ecf20Sopenharmony_ci req->open.nofile = rlimit(RLIMIT_NOFILE); 41928c2ecf20Sopenharmony_ci req->flags |= REQ_F_NEED_CLEANUP; 41938c2ecf20Sopenharmony_ci return 0; 41948c2ecf20Sopenharmony_ci} 41958c2ecf20Sopenharmony_ci 
41968c2ecf20Sopenharmony_cistatic int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 41978c2ecf20Sopenharmony_ci{ 41988c2ecf20Sopenharmony_ci u64 mode = READ_ONCE(sqe->len); 41998c2ecf20Sopenharmony_ci u64 flags = READ_ONCE(sqe->open_flags); 42008c2ecf20Sopenharmony_ci 42018c2ecf20Sopenharmony_ci req->open.how = build_open_how(flags, mode); 42028c2ecf20Sopenharmony_ci return __io_openat_prep(req, sqe); 42038c2ecf20Sopenharmony_ci} 42048c2ecf20Sopenharmony_ci 42058c2ecf20Sopenharmony_cistatic int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 42068c2ecf20Sopenharmony_ci{ 42078c2ecf20Sopenharmony_ci struct open_how __user *how; 42088c2ecf20Sopenharmony_ci size_t len; 42098c2ecf20Sopenharmony_ci int ret; 42108c2ecf20Sopenharmony_ci 42118c2ecf20Sopenharmony_ci how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 42128c2ecf20Sopenharmony_ci len = READ_ONCE(sqe->len); 42138c2ecf20Sopenharmony_ci if (len < OPEN_HOW_SIZE_VER0) 42148c2ecf20Sopenharmony_ci return -EINVAL; 42158c2ecf20Sopenharmony_ci 42168c2ecf20Sopenharmony_ci ret = copy_struct_from_user(&req->open.how, sizeof(req->open.how), how, 42178c2ecf20Sopenharmony_ci len); 42188c2ecf20Sopenharmony_ci if (ret) 42198c2ecf20Sopenharmony_ci return ret; 42208c2ecf20Sopenharmony_ci 42218c2ecf20Sopenharmony_ci return __io_openat_prep(req, sqe); 42228c2ecf20Sopenharmony_ci} 42238c2ecf20Sopenharmony_ci 42248c2ecf20Sopenharmony_cistatic int io_openat2(struct io_kiocb *req, unsigned int issue_flags) 42258c2ecf20Sopenharmony_ci{ 42268c2ecf20Sopenharmony_ci struct open_flags op; 42278c2ecf20Sopenharmony_ci struct file *file; 42288c2ecf20Sopenharmony_ci bool resolve_nonblock, nonblock_set; 42298c2ecf20Sopenharmony_ci bool fixed = !!req->open.file_slot; 42308c2ecf20Sopenharmony_ci int ret; 42318c2ecf20Sopenharmony_ci 42328c2ecf20Sopenharmony_ci ret = build_open_flags(&req->open.how, &op); 42338c2ecf20Sopenharmony_ci if (ret) 42348c2ecf20Sopenharmony_ci goto err; 42358c2ecf20Sopenharmony_ci 
nonblock_set = op.open_flag & O_NONBLOCK; 42368c2ecf20Sopenharmony_ci resolve_nonblock = req->open.how.resolve & RESOLVE_CACHED; 42378c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) { 42388c2ecf20Sopenharmony_ci /* 42398c2ecf20Sopenharmony_ci * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open, 42408c2ecf20Sopenharmony_ci * it'll always -EAGAIN. Note that we test for __O_TMPFILE 42418c2ecf20Sopenharmony_ci * because O_TMPFILE includes O_DIRECTORY, which isn't a flag 42428c2ecf20Sopenharmony_ci * we need to force async for. 42438c2ecf20Sopenharmony_ci */ 42448c2ecf20Sopenharmony_ci if (req->open.how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE)) 42458c2ecf20Sopenharmony_ci return -EAGAIN; 42468c2ecf20Sopenharmony_ci op.lookup_flags |= LOOKUP_CACHED; 42478c2ecf20Sopenharmony_ci op.open_flag |= O_NONBLOCK; 42488c2ecf20Sopenharmony_ci } 42498c2ecf20Sopenharmony_ci 42508c2ecf20Sopenharmony_ci if (!fixed) { 42518c2ecf20Sopenharmony_ci ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile); 42528c2ecf20Sopenharmony_ci if (ret < 0) 42538c2ecf20Sopenharmony_ci goto err; 42548c2ecf20Sopenharmony_ci } 42558c2ecf20Sopenharmony_ci 42568c2ecf20Sopenharmony_ci file = do_filp_open(req->open.dfd, req->open.filename, &op); 42578c2ecf20Sopenharmony_ci if (IS_ERR(file)) { 42588c2ecf20Sopenharmony_ci /* 42598c2ecf20Sopenharmony_ci * We could hang on to this 'fd' on retrying, but seems like 42608c2ecf20Sopenharmony_ci * marginal gain for something that is now known to be a slower 42618c2ecf20Sopenharmony_ci * path. So just put it, and we'll get a new one when we retry. 
42628c2ecf20Sopenharmony_ci */ 42638c2ecf20Sopenharmony_ci if (!fixed) 42648c2ecf20Sopenharmony_ci put_unused_fd(ret); 42658c2ecf20Sopenharmony_ci 42668c2ecf20Sopenharmony_ci ret = PTR_ERR(file); 42678c2ecf20Sopenharmony_ci /* only retry if RESOLVE_CACHED wasn't already set by application */ 42688c2ecf20Sopenharmony_ci if (ret == -EAGAIN && 42698c2ecf20Sopenharmony_ci (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK))) 42708c2ecf20Sopenharmony_ci return -EAGAIN; 42718c2ecf20Sopenharmony_ci goto err; 42728c2ecf20Sopenharmony_ci } 42738c2ecf20Sopenharmony_ci 42748c2ecf20Sopenharmony_ci if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set) 42758c2ecf20Sopenharmony_ci file->f_flags &= ~O_NONBLOCK; 42768c2ecf20Sopenharmony_ci fsnotify_open(file); 42778c2ecf20Sopenharmony_ci 42788c2ecf20Sopenharmony_ci if (!fixed) 42798c2ecf20Sopenharmony_ci fd_install(ret, file); 42808c2ecf20Sopenharmony_ci else 42818c2ecf20Sopenharmony_ci ret = io_install_fixed_file(req, file, issue_flags, 42828c2ecf20Sopenharmony_ci req->open.file_slot - 1); 42838c2ecf20Sopenharmony_cierr: 42848c2ecf20Sopenharmony_ci putname(req->open.filename); 42858c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_NEED_CLEANUP; 42868c2ecf20Sopenharmony_ci if (ret < 0) 42878c2ecf20Sopenharmony_ci req_set_fail(req); 42888c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, 0); 42898c2ecf20Sopenharmony_ci return 0; 42908c2ecf20Sopenharmony_ci} 42918c2ecf20Sopenharmony_ci 42928c2ecf20Sopenharmony_cistatic int io_openat(struct io_kiocb *req, unsigned int issue_flags) 42938c2ecf20Sopenharmony_ci{ 42948c2ecf20Sopenharmony_ci return io_openat2(req, issue_flags); 42958c2ecf20Sopenharmony_ci} 42968c2ecf20Sopenharmony_ci 42978c2ecf20Sopenharmony_cistatic int io_remove_buffers_prep(struct io_kiocb *req, 42988c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 42998c2ecf20Sopenharmony_ci{ 43008c2ecf20Sopenharmony_ci struct io_provide_buf *p = &req->pbuf; 43018c2ecf20Sopenharmony_ci u64 tmp; 
43028c2ecf20Sopenharmony_ci 43038c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off || 43048c2ecf20Sopenharmony_ci sqe->splice_fd_in) 43058c2ecf20Sopenharmony_ci return -EINVAL; 43068c2ecf20Sopenharmony_ci 43078c2ecf20Sopenharmony_ci tmp = READ_ONCE(sqe->fd); 43088c2ecf20Sopenharmony_ci if (!tmp || tmp > USHRT_MAX) 43098c2ecf20Sopenharmony_ci return -EINVAL; 43108c2ecf20Sopenharmony_ci 43118c2ecf20Sopenharmony_ci memset(p, 0, sizeof(*p)); 43128c2ecf20Sopenharmony_ci p->nbufs = tmp; 43138c2ecf20Sopenharmony_ci p->bgid = READ_ONCE(sqe->buf_group); 43148c2ecf20Sopenharmony_ci return 0; 43158c2ecf20Sopenharmony_ci} 43168c2ecf20Sopenharmony_ci 43178c2ecf20Sopenharmony_cistatic int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf, 43188c2ecf20Sopenharmony_ci int bgid, unsigned nbufs) 43198c2ecf20Sopenharmony_ci{ 43208c2ecf20Sopenharmony_ci unsigned i = 0; 43218c2ecf20Sopenharmony_ci 43228c2ecf20Sopenharmony_ci /* shouldn't happen */ 43238c2ecf20Sopenharmony_ci if (!nbufs) 43248c2ecf20Sopenharmony_ci return 0; 43258c2ecf20Sopenharmony_ci 43268c2ecf20Sopenharmony_ci /* the head kbuf is the list itself */ 43278c2ecf20Sopenharmony_ci while (!list_empty(&buf->list)) { 43288c2ecf20Sopenharmony_ci struct io_buffer *nxt; 43298c2ecf20Sopenharmony_ci 43308c2ecf20Sopenharmony_ci nxt = list_first_entry(&buf->list, struct io_buffer, list); 43318c2ecf20Sopenharmony_ci list_del(&nxt->list); 43328c2ecf20Sopenharmony_ci kfree(nxt); 43338c2ecf20Sopenharmony_ci if (++i == nbufs) 43348c2ecf20Sopenharmony_ci return i; 43358c2ecf20Sopenharmony_ci cond_resched(); 43368c2ecf20Sopenharmony_ci } 43378c2ecf20Sopenharmony_ci i++; 43388c2ecf20Sopenharmony_ci kfree(buf); 43398c2ecf20Sopenharmony_ci xa_erase(&ctx->io_buffers, bgid); 43408c2ecf20Sopenharmony_ci 43418c2ecf20Sopenharmony_ci return i; 43428c2ecf20Sopenharmony_ci} 43438c2ecf20Sopenharmony_ci 43448c2ecf20Sopenharmony_cistatic int io_remove_buffers(struct io_kiocb *req, unsigned int 
issue_flags) 43458c2ecf20Sopenharmony_ci{ 43468c2ecf20Sopenharmony_ci struct io_provide_buf *p = &req->pbuf; 43478c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 43488c2ecf20Sopenharmony_ci struct io_buffer *head; 43498c2ecf20Sopenharmony_ci int ret = 0; 43508c2ecf20Sopenharmony_ci bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 43518c2ecf20Sopenharmony_ci 43528c2ecf20Sopenharmony_ci io_ring_submit_lock(ctx, !force_nonblock); 43538c2ecf20Sopenharmony_ci 43548c2ecf20Sopenharmony_ci lockdep_assert_held(&ctx->uring_lock); 43558c2ecf20Sopenharmony_ci 43568c2ecf20Sopenharmony_ci ret = -ENOENT; 43578c2ecf20Sopenharmony_ci head = xa_load(&ctx->io_buffers, p->bgid); 43588c2ecf20Sopenharmony_ci if (head) 43598c2ecf20Sopenharmony_ci ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs); 43608c2ecf20Sopenharmony_ci if (ret < 0) 43618c2ecf20Sopenharmony_ci req_set_fail(req); 43628c2ecf20Sopenharmony_ci 43638c2ecf20Sopenharmony_ci /* complete before unlock, IOPOLL may need the lock */ 43648c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, 0); 43658c2ecf20Sopenharmony_ci io_ring_submit_unlock(ctx, !force_nonblock); 43668c2ecf20Sopenharmony_ci return 0; 43678c2ecf20Sopenharmony_ci} 43688c2ecf20Sopenharmony_ci 43698c2ecf20Sopenharmony_cistatic int io_provide_buffers_prep(struct io_kiocb *req, 43708c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 43718c2ecf20Sopenharmony_ci{ 43728c2ecf20Sopenharmony_ci unsigned long size, tmp_check; 43738c2ecf20Sopenharmony_ci struct io_provide_buf *p = &req->pbuf; 43748c2ecf20Sopenharmony_ci u64 tmp; 43758c2ecf20Sopenharmony_ci 43768c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in) 43778c2ecf20Sopenharmony_ci return -EINVAL; 43788c2ecf20Sopenharmony_ci 43798c2ecf20Sopenharmony_ci tmp = READ_ONCE(sqe->fd); 43808c2ecf20Sopenharmony_ci if (!tmp || tmp > USHRT_MAX) 43818c2ecf20Sopenharmony_ci return -E2BIG; 43828c2ecf20Sopenharmony_ci p->nbufs = tmp; 43838c2ecf20Sopenharmony_ci 
p->addr = READ_ONCE(sqe->addr); 43848c2ecf20Sopenharmony_ci p->len = READ_ONCE(sqe->len); 43858c2ecf20Sopenharmony_ci 43868c2ecf20Sopenharmony_ci if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs, 43878c2ecf20Sopenharmony_ci &size)) 43888c2ecf20Sopenharmony_ci return -EOVERFLOW; 43898c2ecf20Sopenharmony_ci if (check_add_overflow((unsigned long)p->addr, size, &tmp_check)) 43908c2ecf20Sopenharmony_ci return -EOVERFLOW; 43918c2ecf20Sopenharmony_ci 43928c2ecf20Sopenharmony_ci size = (unsigned long)p->len * p->nbufs; 43938c2ecf20Sopenharmony_ci if (!access_ok(u64_to_user_ptr(p->addr), size)) 43948c2ecf20Sopenharmony_ci return -EFAULT; 43958c2ecf20Sopenharmony_ci 43968c2ecf20Sopenharmony_ci p->bgid = READ_ONCE(sqe->buf_group); 43978c2ecf20Sopenharmony_ci tmp = READ_ONCE(sqe->off); 43988c2ecf20Sopenharmony_ci if (tmp > USHRT_MAX) 43998c2ecf20Sopenharmony_ci return -E2BIG; 44008c2ecf20Sopenharmony_ci p->bid = tmp; 44018c2ecf20Sopenharmony_ci return 0; 44028c2ecf20Sopenharmony_ci} 44038c2ecf20Sopenharmony_ci 44048c2ecf20Sopenharmony_cistatic int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) 44058c2ecf20Sopenharmony_ci{ 44068c2ecf20Sopenharmony_ci struct io_buffer *buf; 44078c2ecf20Sopenharmony_ci u64 addr = pbuf->addr; 44088c2ecf20Sopenharmony_ci int i, bid = pbuf->bid; 44098c2ecf20Sopenharmony_ci 44108c2ecf20Sopenharmony_ci for (i = 0; i < pbuf->nbufs; i++) { 44118c2ecf20Sopenharmony_ci buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); 44128c2ecf20Sopenharmony_ci if (!buf) 44138c2ecf20Sopenharmony_ci break; 44148c2ecf20Sopenharmony_ci 44158c2ecf20Sopenharmony_ci buf->addr = addr; 44168c2ecf20Sopenharmony_ci buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT); 44178c2ecf20Sopenharmony_ci buf->bid = bid; 44188c2ecf20Sopenharmony_ci addr += pbuf->len; 44198c2ecf20Sopenharmony_ci bid++; 44208c2ecf20Sopenharmony_ci if (!*head) { 44218c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&buf->list); 44228c2ecf20Sopenharmony_ci *head = buf; 
44238c2ecf20Sopenharmony_ci } else { 44248c2ecf20Sopenharmony_ci list_add_tail(&buf->list, &(*head)->list); 44258c2ecf20Sopenharmony_ci } 44268c2ecf20Sopenharmony_ci cond_resched(); 44278c2ecf20Sopenharmony_ci } 44288c2ecf20Sopenharmony_ci 44298c2ecf20Sopenharmony_ci return i ? i : -ENOMEM; 44308c2ecf20Sopenharmony_ci} 44318c2ecf20Sopenharmony_ci 44328c2ecf20Sopenharmony_cistatic int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) 44338c2ecf20Sopenharmony_ci{ 44348c2ecf20Sopenharmony_ci struct io_provide_buf *p = &req->pbuf; 44358c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 44368c2ecf20Sopenharmony_ci struct io_buffer *head, *list; 44378c2ecf20Sopenharmony_ci int ret = 0; 44388c2ecf20Sopenharmony_ci bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 44398c2ecf20Sopenharmony_ci 44408c2ecf20Sopenharmony_ci io_ring_submit_lock(ctx, !force_nonblock); 44418c2ecf20Sopenharmony_ci 44428c2ecf20Sopenharmony_ci lockdep_assert_held(&ctx->uring_lock); 44438c2ecf20Sopenharmony_ci 44448c2ecf20Sopenharmony_ci list = head = xa_load(&ctx->io_buffers, p->bgid); 44458c2ecf20Sopenharmony_ci 44468c2ecf20Sopenharmony_ci ret = io_add_buffers(p, &head); 44478c2ecf20Sopenharmony_ci if (ret >= 0 && !list) { 44488c2ecf20Sopenharmony_ci ret = xa_insert(&ctx->io_buffers, p->bgid, head, 44498c2ecf20Sopenharmony_ci GFP_KERNEL_ACCOUNT); 44508c2ecf20Sopenharmony_ci if (ret < 0) 44518c2ecf20Sopenharmony_ci __io_remove_buffers(ctx, head, p->bgid, -1U); 44528c2ecf20Sopenharmony_ci } 44538c2ecf20Sopenharmony_ci if (ret < 0) 44548c2ecf20Sopenharmony_ci req_set_fail(req); 44558c2ecf20Sopenharmony_ci /* complete before unlock, IOPOLL may need the lock */ 44568c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, 0); 44578c2ecf20Sopenharmony_ci io_ring_submit_unlock(ctx, !force_nonblock); 44588c2ecf20Sopenharmony_ci return 0; 44598c2ecf20Sopenharmony_ci} 44608c2ecf20Sopenharmony_ci 44618c2ecf20Sopenharmony_cistatic int io_epoll_ctl_prep(struct io_kiocb 
*req, 44628c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 44638c2ecf20Sopenharmony_ci{ 44648c2ecf20Sopenharmony_ci#if defined(CONFIG_EPOLL) 44658c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) 44668c2ecf20Sopenharmony_ci return -EINVAL; 44678c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 44688c2ecf20Sopenharmony_ci return -EINVAL; 44698c2ecf20Sopenharmony_ci 44708c2ecf20Sopenharmony_ci req->epoll.epfd = READ_ONCE(sqe->fd); 44718c2ecf20Sopenharmony_ci req->epoll.op = READ_ONCE(sqe->len); 44728c2ecf20Sopenharmony_ci req->epoll.fd = READ_ONCE(sqe->off); 44738c2ecf20Sopenharmony_ci 44748c2ecf20Sopenharmony_ci if (ep_op_has_event(req->epoll.op)) { 44758c2ecf20Sopenharmony_ci struct epoll_event __user *ev; 44768c2ecf20Sopenharmony_ci 44778c2ecf20Sopenharmony_ci ev = u64_to_user_ptr(READ_ONCE(sqe->addr)); 44788c2ecf20Sopenharmony_ci if (copy_from_user(&req->epoll.event, ev, sizeof(*ev))) 44798c2ecf20Sopenharmony_ci return -EFAULT; 44808c2ecf20Sopenharmony_ci } 44818c2ecf20Sopenharmony_ci 44828c2ecf20Sopenharmony_ci return 0; 44838c2ecf20Sopenharmony_ci#else 44848c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 44858c2ecf20Sopenharmony_ci#endif 44868c2ecf20Sopenharmony_ci} 44878c2ecf20Sopenharmony_ci 44888c2ecf20Sopenharmony_cistatic int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags) 44898c2ecf20Sopenharmony_ci{ 44908c2ecf20Sopenharmony_ci#if defined(CONFIG_EPOLL) 44918c2ecf20Sopenharmony_ci struct io_epoll *ie = &req->epoll; 44928c2ecf20Sopenharmony_ci int ret; 44938c2ecf20Sopenharmony_ci bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 44948c2ecf20Sopenharmony_ci 44958c2ecf20Sopenharmony_ci ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock); 44968c2ecf20Sopenharmony_ci if (force_nonblock && ret == -EAGAIN) 44978c2ecf20Sopenharmony_ci return -EAGAIN; 44988c2ecf20Sopenharmony_ci 44998c2ecf20Sopenharmony_ci if (ret < 0) 45008c2ecf20Sopenharmony_ci req_set_fail(req); 
45018c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, 0); 45028c2ecf20Sopenharmony_ci return 0; 45038c2ecf20Sopenharmony_ci#else 45048c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 45058c2ecf20Sopenharmony_ci#endif 45068c2ecf20Sopenharmony_ci} 45078c2ecf20Sopenharmony_ci 45088c2ecf20Sopenharmony_cistatic int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 45098c2ecf20Sopenharmony_ci{ 45108c2ecf20Sopenharmony_ci#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) 45118c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->splice_fd_in) 45128c2ecf20Sopenharmony_ci return -EINVAL; 45138c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 45148c2ecf20Sopenharmony_ci return -EINVAL; 45158c2ecf20Sopenharmony_ci 45168c2ecf20Sopenharmony_ci req->madvise.addr = READ_ONCE(sqe->addr); 45178c2ecf20Sopenharmony_ci req->madvise.len = READ_ONCE(sqe->len); 45188c2ecf20Sopenharmony_ci req->madvise.advice = READ_ONCE(sqe->fadvise_advice); 45198c2ecf20Sopenharmony_ci return 0; 45208c2ecf20Sopenharmony_ci#else 45218c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 45228c2ecf20Sopenharmony_ci#endif 45238c2ecf20Sopenharmony_ci} 45248c2ecf20Sopenharmony_ci 45258c2ecf20Sopenharmony_cistatic int io_madvise(struct io_kiocb *req, unsigned int issue_flags) 45268c2ecf20Sopenharmony_ci{ 45278c2ecf20Sopenharmony_ci#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) 45288c2ecf20Sopenharmony_ci struct io_madvise *ma = &req->madvise; 45298c2ecf20Sopenharmony_ci int ret; 45308c2ecf20Sopenharmony_ci 45318c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 45328c2ecf20Sopenharmony_ci return -EAGAIN; 45338c2ecf20Sopenharmony_ci 45348c2ecf20Sopenharmony_ci ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice); 45358c2ecf20Sopenharmony_ci if (ret < 0) 45368c2ecf20Sopenharmony_ci req_set_fail(req); 45378c2ecf20Sopenharmony_ci io_req_complete(req, ret); 45388c2ecf20Sopenharmony_ci return 0; 
45398c2ecf20Sopenharmony_ci#else 45408c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 45418c2ecf20Sopenharmony_ci#endif 45428c2ecf20Sopenharmony_ci} 45438c2ecf20Sopenharmony_ci 45448c2ecf20Sopenharmony_cistatic int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 45458c2ecf20Sopenharmony_ci{ 45468c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->buf_index || sqe->addr || sqe->splice_fd_in) 45478c2ecf20Sopenharmony_ci return -EINVAL; 45488c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 45498c2ecf20Sopenharmony_ci return -EINVAL; 45508c2ecf20Sopenharmony_ci 45518c2ecf20Sopenharmony_ci req->fadvise.offset = READ_ONCE(sqe->off); 45528c2ecf20Sopenharmony_ci req->fadvise.len = READ_ONCE(sqe->len); 45538c2ecf20Sopenharmony_ci req->fadvise.advice = READ_ONCE(sqe->fadvise_advice); 45548c2ecf20Sopenharmony_ci return 0; 45558c2ecf20Sopenharmony_ci} 45568c2ecf20Sopenharmony_ci 45578c2ecf20Sopenharmony_cistatic int io_fadvise(struct io_kiocb *req, unsigned int issue_flags) 45588c2ecf20Sopenharmony_ci{ 45598c2ecf20Sopenharmony_ci struct io_fadvise *fa = &req->fadvise; 45608c2ecf20Sopenharmony_ci int ret; 45618c2ecf20Sopenharmony_ci 45628c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) { 45638c2ecf20Sopenharmony_ci switch (fa->advice) { 45648c2ecf20Sopenharmony_ci case POSIX_FADV_NORMAL: 45658c2ecf20Sopenharmony_ci case POSIX_FADV_RANDOM: 45668c2ecf20Sopenharmony_ci case POSIX_FADV_SEQUENTIAL: 45678c2ecf20Sopenharmony_ci break; 45688c2ecf20Sopenharmony_ci default: 45698c2ecf20Sopenharmony_ci return -EAGAIN; 45708c2ecf20Sopenharmony_ci } 45718c2ecf20Sopenharmony_ci } 45728c2ecf20Sopenharmony_ci 45738c2ecf20Sopenharmony_ci ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice); 45748c2ecf20Sopenharmony_ci if (ret < 0) 45758c2ecf20Sopenharmony_ci req_set_fail(req); 45768c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, 0); 45778c2ecf20Sopenharmony_ci return 0; 45788c2ecf20Sopenharmony_ci} 
45798c2ecf20Sopenharmony_ci 45808c2ecf20Sopenharmony_cistatic int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 45818c2ecf20Sopenharmony_ci{ 45828c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 45838c2ecf20Sopenharmony_ci return -EINVAL; 45848c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) 45858c2ecf20Sopenharmony_ci return -EINVAL; 45868c2ecf20Sopenharmony_ci if (req->flags & REQ_F_FIXED_FILE) 45878c2ecf20Sopenharmony_ci return -EBADF; 45888c2ecf20Sopenharmony_ci 45898c2ecf20Sopenharmony_ci req->statx.dfd = READ_ONCE(sqe->fd); 45908c2ecf20Sopenharmony_ci req->statx.mask = READ_ONCE(sqe->len); 45918c2ecf20Sopenharmony_ci req->statx.filename = u64_to_user_ptr(READ_ONCE(sqe->addr)); 45928c2ecf20Sopenharmony_ci req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2)); 45938c2ecf20Sopenharmony_ci req->statx.flags = READ_ONCE(sqe->statx_flags); 45948c2ecf20Sopenharmony_ci 45958c2ecf20Sopenharmony_ci return 0; 45968c2ecf20Sopenharmony_ci} 45978c2ecf20Sopenharmony_ci 45988c2ecf20Sopenharmony_cistatic int io_statx(struct io_kiocb *req, unsigned int issue_flags) 45998c2ecf20Sopenharmony_ci{ 46008c2ecf20Sopenharmony_ci struct io_statx *ctx = &req->statx; 46018c2ecf20Sopenharmony_ci int ret; 46028c2ecf20Sopenharmony_ci 46038c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 46048c2ecf20Sopenharmony_ci return -EAGAIN; 46058c2ecf20Sopenharmony_ci 46068c2ecf20Sopenharmony_ci ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask, 46078c2ecf20Sopenharmony_ci ctx->buffer); 46088c2ecf20Sopenharmony_ci 46098c2ecf20Sopenharmony_ci if (ret < 0) 46108c2ecf20Sopenharmony_ci req_set_fail(req); 46118c2ecf20Sopenharmony_ci io_req_complete(req, ret); 46128c2ecf20Sopenharmony_ci return 0; 46138c2ecf20Sopenharmony_ci} 46148c2ecf20Sopenharmony_ci 46158c2ecf20Sopenharmony_cistatic int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 46168c2ecf20Sopenharmony_ci{ 
46178c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 46188c2ecf20Sopenharmony_ci return -EINVAL; 46198c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || 46208c2ecf20Sopenharmony_ci sqe->rw_flags || sqe->buf_index) 46218c2ecf20Sopenharmony_ci return -EINVAL; 46228c2ecf20Sopenharmony_ci if (req->flags & REQ_F_FIXED_FILE) 46238c2ecf20Sopenharmony_ci return -EBADF; 46248c2ecf20Sopenharmony_ci 46258c2ecf20Sopenharmony_ci req->close.fd = READ_ONCE(sqe->fd); 46268c2ecf20Sopenharmony_ci req->close.file_slot = READ_ONCE(sqe->file_index); 46278c2ecf20Sopenharmony_ci if (req->close.file_slot && req->close.fd) 46288c2ecf20Sopenharmony_ci return -EINVAL; 46298c2ecf20Sopenharmony_ci 46308c2ecf20Sopenharmony_ci return 0; 46318c2ecf20Sopenharmony_ci} 46328c2ecf20Sopenharmony_ci 46338c2ecf20Sopenharmony_cistatic int io_close(struct io_kiocb *req, unsigned int issue_flags) 46348c2ecf20Sopenharmony_ci{ 46358c2ecf20Sopenharmony_ci struct files_struct *files = current->files; 46368c2ecf20Sopenharmony_ci struct io_close *close = &req->close; 46378c2ecf20Sopenharmony_ci struct fdtable *fdt; 46388c2ecf20Sopenharmony_ci struct file *file = NULL; 46398c2ecf20Sopenharmony_ci int ret = -EBADF; 46408c2ecf20Sopenharmony_ci 46418c2ecf20Sopenharmony_ci if (req->close.file_slot) { 46428c2ecf20Sopenharmony_ci ret = io_close_fixed(req, issue_flags); 46438c2ecf20Sopenharmony_ci goto err; 46448c2ecf20Sopenharmony_ci } 46458c2ecf20Sopenharmony_ci 46468c2ecf20Sopenharmony_ci spin_lock(&files->file_lock); 46478c2ecf20Sopenharmony_ci fdt = files_fdtable(files); 46488c2ecf20Sopenharmony_ci if (close->fd >= fdt->max_fds) { 46498c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 46508c2ecf20Sopenharmony_ci goto err; 46518c2ecf20Sopenharmony_ci } 46528c2ecf20Sopenharmony_ci file = fdt->fd[close->fd]; 46538c2ecf20Sopenharmony_ci if (!file || file->f_op == &io_uring_fops) { 46548c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 
46558c2ecf20Sopenharmony_ci file = NULL; 46568c2ecf20Sopenharmony_ci goto err; 46578c2ecf20Sopenharmony_ci } 46588c2ecf20Sopenharmony_ci 46598c2ecf20Sopenharmony_ci /* if the file has a flush method, be safe and punt to async */ 46608c2ecf20Sopenharmony_ci if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) { 46618c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 46628c2ecf20Sopenharmony_ci return -EAGAIN; 46638c2ecf20Sopenharmony_ci } 46648c2ecf20Sopenharmony_ci 46658c2ecf20Sopenharmony_ci ret = __close_fd_get_file(close->fd, &file); 46668c2ecf20Sopenharmony_ci spin_unlock(&files->file_lock); 46678c2ecf20Sopenharmony_ci if (ret < 0) { 46688c2ecf20Sopenharmony_ci if (ret == -ENOENT) 46698c2ecf20Sopenharmony_ci ret = -EBADF; 46708c2ecf20Sopenharmony_ci goto err; 46718c2ecf20Sopenharmony_ci } 46728c2ecf20Sopenharmony_ci 46738c2ecf20Sopenharmony_ci /* No ->flush() or already async, safely close from here */ 46748c2ecf20Sopenharmony_ci ret = filp_close(file, current->files); 46758c2ecf20Sopenharmony_cierr: 46768c2ecf20Sopenharmony_ci if (ret < 0) 46778c2ecf20Sopenharmony_ci req_set_fail(req); 46788c2ecf20Sopenharmony_ci if (file) 46798c2ecf20Sopenharmony_ci fput(file); 46808c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, 0); 46818c2ecf20Sopenharmony_ci return 0; 46828c2ecf20Sopenharmony_ci} 46838c2ecf20Sopenharmony_ci 46848c2ecf20Sopenharmony_cistatic int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 46858c2ecf20Sopenharmony_ci{ 46868c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 46878c2ecf20Sopenharmony_ci 46888c2ecf20Sopenharmony_ci if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) 46898c2ecf20Sopenharmony_ci return -EINVAL; 46908c2ecf20Sopenharmony_ci if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index || 46918c2ecf20Sopenharmony_ci sqe->splice_fd_in)) 46928c2ecf20Sopenharmony_ci return -EINVAL; 46938c2ecf20Sopenharmony_ci 46948c2ecf20Sopenharmony_ci req->sync.off = READ_ONCE(sqe->off); 
46958c2ecf20Sopenharmony_ci req->sync.len = READ_ONCE(sqe->len); 46968c2ecf20Sopenharmony_ci req->sync.flags = READ_ONCE(sqe->sync_range_flags); 46978c2ecf20Sopenharmony_ci return 0; 46988c2ecf20Sopenharmony_ci} 46998c2ecf20Sopenharmony_ci 47008c2ecf20Sopenharmony_cistatic int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags) 47018c2ecf20Sopenharmony_ci{ 47028c2ecf20Sopenharmony_ci int ret; 47038c2ecf20Sopenharmony_ci 47048c2ecf20Sopenharmony_ci /* sync_file_range always requires a blocking context */ 47058c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 47068c2ecf20Sopenharmony_ci return -EAGAIN; 47078c2ecf20Sopenharmony_ci 47088c2ecf20Sopenharmony_ci ret = sync_file_range(req->file, req->sync.off, req->sync.len, 47098c2ecf20Sopenharmony_ci req->sync.flags); 47108c2ecf20Sopenharmony_ci if (ret < 0) 47118c2ecf20Sopenharmony_ci req_set_fail(req); 47128c2ecf20Sopenharmony_ci io_req_complete(req, ret); 47138c2ecf20Sopenharmony_ci return 0; 47148c2ecf20Sopenharmony_ci} 47158c2ecf20Sopenharmony_ci 47168c2ecf20Sopenharmony_ci#if defined(CONFIG_NET) 47178c2ecf20Sopenharmony_cistatic bool io_net_retry(struct socket *sock, int flags) 47188c2ecf20Sopenharmony_ci{ 47198c2ecf20Sopenharmony_ci if (!(flags & MSG_WAITALL)) 47208c2ecf20Sopenharmony_ci return false; 47218c2ecf20Sopenharmony_ci return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; 47228c2ecf20Sopenharmony_ci} 47238c2ecf20Sopenharmony_ci 47248c2ecf20Sopenharmony_cistatic int io_setup_async_msg(struct io_kiocb *req, 47258c2ecf20Sopenharmony_ci struct io_async_msghdr *kmsg) 47268c2ecf20Sopenharmony_ci{ 47278c2ecf20Sopenharmony_ci struct io_async_msghdr *async_msg = req->async_data; 47288c2ecf20Sopenharmony_ci 47298c2ecf20Sopenharmony_ci if (async_msg) 47308c2ecf20Sopenharmony_ci return -EAGAIN; 47318c2ecf20Sopenharmony_ci if (io_alloc_async_data(req)) { 47328c2ecf20Sopenharmony_ci kfree(kmsg->free_iov); 47338c2ecf20Sopenharmony_ci return -ENOMEM; 47348c2ecf20Sopenharmony_ci } 
47358c2ecf20Sopenharmony_ci async_msg = req->async_data; 47368c2ecf20Sopenharmony_ci req->flags |= REQ_F_NEED_CLEANUP; 47378c2ecf20Sopenharmony_ci memcpy(async_msg, kmsg, sizeof(*kmsg)); 47388c2ecf20Sopenharmony_ci if (async_msg->msg.msg_name) 47398c2ecf20Sopenharmony_ci async_msg->msg.msg_name = &async_msg->addr; 47408c2ecf20Sopenharmony_ci /* if were using fast_iov, set it to the new one */ 47418c2ecf20Sopenharmony_ci if (!kmsg->free_iov) { 47428c2ecf20Sopenharmony_ci size_t fast_idx = kmsg->msg.msg_iter.iov - kmsg->fast_iov; 47438c2ecf20Sopenharmony_ci async_msg->msg.msg_iter.iov = &async_msg->fast_iov[fast_idx]; 47448c2ecf20Sopenharmony_ci } 47458c2ecf20Sopenharmony_ci 47468c2ecf20Sopenharmony_ci return -EAGAIN; 47478c2ecf20Sopenharmony_ci} 47488c2ecf20Sopenharmony_ci 47498c2ecf20Sopenharmony_cistatic int io_sendmsg_copy_hdr(struct io_kiocb *req, 47508c2ecf20Sopenharmony_ci struct io_async_msghdr *iomsg) 47518c2ecf20Sopenharmony_ci{ 47528c2ecf20Sopenharmony_ci struct io_sr_msg *sr = &req->sr_msg; 47538c2ecf20Sopenharmony_ci int ret; 47548c2ecf20Sopenharmony_ci 47558c2ecf20Sopenharmony_ci iomsg->msg.msg_name = &iomsg->addr; 47568c2ecf20Sopenharmony_ci iomsg->free_iov = iomsg->fast_iov; 47578c2ecf20Sopenharmony_ci ret = sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg, 47588c2ecf20Sopenharmony_ci req->sr_msg.msg_flags, &iomsg->free_iov); 47598c2ecf20Sopenharmony_ci /* save msg_control as sys_sendmsg() overwrites it */ 47608c2ecf20Sopenharmony_ci sr->msg_control = iomsg->msg.msg_control; 47618c2ecf20Sopenharmony_ci return ret; 47628c2ecf20Sopenharmony_ci} 47638c2ecf20Sopenharmony_ci 47648c2ecf20Sopenharmony_cistatic int io_sendmsg_prep_async(struct io_kiocb *req) 47658c2ecf20Sopenharmony_ci{ 47668c2ecf20Sopenharmony_ci int ret; 47678c2ecf20Sopenharmony_ci 47688c2ecf20Sopenharmony_ci ret = io_sendmsg_copy_hdr(req, req->async_data); 47698c2ecf20Sopenharmony_ci if (!ret) 47708c2ecf20Sopenharmony_ci req->flags |= REQ_F_NEED_CLEANUP; 47718c2ecf20Sopenharmony_ci return 
ret; 47728c2ecf20Sopenharmony_ci} 47738c2ecf20Sopenharmony_ci 47748c2ecf20Sopenharmony_cistatic int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 47758c2ecf20Sopenharmony_ci{ 47768c2ecf20Sopenharmony_ci struct io_sr_msg *sr = &req->sr_msg; 47778c2ecf20Sopenharmony_ci 47788c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 47798c2ecf20Sopenharmony_ci return -EINVAL; 47808c2ecf20Sopenharmony_ci if (unlikely(sqe->addr2 || sqe->file_index)) 47818c2ecf20Sopenharmony_ci return -EINVAL; 47828c2ecf20Sopenharmony_ci if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio)) 47838c2ecf20Sopenharmony_ci return -EINVAL; 47848c2ecf20Sopenharmony_ci 47858c2ecf20Sopenharmony_ci sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 47868c2ecf20Sopenharmony_ci sr->len = READ_ONCE(sqe->len); 47878c2ecf20Sopenharmony_ci sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; 47888c2ecf20Sopenharmony_ci if (sr->msg_flags & MSG_DONTWAIT) 47898c2ecf20Sopenharmony_ci req->flags |= REQ_F_NOWAIT; 47908c2ecf20Sopenharmony_ci 47918c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 47928c2ecf20Sopenharmony_ci if (req->ctx->compat) 47938c2ecf20Sopenharmony_ci sr->msg_flags |= MSG_CMSG_COMPAT; 47948c2ecf20Sopenharmony_ci#endif 47958c2ecf20Sopenharmony_ci sr->done_io = 0; 47968c2ecf20Sopenharmony_ci return 0; 47978c2ecf20Sopenharmony_ci} 47988c2ecf20Sopenharmony_ci 47998c2ecf20Sopenharmony_cistatic int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) 48008c2ecf20Sopenharmony_ci{ 48018c2ecf20Sopenharmony_ci struct io_async_msghdr iomsg, *kmsg; 48028c2ecf20Sopenharmony_ci struct io_sr_msg *sr = &req->sr_msg; 48038c2ecf20Sopenharmony_ci struct socket *sock; 48048c2ecf20Sopenharmony_ci unsigned flags; 48058c2ecf20Sopenharmony_ci int min_ret = 0; 48068c2ecf20Sopenharmony_ci int ret; 48078c2ecf20Sopenharmony_ci 48088c2ecf20Sopenharmony_ci sock = sock_from_file(req->file, &ret); 48098c2ecf20Sopenharmony_ci if (unlikely(!sock)) 
48108c2ecf20Sopenharmony_ci return ret; 48118c2ecf20Sopenharmony_ci 48128c2ecf20Sopenharmony_ci kmsg = req->async_data; 48138c2ecf20Sopenharmony_ci if (!kmsg) { 48148c2ecf20Sopenharmony_ci ret = io_sendmsg_copy_hdr(req, &iomsg); 48158c2ecf20Sopenharmony_ci if (ret) 48168c2ecf20Sopenharmony_ci return ret; 48178c2ecf20Sopenharmony_ci kmsg = &iomsg; 48188c2ecf20Sopenharmony_ci } else { 48198c2ecf20Sopenharmony_ci kmsg->msg.msg_control = sr->msg_control; 48208c2ecf20Sopenharmony_ci } 48218c2ecf20Sopenharmony_ci 48228c2ecf20Sopenharmony_ci flags = req->sr_msg.msg_flags; 48238c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 48248c2ecf20Sopenharmony_ci flags |= MSG_DONTWAIT; 48258c2ecf20Sopenharmony_ci if (flags & MSG_WAITALL) 48268c2ecf20Sopenharmony_ci min_ret = iov_iter_count(&kmsg->msg.msg_iter); 48278c2ecf20Sopenharmony_ci 48288c2ecf20Sopenharmony_ci ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); 48298c2ecf20Sopenharmony_ci 48308c2ecf20Sopenharmony_ci if (ret < min_ret) { 48318c2ecf20Sopenharmony_ci if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 48328c2ecf20Sopenharmony_ci return io_setup_async_msg(req, kmsg); 48338c2ecf20Sopenharmony_ci if (ret == -ERESTARTSYS) 48348c2ecf20Sopenharmony_ci ret = -EINTR; 48358c2ecf20Sopenharmony_ci if (ret > 0 && io_net_retry(sock, flags)) { 48368c2ecf20Sopenharmony_ci sr->done_io += ret; 48378c2ecf20Sopenharmony_ci req->flags |= REQ_F_PARTIAL_IO; 48388c2ecf20Sopenharmony_ci return io_setup_async_msg(req, kmsg); 48398c2ecf20Sopenharmony_ci } 48408c2ecf20Sopenharmony_ci req_set_fail(req); 48418c2ecf20Sopenharmony_ci } 48428c2ecf20Sopenharmony_ci /* fast path, check for non-NULL to avoid function call */ 48438c2ecf20Sopenharmony_ci if (kmsg->free_iov) 48448c2ecf20Sopenharmony_ci kfree(kmsg->free_iov); 48458c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_NEED_CLEANUP; 48468c2ecf20Sopenharmony_ci if (ret >= 0) 48478c2ecf20Sopenharmony_ci ret += sr->done_io; 48488c2ecf20Sopenharmony_ci else if (sr->done_io) 
48498c2ecf20Sopenharmony_ci ret = sr->done_io; 48508c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, 0); 48518c2ecf20Sopenharmony_ci return 0; 48528c2ecf20Sopenharmony_ci} 48538c2ecf20Sopenharmony_ci 48548c2ecf20Sopenharmony_cistatic int io_send(struct io_kiocb *req, unsigned int issue_flags) 48558c2ecf20Sopenharmony_ci{ 48568c2ecf20Sopenharmony_ci struct io_sr_msg *sr = &req->sr_msg; 48578c2ecf20Sopenharmony_ci struct msghdr msg; 48588c2ecf20Sopenharmony_ci struct iovec iov; 48598c2ecf20Sopenharmony_ci struct socket *sock; 48608c2ecf20Sopenharmony_ci unsigned flags; 48618c2ecf20Sopenharmony_ci int min_ret = 0; 48628c2ecf20Sopenharmony_ci int ret; 48638c2ecf20Sopenharmony_ci 48648c2ecf20Sopenharmony_ci sock = sock_from_file(req->file, &ret); 48658c2ecf20Sopenharmony_ci if (unlikely(!sock)) 48668c2ecf20Sopenharmony_ci return ret; 48678c2ecf20Sopenharmony_ci 48688c2ecf20Sopenharmony_ci ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter); 48698c2ecf20Sopenharmony_ci if (unlikely(ret)) 48708c2ecf20Sopenharmony_ci return ret; 48718c2ecf20Sopenharmony_ci 48728c2ecf20Sopenharmony_ci msg.msg_name = NULL; 48738c2ecf20Sopenharmony_ci msg.msg_control = NULL; 48748c2ecf20Sopenharmony_ci msg.msg_controllen = 0; 48758c2ecf20Sopenharmony_ci msg.msg_namelen = 0; 48768c2ecf20Sopenharmony_ci 48778c2ecf20Sopenharmony_ci flags = req->sr_msg.msg_flags; 48788c2ecf20Sopenharmony_ci if (issue_flags & IO_URING_F_NONBLOCK) 48798c2ecf20Sopenharmony_ci flags |= MSG_DONTWAIT; 48808c2ecf20Sopenharmony_ci if (flags & MSG_WAITALL) 48818c2ecf20Sopenharmony_ci min_ret = iov_iter_count(&msg.msg_iter); 48828c2ecf20Sopenharmony_ci 48838c2ecf20Sopenharmony_ci msg.msg_flags = flags; 48848c2ecf20Sopenharmony_ci ret = sock_sendmsg(sock, &msg); 48858c2ecf20Sopenharmony_ci if (ret < min_ret) { 48868c2ecf20Sopenharmony_ci if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) 48878c2ecf20Sopenharmony_ci return -EAGAIN; 48888c2ecf20Sopenharmony_ci if (ret == 
-ERESTARTSYS) 48898c2ecf20Sopenharmony_ci ret = -EINTR; 48908c2ecf20Sopenharmony_ci if (ret > 0 && io_net_retry(sock, flags)) { 48918c2ecf20Sopenharmony_ci sr->len -= ret; 48928c2ecf20Sopenharmony_ci sr->buf += ret; 48938c2ecf20Sopenharmony_ci sr->done_io += ret; 48948c2ecf20Sopenharmony_ci req->flags |= REQ_F_PARTIAL_IO; 48958c2ecf20Sopenharmony_ci return -EAGAIN; 48968c2ecf20Sopenharmony_ci } 48978c2ecf20Sopenharmony_ci req_set_fail(req); 48988c2ecf20Sopenharmony_ci } 48998c2ecf20Sopenharmony_ci if (ret >= 0) 49008c2ecf20Sopenharmony_ci ret += sr->done_io; 49018c2ecf20Sopenharmony_ci else if (sr->done_io) 49028c2ecf20Sopenharmony_ci ret = sr->done_io; 49038c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, 0); 49048c2ecf20Sopenharmony_ci return 0; 49058c2ecf20Sopenharmony_ci} 49068c2ecf20Sopenharmony_ci 49078c2ecf20Sopenharmony_cistatic int __io_recvmsg_copy_hdr(struct io_kiocb *req, 49088c2ecf20Sopenharmony_ci struct io_async_msghdr *iomsg) 49098c2ecf20Sopenharmony_ci{ 49108c2ecf20Sopenharmony_ci struct io_sr_msg *sr = &req->sr_msg; 49118c2ecf20Sopenharmony_ci struct iovec __user *uiov; 49128c2ecf20Sopenharmony_ci size_t iov_len; 49138c2ecf20Sopenharmony_ci int ret; 49148c2ecf20Sopenharmony_ci 49158c2ecf20Sopenharmony_ci ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg, 49168c2ecf20Sopenharmony_ci &iomsg->uaddr, &uiov, &iov_len); 49178c2ecf20Sopenharmony_ci if (ret) 49188c2ecf20Sopenharmony_ci return ret; 49198c2ecf20Sopenharmony_ci 49208c2ecf20Sopenharmony_ci if (req->flags & REQ_F_BUFFER_SELECT) { 49218c2ecf20Sopenharmony_ci if (iov_len > 1) 49228c2ecf20Sopenharmony_ci return -EINVAL; 49238c2ecf20Sopenharmony_ci if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov))) 49248c2ecf20Sopenharmony_ci return -EFAULT; 49258c2ecf20Sopenharmony_ci sr->len = iomsg->fast_iov[0].iov_len; 49268c2ecf20Sopenharmony_ci iomsg->free_iov = NULL; 49278c2ecf20Sopenharmony_ci } else { 49288c2ecf20Sopenharmony_ci iomsg->free_iov = iomsg->fast_iov; 
49298c2ecf20Sopenharmony_ci ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV, 49308c2ecf20Sopenharmony_ci &iomsg->free_iov, &iomsg->msg.msg_iter, 49318c2ecf20Sopenharmony_ci false); 49328c2ecf20Sopenharmony_ci if (ret > 0) 49338c2ecf20Sopenharmony_ci ret = 0; 49348c2ecf20Sopenharmony_ci } 49358c2ecf20Sopenharmony_ci 49368c2ecf20Sopenharmony_ci return ret; 49378c2ecf20Sopenharmony_ci} 49388c2ecf20Sopenharmony_ci 49398c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 49408c2ecf20Sopenharmony_cistatic int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, 49418c2ecf20Sopenharmony_ci struct io_async_msghdr *iomsg) 49428c2ecf20Sopenharmony_ci{ 49438c2ecf20Sopenharmony_ci struct io_sr_msg *sr = &req->sr_msg; 49448c2ecf20Sopenharmony_ci struct compat_iovec __user *uiov; 49458c2ecf20Sopenharmony_ci compat_uptr_t ptr; 49468c2ecf20Sopenharmony_ci compat_size_t len; 49478c2ecf20Sopenharmony_ci int ret; 49488c2ecf20Sopenharmony_ci 49498c2ecf20Sopenharmony_ci ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr, 49508c2ecf20Sopenharmony_ci &ptr, &len); 49518c2ecf20Sopenharmony_ci if (ret) 49528c2ecf20Sopenharmony_ci return ret; 49538c2ecf20Sopenharmony_ci 49548c2ecf20Sopenharmony_ci uiov = compat_ptr(ptr); 49558c2ecf20Sopenharmony_ci if (req->flags & REQ_F_BUFFER_SELECT) { 49568c2ecf20Sopenharmony_ci compat_ssize_t clen; 49578c2ecf20Sopenharmony_ci 49588c2ecf20Sopenharmony_ci if (len > 1) 49598c2ecf20Sopenharmony_ci return -EINVAL; 49608c2ecf20Sopenharmony_ci if (!access_ok(uiov, sizeof(*uiov))) 49618c2ecf20Sopenharmony_ci return -EFAULT; 49628c2ecf20Sopenharmony_ci if (__get_user(clen, &uiov->iov_len)) 49638c2ecf20Sopenharmony_ci return -EFAULT; 49648c2ecf20Sopenharmony_ci if (clen < 0) 49658c2ecf20Sopenharmony_ci return -EINVAL; 49668c2ecf20Sopenharmony_ci sr->len = clen; 49678c2ecf20Sopenharmony_ci iomsg->free_iov = NULL; 49688c2ecf20Sopenharmony_ci } else { 49698c2ecf20Sopenharmony_ci iomsg->free_iov = iomsg->fast_iov; 49708c2ecf20Sopenharmony_ci ret = 
__import_iovec(READ, (struct iovec __user *)uiov, len, 49718c2ecf20Sopenharmony_ci UIO_FASTIOV, &iomsg->free_iov, 49728c2ecf20Sopenharmony_ci &iomsg->msg.msg_iter, true); 49738c2ecf20Sopenharmony_ci if (ret < 0) 49748c2ecf20Sopenharmony_ci return ret; 49758c2ecf20Sopenharmony_ci } 49768c2ecf20Sopenharmony_ci 49778c2ecf20Sopenharmony_ci return 0; 49788c2ecf20Sopenharmony_ci} 49798c2ecf20Sopenharmony_ci#endif 49808c2ecf20Sopenharmony_ci 49818c2ecf20Sopenharmony_cistatic int io_recvmsg_copy_hdr(struct io_kiocb *req, 49828c2ecf20Sopenharmony_ci struct io_async_msghdr *iomsg) 49838c2ecf20Sopenharmony_ci{ 49848c2ecf20Sopenharmony_ci iomsg->msg.msg_name = &iomsg->addr; 49858c2ecf20Sopenharmony_ci 49868c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 49878c2ecf20Sopenharmony_ci if (req->ctx->compat) 49888c2ecf20Sopenharmony_ci return __io_compat_recvmsg_copy_hdr(req, iomsg); 49898c2ecf20Sopenharmony_ci#endif 49908c2ecf20Sopenharmony_ci 49918c2ecf20Sopenharmony_ci return __io_recvmsg_copy_hdr(req, iomsg); 49928c2ecf20Sopenharmony_ci} 49938c2ecf20Sopenharmony_ci 49948c2ecf20Sopenharmony_cistatic struct io_buffer *io_recv_buffer_select(struct io_kiocb *req, 49958c2ecf20Sopenharmony_ci bool needs_lock) 49968c2ecf20Sopenharmony_ci{ 49978c2ecf20Sopenharmony_ci struct io_sr_msg *sr = &req->sr_msg; 49988c2ecf20Sopenharmony_ci struct io_buffer *kbuf; 49998c2ecf20Sopenharmony_ci 50008c2ecf20Sopenharmony_ci kbuf = io_buffer_select(req, &sr->len, sr->bgid, sr->kbuf, needs_lock); 50018c2ecf20Sopenharmony_ci if (IS_ERR(kbuf)) 50028c2ecf20Sopenharmony_ci return kbuf; 50038c2ecf20Sopenharmony_ci 50048c2ecf20Sopenharmony_ci sr->kbuf = kbuf; 50058c2ecf20Sopenharmony_ci req->flags |= REQ_F_BUFFER_SELECTED; 50068c2ecf20Sopenharmony_ci return kbuf; 50078c2ecf20Sopenharmony_ci} 50088c2ecf20Sopenharmony_ci 50098c2ecf20Sopenharmony_cistatic inline unsigned int io_put_recv_kbuf(struct io_kiocb *req) 50108c2ecf20Sopenharmony_ci{ 50118c2ecf20Sopenharmony_ci return io_put_kbuf(req, req->sr_msg.kbuf); 
50128c2ecf20Sopenharmony_ci} 50138c2ecf20Sopenharmony_ci 50148c2ecf20Sopenharmony_cistatic int io_recvmsg_prep_async(struct io_kiocb *req) 50158c2ecf20Sopenharmony_ci{ 50168c2ecf20Sopenharmony_ci int ret; 50178c2ecf20Sopenharmony_ci 50188c2ecf20Sopenharmony_ci ret = io_recvmsg_copy_hdr(req, req->async_data); 50198c2ecf20Sopenharmony_ci if (!ret) 50208c2ecf20Sopenharmony_ci req->flags |= REQ_F_NEED_CLEANUP; 50218c2ecf20Sopenharmony_ci return ret; 50228c2ecf20Sopenharmony_ci} 50238c2ecf20Sopenharmony_ci 50248c2ecf20Sopenharmony_cistatic int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 50258c2ecf20Sopenharmony_ci{ 50268c2ecf20Sopenharmony_ci struct io_sr_msg *sr = &req->sr_msg; 50278c2ecf20Sopenharmony_ci 50288c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 50298c2ecf20Sopenharmony_ci return -EINVAL; 50308c2ecf20Sopenharmony_ci if (unlikely(sqe->addr2 || sqe->file_index)) 50318c2ecf20Sopenharmony_ci return -EINVAL; 50328c2ecf20Sopenharmony_ci if (unlikely(sqe->addr2 || sqe->file_index || sqe->ioprio)) 50338c2ecf20Sopenharmony_ci return -EINVAL; 50348c2ecf20Sopenharmony_ci 50358c2ecf20Sopenharmony_ci sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); 50368c2ecf20Sopenharmony_ci sr->len = READ_ONCE(sqe->len); 50378c2ecf20Sopenharmony_ci sr->bgid = READ_ONCE(sqe->buf_group); 50388c2ecf20Sopenharmony_ci sr->msg_flags = READ_ONCE(sqe->msg_flags); 50398c2ecf20Sopenharmony_ci if (sr->msg_flags & MSG_DONTWAIT) 50408c2ecf20Sopenharmony_ci req->flags |= REQ_F_NOWAIT; 50418c2ecf20Sopenharmony_ci 50428c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 50438c2ecf20Sopenharmony_ci if (req->ctx->compat) 50448c2ecf20Sopenharmony_ci sr->msg_flags |= MSG_CMSG_COMPAT; 50458c2ecf20Sopenharmony_ci#endif 50468c2ecf20Sopenharmony_ci sr->done_io = 0; 50478c2ecf20Sopenharmony_ci return 0; 50488c2ecf20Sopenharmony_ci} 50498c2ecf20Sopenharmony_ci 50508c2ecf20Sopenharmony_cistatic int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) 
50518c2ecf20Sopenharmony_ci{ 50528c2ecf20Sopenharmony_ci struct io_async_msghdr iomsg, *kmsg; 50538c2ecf20Sopenharmony_ci struct io_sr_msg *sr = &req->sr_msg; 50548c2ecf20Sopenharmony_ci struct socket *sock; 50558c2ecf20Sopenharmony_ci struct io_buffer *kbuf; 50568c2ecf20Sopenharmony_ci unsigned flags; 50578c2ecf20Sopenharmony_ci int min_ret = 0; 50588c2ecf20Sopenharmony_ci int ret, cflags = 0; 50598c2ecf20Sopenharmony_ci bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 50608c2ecf20Sopenharmony_ci 50618c2ecf20Sopenharmony_ci sock = sock_from_file(req->file, &ret); 50628c2ecf20Sopenharmony_ci if (unlikely(!sock)) 50638c2ecf20Sopenharmony_ci return ret; 50648c2ecf20Sopenharmony_ci 50658c2ecf20Sopenharmony_ci kmsg = req->async_data; 50668c2ecf20Sopenharmony_ci if (!kmsg) { 50678c2ecf20Sopenharmony_ci ret = io_recvmsg_copy_hdr(req, &iomsg); 50688c2ecf20Sopenharmony_ci if (ret) 50698c2ecf20Sopenharmony_ci return ret; 50708c2ecf20Sopenharmony_ci kmsg = &iomsg; 50718c2ecf20Sopenharmony_ci } 50728c2ecf20Sopenharmony_ci 50738c2ecf20Sopenharmony_ci if (req->flags & REQ_F_BUFFER_SELECT) { 50748c2ecf20Sopenharmony_ci kbuf = io_recv_buffer_select(req, !force_nonblock); 50758c2ecf20Sopenharmony_ci if (IS_ERR(kbuf)) 50768c2ecf20Sopenharmony_ci return PTR_ERR(kbuf); 50778c2ecf20Sopenharmony_ci kmsg->fast_iov[0].iov_base = u64_to_user_ptr(kbuf->addr); 50788c2ecf20Sopenharmony_ci kmsg->fast_iov[0].iov_len = req->sr_msg.len; 50798c2ecf20Sopenharmony_ci iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 50808c2ecf20Sopenharmony_ci 1, req->sr_msg.len); 50818c2ecf20Sopenharmony_ci } 50828c2ecf20Sopenharmony_ci 50838c2ecf20Sopenharmony_ci flags = req->sr_msg.msg_flags; 50848c2ecf20Sopenharmony_ci if (force_nonblock) 50858c2ecf20Sopenharmony_ci flags |= MSG_DONTWAIT; 50868c2ecf20Sopenharmony_ci if (flags & MSG_WAITALL && !kmsg->msg.msg_controllen) 50878c2ecf20Sopenharmony_ci min_ret = iov_iter_count(&kmsg->msg.msg_iter); 50888c2ecf20Sopenharmony_ci 
50898c2ecf20Sopenharmony_ci ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg, 50908c2ecf20Sopenharmony_ci kmsg->uaddr, flags); 50918c2ecf20Sopenharmony_ci if (ret < min_ret) { 50928c2ecf20Sopenharmony_ci if (ret == -EAGAIN && force_nonblock) 50938c2ecf20Sopenharmony_ci return io_setup_async_msg(req, kmsg); 50948c2ecf20Sopenharmony_ci if (ret == -ERESTARTSYS) 50958c2ecf20Sopenharmony_ci ret = -EINTR; 50968c2ecf20Sopenharmony_ci if (ret > 0 && io_net_retry(sock, flags)) { 50978c2ecf20Sopenharmony_ci kmsg->msg.msg_controllen = 0; 50988c2ecf20Sopenharmony_ci kmsg->msg.msg_control = NULL; 50998c2ecf20Sopenharmony_ci sr->done_io += ret; 51008c2ecf20Sopenharmony_ci req->flags |= REQ_F_PARTIAL_IO; 51018c2ecf20Sopenharmony_ci return io_setup_async_msg(req, kmsg); 51028c2ecf20Sopenharmony_ci } 51038c2ecf20Sopenharmony_ci req_set_fail(req); 51048c2ecf20Sopenharmony_ci } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { 51058c2ecf20Sopenharmony_ci req_set_fail(req); 51068c2ecf20Sopenharmony_ci } 51078c2ecf20Sopenharmony_ci 51088c2ecf20Sopenharmony_ci if (req->flags & REQ_F_BUFFER_SELECTED) 51098c2ecf20Sopenharmony_ci cflags = io_put_recv_kbuf(req); 51108c2ecf20Sopenharmony_ci /* fast path, check for non-NULL to avoid function call */ 51118c2ecf20Sopenharmony_ci if (kmsg->free_iov) 51128c2ecf20Sopenharmony_ci kfree(kmsg->free_iov); 51138c2ecf20Sopenharmony_ci req->flags &= ~REQ_F_NEED_CLEANUP; 51148c2ecf20Sopenharmony_ci if (ret >= 0) 51158c2ecf20Sopenharmony_ci ret += sr->done_io; 51168c2ecf20Sopenharmony_ci else if (sr->done_io) 51178c2ecf20Sopenharmony_ci ret = sr->done_io; 51188c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, cflags); 51198c2ecf20Sopenharmony_ci return 0; 51208c2ecf20Sopenharmony_ci}

/*
 * Issue handler for a receive into the single user buffer described by
 * req->sr_msg.
 *
 * Resolves the socket behind req->file, optionally obtains the destination
 * buffer through io_recv_buffer_select() (REQ_F_BUFFER_SELECT), and calls
 * sock_recvmsg(). When IO_URING_F_NONBLOCK is set in @issue_flags the
 * receive is attempted with MSG_DONTWAIT.
 *
 * Returns -EAGAIN when the request has to be retried: either nothing could
 * be received in nonblocking mode, or the transfer was short and
 * io_net_retry() allows continuing it (progress is kept in sr->done_io).
 * Errors from sock_from_file() and io_recv_buffer_select() are returned
 * directly. In every other case the request is completed through
 * __io_req_complete() and 0 is returned.
 */
static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_buffer *kbuf;
	struct io_sr_msg *sr = &req->sr_msg;
	struct msghdr msg;
	void __user *buf = sr->buf;
	struct socket *sock;
	struct iovec iov;
	unsigned flags;
	int min_ret = 0;
	int ret, cflags = 0;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	sock = sock_from_file(req->file, &ret);
	if (unlikely(!sock))
		return ret;

	if (req->flags & REQ_F_BUFFER_SELECT) {
		kbuf = io_recv_buffer_select(req, !force_nonblock);
		if (IS_ERR(kbuf))
			return PTR_ERR(kbuf);
		buf = u64_to_user_ptr(kbuf->addr);
	}

	ret = import_single_range(READ, buf, sr->len, &iov, &msg.msg_iter);
	if (unlikely(ret))
		goto out_free;

	msg.msg_name = NULL;
	msg.msg_control = NULL;
	msg.msg_controllen = 0;
	msg.msg_namelen = 0;
	msg.msg_iocb = NULL;
	msg.msg_flags = 0;

	flags = req->sr_msg.msg_flags;
	if (force_nonblock)
		flags |= MSG_DONTWAIT;
	/* with MSG_WAITALL anything short of the full buffer is incomplete */
	if (flags & MSG_WAITALL)
		min_ret = iov_iter_count(&msg.msg_iter);

	ret = sock_recvmsg(sock, &msg, flags);
	if (ret < min_ret) {
		if (ret == -EAGAIN && force_nonblock)
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		if (ret > 0 && io_net_retry(sock, flags)) {
			/* short receive: skip what was read and ask for a retry */
			sr->len -= ret;
			sr->buf += ret;
			sr->done_io += ret;
			req->flags |= REQ_F_PARTIAL_IO;
			return -EAGAIN;
		}
		req_set_fail(req);
	} else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
		/* import_single_range() failure also lands here, ret holds the error */
out_free:
		req_set_fail(req);
	}
	if (req->flags & REQ_F_BUFFER_SELECTED)
		cflags = io_put_recv_kbuf(req);
	/*
	 * Account for bytes received by earlier partial attempts; an error
	 * after partial progress reports the progress instead.
	 */
	if (ret >= 0)
		ret += sr->done_io;
	else if (sr->done_io)
		ret = sr->done_io;
	__io_req_complete(req, issue_flags, ret, cflags);
	return 0;
}

/*
 * Validate an accept SQE and copy its arguments into req->accept.
 *
 * Returns -EINVAL on IORING_SETUP_IOPOLL rings, when ioprio, len or
 * buf_index are set, when flags other than SOCK_CLOEXEC/SOCK_NONBLOCK are
 * requested, or when a fixed file slot is combined with SOCK_CLOEXEC.
 * Returns 0 otherwise.
 */
static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_accept *accept = &req->accept;

	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
		return -EINVAL;
	if (sqe->ioprio || sqe->len || sqe->buf_index)
		return -EINVAL;

	accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
	accept->flags = READ_ONCE(sqe->accept_flags);
	accept->nofile = rlimit(RLIMIT_NOFILE);

	accept->file_slot = READ_ONCE(sqe->file_index);
	if (accept->file_slot && (accept->flags & SOCK_CLOEXEC))
		return -EINVAL;
	if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
		return -EINVAL;
	/* where the two constants differ, store the O_NONBLOCK form */
	if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK))
		accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
	return 0;
}

/*
 * Issue handler for accept on req->file.
 *
 * The accepted file is either installed into a newly reserved fd, or, when
 * accept->file_slot is non-zero, into fixed file slot (file_slot - 1) via
 * io_install_fixed_file().
 *
 * Returns the fd reservation error directly, -EAGAIN when do_accept()
 * reported -EAGAIN under IO_URING_F_NONBLOCK, and otherwise completes the
 * request through __io_req_complete() and returns 0.
 */
static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_accept *accept = &req->accept;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
	unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
	bool fixed = !!accept->file_slot;
	struct file *file;
	int ret, fd;

	/* a regular fd is only needed when no fixed slot was requested */
	if (!fixed) {
		fd = __get_unused_fd_flags(accept->flags, accept->nofile);
		if (unlikely(fd < 0))
			return fd;
	}
	file = do_accept(req->file, file_flags, accept->addr, accept->addr_len,
			 accept->flags);

	if (IS_ERR(file)) {
		if (!fixed)
			put_unused_fd(fd);
		ret = PTR_ERR(file);
		/* safe to retry */
		req->flags |= REQ_F_PARTIAL_IO;
		if (ret == -EAGAIN && force_nonblock)
			return -EAGAIN;
		if (ret == -ERESTARTSYS)
			ret = -EINTR;
		req_set_fail(req);
	} else if (!fixed) {
		fd_install(fd, file);
		ret = fd;
	} else {
		ret = io_install_fixed_file(req, file, issue_flags,
					    accept->file_slot - 1);
	}
	__io_req_complete(req, issue_flags, ret, 0);
	return 0;
}

/*
 * Copy the user supplied socket address of a connect request into the
 * request's async data (struct io_async_connect). Returns the result of
 * move_addr_to_kernel().
 */
static int io_connect_prep_async(struct io_kiocb *req)
{
	struct io_async_connect *io = req->async_data;
	struct io_connect *conn = &req->connect;

	return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
}

/*
 * Validate a connect SQE and copy the address pointer and length into
 * req->connect. Returns -EINVAL on IORING_SETUP_IOPOLL rings or when any of
 * ioprio, len, buf_index, rw_flags or splice_fd_in is set, 0 otherwise.
 */
static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	struct io_connect *conn = &req->connect;

	if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
		return -EINVAL;
	if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags ||
	    sqe->splice_fd_in)
		return -EINVAL;

	conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
	conn->addr_len = READ_ONCE(sqe->addr2);
	return 0;
}

/*
 * Issue handler for connect on req->file.
 *
 * The kernel copy of the address is taken from req->async_data when present,
 * otherwise it is copied from userspace into an on-stack structure. If a
 * nonblocking attempt reports -EAGAIN or -EINPROGRESS, the on-stack copy is
 * saved into freshly allocated async data (unless async data already exists)
 * and -EAGAIN is returned so the request can be retried. Otherwise the
 * request is completed through __io_req_complete() and 0 is returned.
 */
static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
{
	struct io_async_connect __io, *io;
	unsigned file_flags;
	int ret;
	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;

	if (req->async_data) {
		io = req->async_data;
	} else {
		ret = move_addr_to_kernel(req->connect.addr,
						req->connect.addr_len,
						&__io.address);
		if (ret)
			goto out;
		io = &__io;
	}

	file_flags = force_nonblock ? O_NONBLOCK : 0;

	ret = __sys_connect_file(req->file, &io->address,
					req->connect.addr_len, file_flags);
	if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
		if (req->async_data)
			return -EAGAIN;
		if (io_alloc_async_data(req)) {
			ret = -ENOMEM;
			goto out;
		}
		/* keep the address around for the retry */
		memcpy(req->async_data, &__io, sizeof(__io));
		return -EAGAIN;
	}
	if (ret == -ERESTARTSYS)
		ret = -EINTR;
out:
	if (ret < 0)
		req_set_fail(req);
	__io_req_complete(req, issue_flags, ret, 0);
	return 0;
}
#else /* !CONFIG_NET */
/*
 * Stubs for the !CONFIG_NET branch: each macro below generates handlers for
 * one network opcode that simply fail with -EOPNOTSUPP.
 */
#define IO_NETOP_FN(op)							\
static int io_##op(struct io_kiocb *req, unsigned int issue_flags)	\
{									\
	return -EOPNOTSUPP;						\
}

#define IO_NETOP_PREP(op)						\
IO_NETOP_FN(op)								\
static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \
{									\
	return -EOPNOTSUPP;						\
}									\

#define IO_NETOP_PREP_ASYNC(op)						\
IO_NETOP_PREP(op)							\
static int io_##op##_prep_async(struct io_kiocb *req)			\
{									\
	return -EOPNOTSUPP;						\
}

IO_NETOP_PREP_ASYNC(sendmsg);
IO_NETOP_PREP_ASYNC(recvmsg);
IO_NETOP_PREP_ASYNC(connect);
IO_NETOP_PREP(accept);
IO_NETOP_FN(send);
IO_NETOP_FN(recv);
#endif /* CONFIG_NET */

/*
 * Poll table handed to vfs_poll() while arming a request.
 *
 * @pt:         embedded poll_table_struct; the queue callbacks recover the
 *              io_poll_table from it with container_of()
 * @req:        request being armed
 * @nr_entries: number of waitqueue entries added so far
 * @error:      error recorded by the queue callback (0 if none)
 */
struct io_poll_table {
	struct poll_table_struct pt;
	struct io_kiocb *req;
	int nr_entries;
	int error;
};

/*
 * Layout of req->poll_refs: bit 31 is the cancel flag, bit 30 the retry
 * flag, bits 29..0 hold the reference count.
 */
#define IO_POLL_CANCEL_FLAG	BIT(31)
#define IO_POLL_RETRY_FLAG	BIT(30)
#define IO_POLL_REF_MASK	GENMASK(29, 0)

/*
 * We usually have 1-2 refs taken, 128 is more than enough and we want to
 * maximise the margin between this amount and the moment when it overflows.
 */
#define IO_POLL_REF_BIAS	128

/*
 * Slow path of io_poll_get_ownership(), taken once poll_refs has reached
 * IO_POLL_REF_BIAS. Returns true only if ownership was acquired.
 */
static bool io_poll_get_ownership_slowpath(struct io_kiocb *req)
{
	int v;

	/*
	 * poll_refs are already elevated and we don't have much hope for
	 * grabbing the ownership. Instead of incrementing set a retry flag
	 * to notify the loop that there might have been some change.
	 */
	v = atomic_fetch_or(IO_POLL_RETRY_FLAG, &req->poll_refs);
	if (v & IO_POLL_REF_MASK)
		return false;
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

/*
 * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
 * bump it and acquire ownership. It's disallowed to modify requests while not
 * owning it, that prevents from races for enqueueing task_work's and b/w
 * arming poll and wakeups.
 *
 * Returns true if the caller became the owner.
 */
static inline bool io_poll_get_ownership(struct io_kiocb *req)
{
	if (unlikely(atomic_read(&req->poll_refs) >= IO_POLL_REF_BIAS))
		return io_poll_get_ownership_slowpath(req);
	return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
}

/*
 * Set IO_POLL_CANCEL_FLAG in poll_refs; io_poll_check_events() returns
 * -ECANCELED once it observes the flag.
 */
static void io_poll_mark_cancelled(struct io_kiocb *req)
{
	atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
}

/*
 * Return the second ("double") poll entry of a request; callers check the
 * result for NULL before using it.
 */
static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req)
{
	/* pure poll stashes this in ->async_data, poll driven retry elsewhere */
	if (req->opcode == IORING_OP_POLL_ADD)
		return req->async_data;
	return req->apoll->double_poll;
}

/*
 * Return the primary poll entry of a request: req->poll for
 * IORING_OP_POLL_ADD, req->apoll->poll for every other opcode.
 */
static struct io_poll_iocb
*io_poll_get_single(struct io_kiocb *req) 54058c2ecf20Sopenharmony_ci{ 54068c2ecf20Sopenharmony_ci if (req->opcode == IORING_OP_POLL_ADD) 54078c2ecf20Sopenharmony_ci return &req->poll; 54088c2ecf20Sopenharmony_ci return &req->apoll->poll; 54098c2ecf20Sopenharmony_ci} 54108c2ecf20Sopenharmony_ci 54118c2ecf20Sopenharmony_cistatic void io_poll_req_insert(struct io_kiocb *req) 54128c2ecf20Sopenharmony_ci{ 54138c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 54148c2ecf20Sopenharmony_ci struct hlist_head *list; 54158c2ecf20Sopenharmony_ci 54168c2ecf20Sopenharmony_ci list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)]; 54178c2ecf20Sopenharmony_ci hlist_add_head(&req->hash_node, list); 54188c2ecf20Sopenharmony_ci} 54198c2ecf20Sopenharmony_ci 54208c2ecf20Sopenharmony_cistatic void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, 54218c2ecf20Sopenharmony_ci wait_queue_func_t wake_func) 54228c2ecf20Sopenharmony_ci{ 54238c2ecf20Sopenharmony_ci poll->head = NULL; 54248c2ecf20Sopenharmony_ci#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) 54258c2ecf20Sopenharmony_ci /* mask in events that we always want/need */ 54268c2ecf20Sopenharmony_ci poll->events = events | IO_POLL_UNMASK; 54278c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&poll->wait.entry); 54288c2ecf20Sopenharmony_ci init_waitqueue_func_entry(&poll->wait, wake_func); 54298c2ecf20Sopenharmony_ci} 54308c2ecf20Sopenharmony_ci 54318c2ecf20Sopenharmony_cistatic inline void io_poll_remove_entry(struct io_poll_iocb *poll) 54328c2ecf20Sopenharmony_ci{ 54338c2ecf20Sopenharmony_ci struct wait_queue_head *head = smp_load_acquire(&poll->head); 54348c2ecf20Sopenharmony_ci 54358c2ecf20Sopenharmony_ci if (head) { 54368c2ecf20Sopenharmony_ci spin_lock_irq(&head->lock); 54378c2ecf20Sopenharmony_ci list_del_init(&poll->wait.entry); 54388c2ecf20Sopenharmony_ci poll->head = NULL; 54398c2ecf20Sopenharmony_ci spin_unlock_irq(&head->lock); 54408c2ecf20Sopenharmony_ci } 
54418c2ecf20Sopenharmony_ci} 54428c2ecf20Sopenharmony_ci 54438c2ecf20Sopenharmony_cistatic void io_poll_remove_entries(struct io_kiocb *req) 54448c2ecf20Sopenharmony_ci{ 54458c2ecf20Sopenharmony_ci struct io_poll_iocb *poll = io_poll_get_single(req); 54468c2ecf20Sopenharmony_ci struct io_poll_iocb *poll_double = io_poll_get_double(req); 54478c2ecf20Sopenharmony_ci 54488c2ecf20Sopenharmony_ci /* 54498c2ecf20Sopenharmony_ci * While we hold the waitqueue lock and the waitqueue is nonempty, 54508c2ecf20Sopenharmony_ci * wake_up_pollfree() will wait for us. However, taking the waitqueue 54518c2ecf20Sopenharmony_ci * lock in the first place can race with the waitqueue being freed. 54528c2ecf20Sopenharmony_ci * 54538c2ecf20Sopenharmony_ci * We solve this as eventpoll does: by taking advantage of the fact that 54548c2ecf20Sopenharmony_ci * all users of wake_up_pollfree() will RCU-delay the actual free. If 54558c2ecf20Sopenharmony_ci * we enter rcu_read_lock() and see that the pointer to the queue is 54568c2ecf20Sopenharmony_ci * non-NULL, we can then lock it without the memory being freed out from 54578c2ecf20Sopenharmony_ci * under us. 54588c2ecf20Sopenharmony_ci * 54598c2ecf20Sopenharmony_ci * Keep holding rcu_read_lock() as long as we hold the queue lock, in 54608c2ecf20Sopenharmony_ci * case the caller deletes the entry from the queue, leaving it empty. 54618c2ecf20Sopenharmony_ci * In that case, only RCU prevents the queue memory from being freed. 54628c2ecf20Sopenharmony_ci */ 54638c2ecf20Sopenharmony_ci rcu_read_lock(); 54648c2ecf20Sopenharmony_ci io_poll_remove_entry(poll); 54658c2ecf20Sopenharmony_ci if (poll_double) 54668c2ecf20Sopenharmony_ci io_poll_remove_entry(poll_double); 54678c2ecf20Sopenharmony_ci rcu_read_unlock(); 54688c2ecf20Sopenharmony_ci} 54698c2ecf20Sopenharmony_ci 54708c2ecf20Sopenharmony_ci/* 54718c2ecf20Sopenharmony_ci * All poll tw should go through this. 
Checks for poll events, manages 54728c2ecf20Sopenharmony_ci * references, does rewait, etc. 54738c2ecf20Sopenharmony_ci * 54748c2ecf20Sopenharmony_ci * Returns a negative error on failure. >0 when no action require, which is 54758c2ecf20Sopenharmony_ci * either spurious wakeup or multishot CQE is served. 0 when it's done with 54768c2ecf20Sopenharmony_ci * the request, then the mask is stored in req->result. 54778c2ecf20Sopenharmony_ci */ 54788c2ecf20Sopenharmony_cistatic int io_poll_check_events(struct io_kiocb *req) 54798c2ecf20Sopenharmony_ci{ 54808c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 54818c2ecf20Sopenharmony_ci struct io_poll_iocb *poll = io_poll_get_single(req); 54828c2ecf20Sopenharmony_ci int v; 54838c2ecf20Sopenharmony_ci 54848c2ecf20Sopenharmony_ci /* req->task == current here, checking PF_EXITING is safe */ 54858c2ecf20Sopenharmony_ci if (unlikely(req->task->flags & PF_EXITING)) 54868c2ecf20Sopenharmony_ci io_poll_mark_cancelled(req); 54878c2ecf20Sopenharmony_ci 54888c2ecf20Sopenharmony_ci do { 54898c2ecf20Sopenharmony_ci v = atomic_read(&req->poll_refs); 54908c2ecf20Sopenharmony_ci 54918c2ecf20Sopenharmony_ci /* tw handler should be the owner, and so have some references */ 54928c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK))) 54938c2ecf20Sopenharmony_ci return 0; 54948c2ecf20Sopenharmony_ci if (v & IO_POLL_CANCEL_FLAG) 54958c2ecf20Sopenharmony_ci return -ECANCELED; 54968c2ecf20Sopenharmony_ci /* 54978c2ecf20Sopenharmony_ci * cqe.res contains only events of the first wake up 54988c2ecf20Sopenharmony_ci * and all others are be lost. Redo vfs_poll() to get 54998c2ecf20Sopenharmony_ci * up to date state. 
55008c2ecf20Sopenharmony_ci */ 55018c2ecf20Sopenharmony_ci if ((v & IO_POLL_REF_MASK) != 1) 55028c2ecf20Sopenharmony_ci req->result = 0; 55038c2ecf20Sopenharmony_ci if (v & IO_POLL_RETRY_FLAG) { 55048c2ecf20Sopenharmony_ci req->result = 0; 55058c2ecf20Sopenharmony_ci /* 55068c2ecf20Sopenharmony_ci * We won't find new events that came in between 55078c2ecf20Sopenharmony_ci * vfs_poll and the ref put unless we clear the 55088c2ecf20Sopenharmony_ci * flag in advance. 55098c2ecf20Sopenharmony_ci */ 55108c2ecf20Sopenharmony_ci atomic_andnot(IO_POLL_RETRY_FLAG, &req->poll_refs); 55118c2ecf20Sopenharmony_ci v &= ~IO_POLL_RETRY_FLAG; 55128c2ecf20Sopenharmony_ci } 55138c2ecf20Sopenharmony_ci 55148c2ecf20Sopenharmony_ci if (!req->result) { 55158c2ecf20Sopenharmony_ci struct poll_table_struct pt = { ._key = poll->events }; 55168c2ecf20Sopenharmony_ci 55178c2ecf20Sopenharmony_ci req->result = vfs_poll(req->file, &pt) & poll->events; 55188c2ecf20Sopenharmony_ci } 55198c2ecf20Sopenharmony_ci 55208c2ecf20Sopenharmony_ci /* multishot, just fill an CQE and proceed */ 55218c2ecf20Sopenharmony_ci if (req->result && !(poll->events & EPOLLONESHOT)) { 55228c2ecf20Sopenharmony_ci __poll_t mask = mangle_poll(req->result & poll->events); 55238c2ecf20Sopenharmony_ci bool filled; 55248c2ecf20Sopenharmony_ci 55258c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 55268c2ecf20Sopenharmony_ci filled = io_fill_cqe_aux(ctx, req->user_data, mask, 55278c2ecf20Sopenharmony_ci IORING_CQE_F_MORE); 55288c2ecf20Sopenharmony_ci io_commit_cqring(ctx); 55298c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 55308c2ecf20Sopenharmony_ci if (unlikely(!filled)) 55318c2ecf20Sopenharmony_ci return -ECANCELED; 55328c2ecf20Sopenharmony_ci io_cqring_ev_posted(ctx); 55338c2ecf20Sopenharmony_ci } else if (req->result) { 55348c2ecf20Sopenharmony_ci return 0; 55358c2ecf20Sopenharmony_ci } 55368c2ecf20Sopenharmony_ci 55378c2ecf20Sopenharmony_ci /* force the next iteration to vfs_poll() */ 
55388c2ecf20Sopenharmony_ci req->result = 0; 55398c2ecf20Sopenharmony_ci 55408c2ecf20Sopenharmony_ci /* 55418c2ecf20Sopenharmony_ci * Release all references, retry if someone tried to restart 55428c2ecf20Sopenharmony_ci * task_work while we were executing it. 55438c2ecf20Sopenharmony_ci */ 55448c2ecf20Sopenharmony_ci } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs) & 55458c2ecf20Sopenharmony_ci IO_POLL_REF_MASK); 55468c2ecf20Sopenharmony_ci 55478c2ecf20Sopenharmony_ci return 1; 55488c2ecf20Sopenharmony_ci} 55498c2ecf20Sopenharmony_ci 55508c2ecf20Sopenharmony_cistatic void io_poll_task_func(struct io_kiocb *req, bool *locked) 55518c2ecf20Sopenharmony_ci{ 55528c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 55538c2ecf20Sopenharmony_ci int ret; 55548c2ecf20Sopenharmony_ci 55558c2ecf20Sopenharmony_ci ret = io_poll_check_events(req); 55568c2ecf20Sopenharmony_ci if (ret > 0) 55578c2ecf20Sopenharmony_ci return; 55588c2ecf20Sopenharmony_ci 55598c2ecf20Sopenharmony_ci if (!ret) { 55608c2ecf20Sopenharmony_ci req->result = mangle_poll(req->result & req->poll.events); 55618c2ecf20Sopenharmony_ci } else { 55628c2ecf20Sopenharmony_ci req->result = ret; 55638c2ecf20Sopenharmony_ci req_set_fail(req); 55648c2ecf20Sopenharmony_ci } 55658c2ecf20Sopenharmony_ci 55668c2ecf20Sopenharmony_ci io_poll_remove_entries(req); 55678c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 55688c2ecf20Sopenharmony_ci hash_del(&req->hash_node); 55698c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 55708c2ecf20Sopenharmony_ci io_req_complete_post(req, req->result, 0); 55718c2ecf20Sopenharmony_ci} 55728c2ecf20Sopenharmony_ci 55738c2ecf20Sopenharmony_cistatic void io_apoll_task_func(struct io_kiocb *req, bool *locked) 55748c2ecf20Sopenharmony_ci{ 55758c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 55768c2ecf20Sopenharmony_ci int ret; 55778c2ecf20Sopenharmony_ci 55788c2ecf20Sopenharmony_ci ret = io_poll_check_events(req); 55798c2ecf20Sopenharmony_ci if 
(ret > 0) 55808c2ecf20Sopenharmony_ci return; 55818c2ecf20Sopenharmony_ci 55828c2ecf20Sopenharmony_ci io_tw_lock(req->ctx, locked); 55838c2ecf20Sopenharmony_ci io_poll_remove_entries(req); 55848c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 55858c2ecf20Sopenharmony_ci hash_del(&req->hash_node); 55868c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 55878c2ecf20Sopenharmony_ci 55888c2ecf20Sopenharmony_ci if (!ret) 55898c2ecf20Sopenharmony_ci io_req_task_submit(req, locked); 55908c2ecf20Sopenharmony_ci else 55918c2ecf20Sopenharmony_ci io_req_complete_failed(req, ret); 55928c2ecf20Sopenharmony_ci} 55938c2ecf20Sopenharmony_ci 55948c2ecf20Sopenharmony_cistatic void __io_poll_execute(struct io_kiocb *req, int mask) 55958c2ecf20Sopenharmony_ci{ 55968c2ecf20Sopenharmony_ci req->result = mask; 55978c2ecf20Sopenharmony_ci if (req->opcode == IORING_OP_POLL_ADD) 55988c2ecf20Sopenharmony_ci req->io_task_work.func = io_poll_task_func; 55998c2ecf20Sopenharmony_ci else 56008c2ecf20Sopenharmony_ci req->io_task_work.func = io_apoll_task_func; 56018c2ecf20Sopenharmony_ci 56028c2ecf20Sopenharmony_ci trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask); 56038c2ecf20Sopenharmony_ci io_req_task_work_add(req); 56048c2ecf20Sopenharmony_ci} 56058c2ecf20Sopenharmony_ci 56068c2ecf20Sopenharmony_cistatic inline void io_poll_execute(struct io_kiocb *req, int res) 56078c2ecf20Sopenharmony_ci{ 56088c2ecf20Sopenharmony_ci if (io_poll_get_ownership(req)) 56098c2ecf20Sopenharmony_ci __io_poll_execute(req, res); 56108c2ecf20Sopenharmony_ci} 56118c2ecf20Sopenharmony_ci 56128c2ecf20Sopenharmony_cistatic void io_poll_cancel_req(struct io_kiocb *req) 56138c2ecf20Sopenharmony_ci{ 56148c2ecf20Sopenharmony_ci io_poll_mark_cancelled(req); 56158c2ecf20Sopenharmony_ci /* kick tw, which should complete the request */ 56168c2ecf20Sopenharmony_ci io_poll_execute(req, 0); 56178c2ecf20Sopenharmony_ci} 56188c2ecf20Sopenharmony_ci 56198c2ecf20Sopenharmony_cistatic int 
io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, 56208c2ecf20Sopenharmony_ci void *key) 56218c2ecf20Sopenharmony_ci{ 56228c2ecf20Sopenharmony_ci struct io_kiocb *req = wait->private; 56238c2ecf20Sopenharmony_ci struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb, 56248c2ecf20Sopenharmony_ci wait); 56258c2ecf20Sopenharmony_ci __poll_t mask = key_to_poll(key); 56268c2ecf20Sopenharmony_ci 56278c2ecf20Sopenharmony_ci if (unlikely(mask & POLLFREE)) { 56288c2ecf20Sopenharmony_ci io_poll_mark_cancelled(req); 56298c2ecf20Sopenharmony_ci /* we have to kick tw in case it's not already */ 56308c2ecf20Sopenharmony_ci io_poll_execute(req, 0); 56318c2ecf20Sopenharmony_ci 56328c2ecf20Sopenharmony_ci /* 56338c2ecf20Sopenharmony_ci * If the waitqueue is being freed early but someone is already 56348c2ecf20Sopenharmony_ci * holds ownership over it, we have to tear down the request as 56358c2ecf20Sopenharmony_ci * best we can. That means immediately removing the request from 56368c2ecf20Sopenharmony_ci * its waitqueue and preventing all further accesses to the 56378c2ecf20Sopenharmony_ci * waitqueue via the request. 56388c2ecf20Sopenharmony_ci */ 56398c2ecf20Sopenharmony_ci list_del_init(&poll->wait.entry); 56408c2ecf20Sopenharmony_ci 56418c2ecf20Sopenharmony_ci /* 56428c2ecf20Sopenharmony_ci * Careful: this *must* be the last step, since as soon 56438c2ecf20Sopenharmony_ci * as req->head is NULL'ed out, the request can be 56448c2ecf20Sopenharmony_ci * completed and freed, since aio_poll_complete_work() 56458c2ecf20Sopenharmony_ci * will no longer need to take the waitqueue lock. 
56468c2ecf20Sopenharmony_ci */ 56478c2ecf20Sopenharmony_ci smp_store_release(&poll->head, NULL); 56488c2ecf20Sopenharmony_ci return 1; 56498c2ecf20Sopenharmony_ci } 56508c2ecf20Sopenharmony_ci 56518c2ecf20Sopenharmony_ci /* for instances that support it check for an event match first */ 56528c2ecf20Sopenharmony_ci if (mask && !(mask & poll->events)) 56538c2ecf20Sopenharmony_ci return 0; 56548c2ecf20Sopenharmony_ci 56558c2ecf20Sopenharmony_ci if (io_poll_get_ownership(req)) { 56568c2ecf20Sopenharmony_ci /* 56578c2ecf20Sopenharmony_ci * If we trigger a multishot poll off our own wakeup path, 56588c2ecf20Sopenharmony_ci * disable multishot as there is a circular dependency between 56598c2ecf20Sopenharmony_ci * CQ posting and triggering the event. 56608c2ecf20Sopenharmony_ci */ 56618c2ecf20Sopenharmony_ci if (mask & EPOLL_URING_WAKE) 56628c2ecf20Sopenharmony_ci poll->events |= EPOLLONESHOT; 56638c2ecf20Sopenharmony_ci 56648c2ecf20Sopenharmony_ci __io_poll_execute(req, mask); 56658c2ecf20Sopenharmony_ci } 56668c2ecf20Sopenharmony_ci return 1; 56678c2ecf20Sopenharmony_ci} 56688c2ecf20Sopenharmony_ci 56698c2ecf20Sopenharmony_cistatic void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, 56708c2ecf20Sopenharmony_ci struct wait_queue_head *head, 56718c2ecf20Sopenharmony_ci struct io_poll_iocb **poll_ptr) 56728c2ecf20Sopenharmony_ci{ 56738c2ecf20Sopenharmony_ci struct io_kiocb *req = pt->req; 56748c2ecf20Sopenharmony_ci 56758c2ecf20Sopenharmony_ci /* 56768c2ecf20Sopenharmony_ci * The file being polled uses multiple waitqueues for poll handling 56778c2ecf20Sopenharmony_ci * (e.g. one for read, one for write). Setup a separate io_poll_iocb 56788c2ecf20Sopenharmony_ci * if this happens. 
56798c2ecf20Sopenharmony_ci */ 56808c2ecf20Sopenharmony_ci if (unlikely(pt->nr_entries)) { 56818c2ecf20Sopenharmony_ci struct io_poll_iocb *first = poll; 56828c2ecf20Sopenharmony_ci 56838c2ecf20Sopenharmony_ci /* double add on the same waitqueue head, ignore */ 56848c2ecf20Sopenharmony_ci if (first->head == head) 56858c2ecf20Sopenharmony_ci return; 56868c2ecf20Sopenharmony_ci /* already have a 2nd entry, fail a third attempt */ 56878c2ecf20Sopenharmony_ci if (*poll_ptr) { 56888c2ecf20Sopenharmony_ci if ((*poll_ptr)->head == head) 56898c2ecf20Sopenharmony_ci return; 56908c2ecf20Sopenharmony_ci pt->error = -EINVAL; 56918c2ecf20Sopenharmony_ci return; 56928c2ecf20Sopenharmony_ci } 56938c2ecf20Sopenharmony_ci 56948c2ecf20Sopenharmony_ci poll = kmalloc(sizeof(*poll), GFP_ATOMIC); 56958c2ecf20Sopenharmony_ci if (!poll) { 56968c2ecf20Sopenharmony_ci pt->error = -ENOMEM; 56978c2ecf20Sopenharmony_ci return; 56988c2ecf20Sopenharmony_ci } 56998c2ecf20Sopenharmony_ci io_init_poll_iocb(poll, first->events, first->wait.func); 57008c2ecf20Sopenharmony_ci *poll_ptr = poll; 57018c2ecf20Sopenharmony_ci } 57028c2ecf20Sopenharmony_ci 57038c2ecf20Sopenharmony_ci pt->nr_entries++; 57048c2ecf20Sopenharmony_ci poll->head = head; 57058c2ecf20Sopenharmony_ci poll->wait.private = req; 57068c2ecf20Sopenharmony_ci 57078c2ecf20Sopenharmony_ci if (poll->events & EPOLLEXCLUSIVE) 57088c2ecf20Sopenharmony_ci add_wait_queue_exclusive(head, &poll->wait); 57098c2ecf20Sopenharmony_ci else 57108c2ecf20Sopenharmony_ci add_wait_queue(head, &poll->wait); 57118c2ecf20Sopenharmony_ci} 57128c2ecf20Sopenharmony_ci 57138c2ecf20Sopenharmony_cistatic void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, 57148c2ecf20Sopenharmony_ci struct poll_table_struct *p) 57158c2ecf20Sopenharmony_ci{ 57168c2ecf20Sopenharmony_ci struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); 57178c2ecf20Sopenharmony_ci 57188c2ecf20Sopenharmony_ci __io_queue_proc(&pt->req->poll, pt, head, 
57198c2ecf20Sopenharmony_ci (struct io_poll_iocb **) &pt->req->async_data); 57208c2ecf20Sopenharmony_ci} 57218c2ecf20Sopenharmony_ci 57228c2ecf20Sopenharmony_cistatic int __io_arm_poll_handler(struct io_kiocb *req, 57238c2ecf20Sopenharmony_ci struct io_poll_iocb *poll, 57248c2ecf20Sopenharmony_ci struct io_poll_table *ipt, __poll_t mask) 57258c2ecf20Sopenharmony_ci{ 57268c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 57278c2ecf20Sopenharmony_ci 57288c2ecf20Sopenharmony_ci INIT_HLIST_NODE(&req->hash_node); 57298c2ecf20Sopenharmony_ci io_init_poll_iocb(poll, mask, io_poll_wake); 57308c2ecf20Sopenharmony_ci poll->file = req->file; 57318c2ecf20Sopenharmony_ci poll->wait.private = req; 57328c2ecf20Sopenharmony_ci 57338c2ecf20Sopenharmony_ci ipt->pt._key = mask; 57348c2ecf20Sopenharmony_ci ipt->req = req; 57358c2ecf20Sopenharmony_ci ipt->error = 0; 57368c2ecf20Sopenharmony_ci ipt->nr_entries = 0; 57378c2ecf20Sopenharmony_ci 57388c2ecf20Sopenharmony_ci /* 57398c2ecf20Sopenharmony_ci * Take the ownership to delay any tw execution up until we're done 57408c2ecf20Sopenharmony_ci * with poll arming. see io_poll_get_ownership(). 
57418c2ecf20Sopenharmony_ci */ 57428c2ecf20Sopenharmony_ci atomic_set(&req->poll_refs, 1); 57438c2ecf20Sopenharmony_ci mask = vfs_poll(req->file, &ipt->pt) & poll->events; 57448c2ecf20Sopenharmony_ci 57458c2ecf20Sopenharmony_ci if (mask && (poll->events & EPOLLONESHOT)) { 57468c2ecf20Sopenharmony_ci io_poll_remove_entries(req); 57478c2ecf20Sopenharmony_ci /* no one else has access to the req, forget about the ref */ 57488c2ecf20Sopenharmony_ci return mask; 57498c2ecf20Sopenharmony_ci } 57508c2ecf20Sopenharmony_ci if (!mask && unlikely(ipt->error || !ipt->nr_entries)) { 57518c2ecf20Sopenharmony_ci io_poll_remove_entries(req); 57528c2ecf20Sopenharmony_ci if (!ipt->error) 57538c2ecf20Sopenharmony_ci ipt->error = -EINVAL; 57548c2ecf20Sopenharmony_ci return 0; 57558c2ecf20Sopenharmony_ci } 57568c2ecf20Sopenharmony_ci 57578c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 57588c2ecf20Sopenharmony_ci io_poll_req_insert(req); 57598c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 57608c2ecf20Sopenharmony_ci 57618c2ecf20Sopenharmony_ci if (mask) { 57628c2ecf20Sopenharmony_ci /* can't multishot if failed, just queue the event we've got */ 57638c2ecf20Sopenharmony_ci if (unlikely(ipt->error || !ipt->nr_entries)) { 57648c2ecf20Sopenharmony_ci poll->events |= EPOLLONESHOT; 57658c2ecf20Sopenharmony_ci ipt->error = 0; 57668c2ecf20Sopenharmony_ci } 57678c2ecf20Sopenharmony_ci __io_poll_execute(req, mask); 57688c2ecf20Sopenharmony_ci return 0; 57698c2ecf20Sopenharmony_ci } 57708c2ecf20Sopenharmony_ci 57718c2ecf20Sopenharmony_ci /* 57728c2ecf20Sopenharmony_ci * Try to release ownership. If we see a change of state, e.g. 57738c2ecf20Sopenharmony_ci * poll was waken up, queue up a tw, it'll deal with it. 
57748c2ecf20Sopenharmony_ci */ 57758c2ecf20Sopenharmony_ci if (atomic_cmpxchg(&req->poll_refs, 1, 0) != 1) 57768c2ecf20Sopenharmony_ci __io_poll_execute(req, 0); 57778c2ecf20Sopenharmony_ci return 0; 57788c2ecf20Sopenharmony_ci} 57798c2ecf20Sopenharmony_ci 57808c2ecf20Sopenharmony_cistatic void io_async_queue_proc(struct file *file, struct wait_queue_head *head, 57818c2ecf20Sopenharmony_ci struct poll_table_struct *p) 57828c2ecf20Sopenharmony_ci{ 57838c2ecf20Sopenharmony_ci struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); 57848c2ecf20Sopenharmony_ci struct async_poll *apoll = pt->req->apoll; 57858c2ecf20Sopenharmony_ci 57868c2ecf20Sopenharmony_ci __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); 57878c2ecf20Sopenharmony_ci} 57888c2ecf20Sopenharmony_ci 57898c2ecf20Sopenharmony_cienum { 57908c2ecf20Sopenharmony_ci IO_APOLL_OK, 57918c2ecf20Sopenharmony_ci IO_APOLL_ABORTED, 57928c2ecf20Sopenharmony_ci IO_APOLL_READY 57938c2ecf20Sopenharmony_ci}; 57948c2ecf20Sopenharmony_ci 57958c2ecf20Sopenharmony_ci/* 57968c2ecf20Sopenharmony_ci * We can't reliably detect loops in repeated poll triggers and issue 57978c2ecf20Sopenharmony_ci * subsequently failing. But rather than fail these immediately, allow a 57988c2ecf20Sopenharmony_ci * certain amount of retries before we give up. Given that this condition 57998c2ecf20Sopenharmony_ci * should _rarely_ trigger even once, we should be fine with a larger value. 
58008c2ecf20Sopenharmony_ci */ 58018c2ecf20Sopenharmony_ci#define APOLL_MAX_RETRY 128 58028c2ecf20Sopenharmony_ci 58038c2ecf20Sopenharmony_cistatic int io_arm_poll_handler(struct io_kiocb *req) 58048c2ecf20Sopenharmony_ci{ 58058c2ecf20Sopenharmony_ci const struct io_op_def *def = &io_op_defs[req->opcode]; 58068c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 58078c2ecf20Sopenharmony_ci struct async_poll *apoll; 58088c2ecf20Sopenharmony_ci struct io_poll_table ipt; 58098c2ecf20Sopenharmony_ci __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI; 58108c2ecf20Sopenharmony_ci int ret; 58118c2ecf20Sopenharmony_ci 58128c2ecf20Sopenharmony_ci if (!req->file || !file_can_poll(req->file)) 58138c2ecf20Sopenharmony_ci return IO_APOLL_ABORTED; 58148c2ecf20Sopenharmony_ci if (!def->pollin && !def->pollout) 58158c2ecf20Sopenharmony_ci return IO_APOLL_ABORTED; 58168c2ecf20Sopenharmony_ci 58178c2ecf20Sopenharmony_ci if (def->pollin) { 58188c2ecf20Sopenharmony_ci mask |= POLLIN | POLLRDNORM; 58198c2ecf20Sopenharmony_ci 58208c2ecf20Sopenharmony_ci /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */ 58218c2ecf20Sopenharmony_ci if ((req->opcode == IORING_OP_RECVMSG) && 58228c2ecf20Sopenharmony_ci (req->sr_msg.msg_flags & MSG_ERRQUEUE)) 58238c2ecf20Sopenharmony_ci mask &= ~POLLIN; 58248c2ecf20Sopenharmony_ci } else { 58258c2ecf20Sopenharmony_ci mask |= POLLOUT | POLLWRNORM; 58268c2ecf20Sopenharmony_ci } 58278c2ecf20Sopenharmony_ci 58288c2ecf20Sopenharmony_ci if (req->flags & REQ_F_POLLED) { 58298c2ecf20Sopenharmony_ci apoll = req->apoll; 58308c2ecf20Sopenharmony_ci kfree(apoll->double_poll); 58318c2ecf20Sopenharmony_ci if (unlikely(!--apoll->poll.retries)) { 58328c2ecf20Sopenharmony_ci apoll->double_poll = NULL; 58338c2ecf20Sopenharmony_ci return IO_APOLL_ABORTED; 58348c2ecf20Sopenharmony_ci } 58358c2ecf20Sopenharmony_ci } else { 58368c2ecf20Sopenharmony_ci apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); 58378c2ecf20Sopenharmony_ci if (unlikely(!apoll)) 
58388c2ecf20Sopenharmony_ci return IO_APOLL_ABORTED; 58398c2ecf20Sopenharmony_ci apoll->poll.retries = APOLL_MAX_RETRY; 58408c2ecf20Sopenharmony_ci } 58418c2ecf20Sopenharmony_ci apoll->double_poll = NULL; 58428c2ecf20Sopenharmony_ci req->apoll = apoll; 58438c2ecf20Sopenharmony_ci req->flags |= REQ_F_POLLED; 58448c2ecf20Sopenharmony_ci ipt.pt._qproc = io_async_queue_proc; 58458c2ecf20Sopenharmony_ci 58468c2ecf20Sopenharmony_ci ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask); 58478c2ecf20Sopenharmony_ci if (ret || ipt.error) 58488c2ecf20Sopenharmony_ci return ret ? IO_APOLL_READY : IO_APOLL_ABORTED; 58498c2ecf20Sopenharmony_ci 58508c2ecf20Sopenharmony_ci trace_io_uring_poll_arm(ctx, req, req->opcode, req->user_data, 58518c2ecf20Sopenharmony_ci mask, apoll->poll.events); 58528c2ecf20Sopenharmony_ci return IO_APOLL_OK; 58538c2ecf20Sopenharmony_ci} 58548c2ecf20Sopenharmony_ci 58558c2ecf20Sopenharmony_ci/* 58568c2ecf20Sopenharmony_ci * Returns true if we found and killed one or more poll requests 58578c2ecf20Sopenharmony_ci */ 58588c2ecf20Sopenharmony_cistatic bool io_poll_remove_all(struct io_ring_ctx *ctx, struct task_struct *tsk, 58598c2ecf20Sopenharmony_ci bool cancel_all) 58608c2ecf20Sopenharmony_ci{ 58618c2ecf20Sopenharmony_ci struct hlist_node *tmp; 58628c2ecf20Sopenharmony_ci struct io_kiocb *req; 58638c2ecf20Sopenharmony_ci bool found = false; 58648c2ecf20Sopenharmony_ci int i; 58658c2ecf20Sopenharmony_ci 58668c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 58678c2ecf20Sopenharmony_ci for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { 58688c2ecf20Sopenharmony_ci struct hlist_head *list; 58698c2ecf20Sopenharmony_ci 58708c2ecf20Sopenharmony_ci list = &ctx->cancel_hash[i]; 58718c2ecf20Sopenharmony_ci hlist_for_each_entry_safe(req, tmp, list, hash_node) { 58728c2ecf20Sopenharmony_ci if (io_match_task_safe(req, tsk, cancel_all)) { 58738c2ecf20Sopenharmony_ci hlist_del_init(&req->hash_node); 58748c2ecf20Sopenharmony_ci io_poll_cancel_req(req); 
58758c2ecf20Sopenharmony_ci found = true; 58768c2ecf20Sopenharmony_ci } 58778c2ecf20Sopenharmony_ci } 58788c2ecf20Sopenharmony_ci } 58798c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 58808c2ecf20Sopenharmony_ci return found; 58818c2ecf20Sopenharmony_ci} 58828c2ecf20Sopenharmony_ci 58838c2ecf20Sopenharmony_cistatic struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr, 58848c2ecf20Sopenharmony_ci bool poll_only) 58858c2ecf20Sopenharmony_ci __must_hold(&ctx->completion_lock) 58868c2ecf20Sopenharmony_ci{ 58878c2ecf20Sopenharmony_ci struct hlist_head *list; 58888c2ecf20Sopenharmony_ci struct io_kiocb *req; 58898c2ecf20Sopenharmony_ci 58908c2ecf20Sopenharmony_ci list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)]; 58918c2ecf20Sopenharmony_ci hlist_for_each_entry(req, list, hash_node) { 58928c2ecf20Sopenharmony_ci if (sqe_addr != req->user_data) 58938c2ecf20Sopenharmony_ci continue; 58948c2ecf20Sopenharmony_ci if (poll_only && req->opcode != IORING_OP_POLL_ADD) 58958c2ecf20Sopenharmony_ci continue; 58968c2ecf20Sopenharmony_ci return req; 58978c2ecf20Sopenharmony_ci } 58988c2ecf20Sopenharmony_ci return NULL; 58998c2ecf20Sopenharmony_ci} 59008c2ecf20Sopenharmony_ci 59018c2ecf20Sopenharmony_cistatic bool io_poll_disarm(struct io_kiocb *req) 59028c2ecf20Sopenharmony_ci __must_hold(&ctx->completion_lock) 59038c2ecf20Sopenharmony_ci{ 59048c2ecf20Sopenharmony_ci if (!io_poll_get_ownership(req)) 59058c2ecf20Sopenharmony_ci return false; 59068c2ecf20Sopenharmony_ci io_poll_remove_entries(req); 59078c2ecf20Sopenharmony_ci hash_del(&req->hash_node); 59088c2ecf20Sopenharmony_ci return true; 59098c2ecf20Sopenharmony_ci} 59108c2ecf20Sopenharmony_ci 59118c2ecf20Sopenharmony_cistatic int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr, 59128c2ecf20Sopenharmony_ci bool poll_only) 59138c2ecf20Sopenharmony_ci __must_hold(&ctx->completion_lock) 59148c2ecf20Sopenharmony_ci{ 59158c2ecf20Sopenharmony_ci struct io_kiocb *req = 
io_poll_find(ctx, sqe_addr, poll_only); 59168c2ecf20Sopenharmony_ci 59178c2ecf20Sopenharmony_ci if (!req) 59188c2ecf20Sopenharmony_ci return -ENOENT; 59198c2ecf20Sopenharmony_ci io_poll_cancel_req(req); 59208c2ecf20Sopenharmony_ci return 0; 59218c2ecf20Sopenharmony_ci} 59228c2ecf20Sopenharmony_ci 59238c2ecf20Sopenharmony_cistatic __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, 59248c2ecf20Sopenharmony_ci unsigned int flags) 59258c2ecf20Sopenharmony_ci{ 59268c2ecf20Sopenharmony_ci u32 events; 59278c2ecf20Sopenharmony_ci 59288c2ecf20Sopenharmony_ci events = READ_ONCE(sqe->poll32_events); 59298c2ecf20Sopenharmony_ci#ifdef __BIG_ENDIAN 59308c2ecf20Sopenharmony_ci events = swahw32(events); 59318c2ecf20Sopenharmony_ci#endif 59328c2ecf20Sopenharmony_ci if (!(flags & IORING_POLL_ADD_MULTI)) 59338c2ecf20Sopenharmony_ci events |= EPOLLONESHOT; 59348c2ecf20Sopenharmony_ci return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT)); 59358c2ecf20Sopenharmony_ci} 59368c2ecf20Sopenharmony_ci 59378c2ecf20Sopenharmony_cistatic int io_poll_update_prep(struct io_kiocb *req, 59388c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 59398c2ecf20Sopenharmony_ci{ 59408c2ecf20Sopenharmony_ci struct io_poll_update *upd = &req->poll_update; 59418c2ecf20Sopenharmony_ci u32 flags; 59428c2ecf20Sopenharmony_ci 59438c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 59448c2ecf20Sopenharmony_ci return -EINVAL; 59458c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->buf_index || sqe->splice_fd_in) 59468c2ecf20Sopenharmony_ci return -EINVAL; 59478c2ecf20Sopenharmony_ci flags = READ_ONCE(sqe->len); 59488c2ecf20Sopenharmony_ci if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA | 59498c2ecf20Sopenharmony_ci IORING_POLL_ADD_MULTI)) 59508c2ecf20Sopenharmony_ci return -EINVAL; 59518c2ecf20Sopenharmony_ci /* meaningless without update */ 59528c2ecf20Sopenharmony_ci if (flags == IORING_POLL_ADD_MULTI) 59538c2ecf20Sopenharmony_ci return 
-EINVAL; 59548c2ecf20Sopenharmony_ci 59558c2ecf20Sopenharmony_ci upd->old_user_data = READ_ONCE(sqe->addr); 59568c2ecf20Sopenharmony_ci upd->update_events = flags & IORING_POLL_UPDATE_EVENTS; 59578c2ecf20Sopenharmony_ci upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA; 59588c2ecf20Sopenharmony_ci 59598c2ecf20Sopenharmony_ci upd->new_user_data = READ_ONCE(sqe->off); 59608c2ecf20Sopenharmony_ci if (!upd->update_user_data && upd->new_user_data) 59618c2ecf20Sopenharmony_ci return -EINVAL; 59628c2ecf20Sopenharmony_ci if (upd->update_events) 59638c2ecf20Sopenharmony_ci upd->events = io_poll_parse_events(sqe, flags); 59648c2ecf20Sopenharmony_ci else if (sqe->poll32_events) 59658c2ecf20Sopenharmony_ci return -EINVAL; 59668c2ecf20Sopenharmony_ci 59678c2ecf20Sopenharmony_ci return 0; 59688c2ecf20Sopenharmony_ci} 59698c2ecf20Sopenharmony_ci 59708c2ecf20Sopenharmony_cistatic int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 59718c2ecf20Sopenharmony_ci{ 59728c2ecf20Sopenharmony_ci struct io_poll_iocb *poll = &req->poll; 59738c2ecf20Sopenharmony_ci u32 flags; 59748c2ecf20Sopenharmony_ci 59758c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 59768c2ecf20Sopenharmony_ci return -EINVAL; 59778c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->buf_index || sqe->off || sqe->addr) 59788c2ecf20Sopenharmony_ci return -EINVAL; 59798c2ecf20Sopenharmony_ci flags = READ_ONCE(sqe->len); 59808c2ecf20Sopenharmony_ci if (flags & ~IORING_POLL_ADD_MULTI) 59818c2ecf20Sopenharmony_ci return -EINVAL; 59828c2ecf20Sopenharmony_ci 59838c2ecf20Sopenharmony_ci io_req_set_refcount(req); 59848c2ecf20Sopenharmony_ci poll->events = io_poll_parse_events(sqe, flags); 59858c2ecf20Sopenharmony_ci return 0; 59868c2ecf20Sopenharmony_ci} 59878c2ecf20Sopenharmony_ci 59888c2ecf20Sopenharmony_cistatic int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) 59898c2ecf20Sopenharmony_ci{ 59908c2ecf20Sopenharmony_ci struct io_poll_iocb *poll = &req->poll; 
59918c2ecf20Sopenharmony_ci struct io_poll_table ipt; 59928c2ecf20Sopenharmony_ci int ret; 59938c2ecf20Sopenharmony_ci 59948c2ecf20Sopenharmony_ci ipt.pt._qproc = io_poll_queue_proc; 59958c2ecf20Sopenharmony_ci 59968c2ecf20Sopenharmony_ci ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events); 59978c2ecf20Sopenharmony_ci if (!ret && ipt.error) 59988c2ecf20Sopenharmony_ci req_set_fail(req); 59998c2ecf20Sopenharmony_ci ret = ret ?: ipt.error; 60008c2ecf20Sopenharmony_ci if (ret) 60018c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, 0); 60028c2ecf20Sopenharmony_ci return 0; 60038c2ecf20Sopenharmony_ci} 60048c2ecf20Sopenharmony_ci 60058c2ecf20Sopenharmony_cistatic int io_poll_update(struct io_kiocb *req, unsigned int issue_flags) 60068c2ecf20Sopenharmony_ci{ 60078c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 60088c2ecf20Sopenharmony_ci struct io_kiocb *preq; 60098c2ecf20Sopenharmony_ci int ret2, ret = 0; 60108c2ecf20Sopenharmony_ci 60118c2ecf20Sopenharmony_ci io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); 60128c2ecf20Sopenharmony_ci 60138c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 60148c2ecf20Sopenharmony_ci preq = io_poll_find(ctx, req->poll_update.old_user_data, true); 60158c2ecf20Sopenharmony_ci if (!preq || !io_poll_disarm(preq)) { 60168c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 60178c2ecf20Sopenharmony_ci ret = preq ? 
-EALREADY : -ENOENT; 60188c2ecf20Sopenharmony_ci goto out; 60198c2ecf20Sopenharmony_ci } 60208c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 60218c2ecf20Sopenharmony_ci 60228c2ecf20Sopenharmony_ci if (req->poll_update.update_events || req->poll_update.update_user_data) { 60238c2ecf20Sopenharmony_ci /* only mask one event flags, keep behavior flags */ 60248c2ecf20Sopenharmony_ci if (req->poll_update.update_events) { 60258c2ecf20Sopenharmony_ci preq->poll.events &= ~0xffff; 60268c2ecf20Sopenharmony_ci preq->poll.events |= req->poll_update.events & 0xffff; 60278c2ecf20Sopenharmony_ci preq->poll.events |= IO_POLL_UNMASK; 60288c2ecf20Sopenharmony_ci } 60298c2ecf20Sopenharmony_ci if (req->poll_update.update_user_data) 60308c2ecf20Sopenharmony_ci preq->user_data = req->poll_update.new_user_data; 60318c2ecf20Sopenharmony_ci 60328c2ecf20Sopenharmony_ci ret2 = io_poll_add(preq, issue_flags); 60338c2ecf20Sopenharmony_ci /* successfully updated, don't complete poll request */ 60348c2ecf20Sopenharmony_ci if (!ret2) 60358c2ecf20Sopenharmony_ci goto out; 60368c2ecf20Sopenharmony_ci } 60378c2ecf20Sopenharmony_ci req_set_fail(preq); 60388c2ecf20Sopenharmony_ci io_req_complete(preq, -ECANCELED); 60398c2ecf20Sopenharmony_ciout: 60408c2ecf20Sopenharmony_ci if (ret < 0) 60418c2ecf20Sopenharmony_ci req_set_fail(req); 60428c2ecf20Sopenharmony_ci /* complete update request, we're done with it */ 60438c2ecf20Sopenharmony_ci io_req_complete(req, ret); 60448c2ecf20Sopenharmony_ci io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); 60458c2ecf20Sopenharmony_ci return 0; 60468c2ecf20Sopenharmony_ci} 60478c2ecf20Sopenharmony_ci 60488c2ecf20Sopenharmony_cistatic void io_req_task_timeout(struct io_kiocb *req, bool *locked) 60498c2ecf20Sopenharmony_ci{ 60508c2ecf20Sopenharmony_ci req_set_fail(req); 60518c2ecf20Sopenharmony_ci io_req_complete_post(req, -ETIME, 0); 60528c2ecf20Sopenharmony_ci} 60538c2ecf20Sopenharmony_ci 60548c2ecf20Sopenharmony_cistatic enum hrtimer_restart 
io_timeout_fn(struct hrtimer *timer) 60558c2ecf20Sopenharmony_ci{ 60568c2ecf20Sopenharmony_ci struct io_timeout_data *data = container_of(timer, 60578c2ecf20Sopenharmony_ci struct io_timeout_data, timer); 60588c2ecf20Sopenharmony_ci struct io_kiocb *req = data->req; 60598c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 60608c2ecf20Sopenharmony_ci unsigned long flags; 60618c2ecf20Sopenharmony_ci 60628c2ecf20Sopenharmony_ci spin_lock_irqsave(&ctx->timeout_lock, flags); 60638c2ecf20Sopenharmony_ci list_del_init(&req->timeout.list); 60648c2ecf20Sopenharmony_ci atomic_set(&req->ctx->cq_timeouts, 60658c2ecf20Sopenharmony_ci atomic_read(&req->ctx->cq_timeouts) + 1); 60668c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&ctx->timeout_lock, flags); 60678c2ecf20Sopenharmony_ci 60688c2ecf20Sopenharmony_ci req->io_task_work.func = io_req_task_timeout; 60698c2ecf20Sopenharmony_ci io_req_task_work_add(req); 60708c2ecf20Sopenharmony_ci return HRTIMER_NORESTART; 60718c2ecf20Sopenharmony_ci} 60728c2ecf20Sopenharmony_ci 60738c2ecf20Sopenharmony_cistatic struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, 60748c2ecf20Sopenharmony_ci __u64 user_data) 60758c2ecf20Sopenharmony_ci __must_hold(&ctx->timeout_lock) 60768c2ecf20Sopenharmony_ci{ 60778c2ecf20Sopenharmony_ci struct io_timeout_data *io; 60788c2ecf20Sopenharmony_ci struct io_kiocb *req; 60798c2ecf20Sopenharmony_ci bool found = false; 60808c2ecf20Sopenharmony_ci 60818c2ecf20Sopenharmony_ci list_for_each_entry(req, &ctx->timeout_list, timeout.list) { 60828c2ecf20Sopenharmony_ci found = user_data == req->user_data; 60838c2ecf20Sopenharmony_ci if (found) 60848c2ecf20Sopenharmony_ci break; 60858c2ecf20Sopenharmony_ci } 60868c2ecf20Sopenharmony_ci if (!found) 60878c2ecf20Sopenharmony_ci return ERR_PTR(-ENOENT); 60888c2ecf20Sopenharmony_ci 60898c2ecf20Sopenharmony_ci io = req->async_data; 60908c2ecf20Sopenharmony_ci if (hrtimer_try_to_cancel(&io->timer) == -1) 60918c2ecf20Sopenharmony_ci return ERR_PTR(-EALREADY); 
60928c2ecf20Sopenharmony_ci list_del_init(&req->timeout.list); 60938c2ecf20Sopenharmony_ci return req; 60948c2ecf20Sopenharmony_ci} 60958c2ecf20Sopenharmony_ci 60968c2ecf20Sopenharmony_cistatic int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data) 60978c2ecf20Sopenharmony_ci __must_hold(&ctx->completion_lock) 60988c2ecf20Sopenharmony_ci __must_hold(&ctx->timeout_lock) 60998c2ecf20Sopenharmony_ci{ 61008c2ecf20Sopenharmony_ci struct io_kiocb *req = io_timeout_extract(ctx, user_data); 61018c2ecf20Sopenharmony_ci 61028c2ecf20Sopenharmony_ci if (IS_ERR(req)) 61038c2ecf20Sopenharmony_ci return PTR_ERR(req); 61048c2ecf20Sopenharmony_ci 61058c2ecf20Sopenharmony_ci req_set_fail(req); 61068c2ecf20Sopenharmony_ci io_fill_cqe_req(req, -ECANCELED, 0); 61078c2ecf20Sopenharmony_ci io_put_req_deferred(req); 61088c2ecf20Sopenharmony_ci return 0; 61098c2ecf20Sopenharmony_ci} 61108c2ecf20Sopenharmony_ci 61118c2ecf20Sopenharmony_cistatic clockid_t io_timeout_get_clock(struct io_timeout_data *data) 61128c2ecf20Sopenharmony_ci{ 61138c2ecf20Sopenharmony_ci switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { 61148c2ecf20Sopenharmony_ci case IORING_TIMEOUT_BOOTTIME: 61158c2ecf20Sopenharmony_ci return CLOCK_BOOTTIME; 61168c2ecf20Sopenharmony_ci case IORING_TIMEOUT_REALTIME: 61178c2ecf20Sopenharmony_ci return CLOCK_REALTIME; 61188c2ecf20Sopenharmony_ci default: 61198c2ecf20Sopenharmony_ci /* can't happen, vetted at prep time */ 61208c2ecf20Sopenharmony_ci WARN_ON_ONCE(1); 61218c2ecf20Sopenharmony_ci fallthrough; 61228c2ecf20Sopenharmony_ci case 0: 61238c2ecf20Sopenharmony_ci return CLOCK_MONOTONIC; 61248c2ecf20Sopenharmony_ci } 61258c2ecf20Sopenharmony_ci} 61268c2ecf20Sopenharmony_ci 61278c2ecf20Sopenharmony_cistatic int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, 61288c2ecf20Sopenharmony_ci struct timespec64 *ts, enum hrtimer_mode mode) 61298c2ecf20Sopenharmony_ci __must_hold(&ctx->timeout_lock) 61308c2ecf20Sopenharmony_ci{ 61318c2ecf20Sopenharmony_ci struct 
io_timeout_data *io; 61328c2ecf20Sopenharmony_ci struct io_kiocb *req; 61338c2ecf20Sopenharmony_ci bool found = false; 61348c2ecf20Sopenharmony_ci 61358c2ecf20Sopenharmony_ci list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) { 61368c2ecf20Sopenharmony_ci found = user_data == req->user_data; 61378c2ecf20Sopenharmony_ci if (found) 61388c2ecf20Sopenharmony_ci break; 61398c2ecf20Sopenharmony_ci } 61408c2ecf20Sopenharmony_ci if (!found) 61418c2ecf20Sopenharmony_ci return -ENOENT; 61428c2ecf20Sopenharmony_ci 61438c2ecf20Sopenharmony_ci io = req->async_data; 61448c2ecf20Sopenharmony_ci if (hrtimer_try_to_cancel(&io->timer) == -1) 61458c2ecf20Sopenharmony_ci return -EALREADY; 61468c2ecf20Sopenharmony_ci hrtimer_init(&io->timer, io_timeout_get_clock(io), mode); 61478c2ecf20Sopenharmony_ci io->timer.function = io_link_timeout_fn; 61488c2ecf20Sopenharmony_ci hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); 61498c2ecf20Sopenharmony_ci return 0; 61508c2ecf20Sopenharmony_ci} 61518c2ecf20Sopenharmony_ci 61528c2ecf20Sopenharmony_cistatic int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, 61538c2ecf20Sopenharmony_ci struct timespec64 *ts, enum hrtimer_mode mode) 61548c2ecf20Sopenharmony_ci __must_hold(&ctx->timeout_lock) 61558c2ecf20Sopenharmony_ci{ 61568c2ecf20Sopenharmony_ci struct io_kiocb *req = io_timeout_extract(ctx, user_data); 61578c2ecf20Sopenharmony_ci struct io_timeout_data *data; 61588c2ecf20Sopenharmony_ci 61598c2ecf20Sopenharmony_ci if (IS_ERR(req)) 61608c2ecf20Sopenharmony_ci return PTR_ERR(req); 61618c2ecf20Sopenharmony_ci 61628c2ecf20Sopenharmony_ci req->timeout.off = 0; /* noseq */ 61638c2ecf20Sopenharmony_ci data = req->async_data; 61648c2ecf20Sopenharmony_ci list_add_tail(&req->timeout.list, &ctx->timeout_list); 61658c2ecf20Sopenharmony_ci hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); 61668c2ecf20Sopenharmony_ci data->timer.function = io_timeout_fn; 61678c2ecf20Sopenharmony_ci hrtimer_start(&data->timer, 
timespec64_to_ktime(*ts), mode); 61688c2ecf20Sopenharmony_ci return 0; 61698c2ecf20Sopenharmony_ci} 61708c2ecf20Sopenharmony_ci 61718c2ecf20Sopenharmony_cistatic int io_timeout_remove_prep(struct io_kiocb *req, 61728c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 61738c2ecf20Sopenharmony_ci{ 61748c2ecf20Sopenharmony_ci struct io_timeout_rem *tr = &req->timeout_rem; 61758c2ecf20Sopenharmony_ci 61768c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 61778c2ecf20Sopenharmony_ci return -EINVAL; 61788c2ecf20Sopenharmony_ci if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) 61798c2ecf20Sopenharmony_ci return -EINVAL; 61808c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->splice_fd_in) 61818c2ecf20Sopenharmony_ci return -EINVAL; 61828c2ecf20Sopenharmony_ci 61838c2ecf20Sopenharmony_ci tr->ltimeout = false; 61848c2ecf20Sopenharmony_ci tr->addr = READ_ONCE(sqe->addr); 61858c2ecf20Sopenharmony_ci tr->flags = READ_ONCE(sqe->timeout_flags); 61868c2ecf20Sopenharmony_ci if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) { 61878c2ecf20Sopenharmony_ci if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1) 61888c2ecf20Sopenharmony_ci return -EINVAL; 61898c2ecf20Sopenharmony_ci if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) 61908c2ecf20Sopenharmony_ci tr->ltimeout = true; 61918c2ecf20Sopenharmony_ci if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) 61928c2ecf20Sopenharmony_ci return -EINVAL; 61938c2ecf20Sopenharmony_ci if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) 61948c2ecf20Sopenharmony_ci return -EFAULT; 61958c2ecf20Sopenharmony_ci } else if (tr->flags) { 61968c2ecf20Sopenharmony_ci /* timeout removal doesn't support flags */ 61978c2ecf20Sopenharmony_ci return -EINVAL; 61988c2ecf20Sopenharmony_ci } 61998c2ecf20Sopenharmony_ci 62008c2ecf20Sopenharmony_ci return 0; 62018c2ecf20Sopenharmony_ci} 62028c2ecf20Sopenharmony_ci 62038c2ecf20Sopenharmony_cistatic inline enum hrtimer_mode 
io_translate_timeout_mode(unsigned int flags) 62048c2ecf20Sopenharmony_ci{ 62058c2ecf20Sopenharmony_ci return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS 62068c2ecf20Sopenharmony_ci : HRTIMER_MODE_REL; 62078c2ecf20Sopenharmony_ci} 62088c2ecf20Sopenharmony_ci 62098c2ecf20Sopenharmony_ci/* 62108c2ecf20Sopenharmony_ci * Remove or update an existing timeout command 62118c2ecf20Sopenharmony_ci */ 62128c2ecf20Sopenharmony_cistatic int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) 62138c2ecf20Sopenharmony_ci{ 62148c2ecf20Sopenharmony_ci struct io_timeout_rem *tr = &req->timeout_rem; 62158c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 62168c2ecf20Sopenharmony_ci int ret; 62178c2ecf20Sopenharmony_ci 62188c2ecf20Sopenharmony_ci if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) { 62198c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 62208c2ecf20Sopenharmony_ci spin_lock_irq(&ctx->timeout_lock); 62218c2ecf20Sopenharmony_ci ret = io_timeout_cancel(ctx, tr->addr); 62228c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->timeout_lock); 62238c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 62248c2ecf20Sopenharmony_ci } else { 62258c2ecf20Sopenharmony_ci enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags); 62268c2ecf20Sopenharmony_ci 62278c2ecf20Sopenharmony_ci spin_lock_irq(&ctx->timeout_lock); 62288c2ecf20Sopenharmony_ci if (tr->ltimeout) 62298c2ecf20Sopenharmony_ci ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); 62308c2ecf20Sopenharmony_ci else 62318c2ecf20Sopenharmony_ci ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); 62328c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->timeout_lock); 62338c2ecf20Sopenharmony_ci } 62348c2ecf20Sopenharmony_ci 62358c2ecf20Sopenharmony_ci if (ret < 0) 62368c2ecf20Sopenharmony_ci req_set_fail(req); 62378c2ecf20Sopenharmony_ci io_req_complete_post(req, ret, 0); 62388c2ecf20Sopenharmony_ci return 0; 62398c2ecf20Sopenharmony_ci} 62408c2ecf20Sopenharmony_ci 
62418c2ecf20Sopenharmony_cistatic int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, 62428c2ecf20Sopenharmony_ci bool is_timeout_link) 62438c2ecf20Sopenharmony_ci{ 62448c2ecf20Sopenharmony_ci struct io_timeout_data *data; 62458c2ecf20Sopenharmony_ci unsigned flags; 62468c2ecf20Sopenharmony_ci u32 off = READ_ONCE(sqe->off); 62478c2ecf20Sopenharmony_ci 62488c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 62498c2ecf20Sopenharmony_ci return -EINVAL; 62508c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->buf_index || sqe->len != 1 || 62518c2ecf20Sopenharmony_ci sqe->splice_fd_in) 62528c2ecf20Sopenharmony_ci return -EINVAL; 62538c2ecf20Sopenharmony_ci if (off && is_timeout_link) 62548c2ecf20Sopenharmony_ci return -EINVAL; 62558c2ecf20Sopenharmony_ci flags = READ_ONCE(sqe->timeout_flags); 62568c2ecf20Sopenharmony_ci if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK)) 62578c2ecf20Sopenharmony_ci return -EINVAL; 62588c2ecf20Sopenharmony_ci /* more than one clock specified is invalid, obviously */ 62598c2ecf20Sopenharmony_ci if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) 62608c2ecf20Sopenharmony_ci return -EINVAL; 62618c2ecf20Sopenharmony_ci 62628c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&req->timeout.list); 62638c2ecf20Sopenharmony_ci req->timeout.off = off; 62648c2ecf20Sopenharmony_ci if (unlikely(off && !req->ctx->off_timeout_used)) 62658c2ecf20Sopenharmony_ci req->ctx->off_timeout_used = true; 62668c2ecf20Sopenharmony_ci 62678c2ecf20Sopenharmony_ci if (!req->async_data && io_alloc_async_data(req)) 62688c2ecf20Sopenharmony_ci return -ENOMEM; 62698c2ecf20Sopenharmony_ci 62708c2ecf20Sopenharmony_ci data = req->async_data; 62718c2ecf20Sopenharmony_ci data->req = req; 62728c2ecf20Sopenharmony_ci data->flags = flags; 62738c2ecf20Sopenharmony_ci 62748c2ecf20Sopenharmony_ci if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) 62758c2ecf20Sopenharmony_ci return -EFAULT; 62768c2ecf20Sopenharmony_ci 
62778c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&req->timeout.list); 62788c2ecf20Sopenharmony_ci data->mode = io_translate_timeout_mode(flags); 62798c2ecf20Sopenharmony_ci hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); 62808c2ecf20Sopenharmony_ci 62818c2ecf20Sopenharmony_ci if (is_timeout_link) { 62828c2ecf20Sopenharmony_ci struct io_submit_link *link = &req->ctx->submit_state.link; 62838c2ecf20Sopenharmony_ci 62848c2ecf20Sopenharmony_ci if (!link->head) 62858c2ecf20Sopenharmony_ci return -EINVAL; 62868c2ecf20Sopenharmony_ci if (link->last->opcode == IORING_OP_LINK_TIMEOUT) 62878c2ecf20Sopenharmony_ci return -EINVAL; 62888c2ecf20Sopenharmony_ci req->timeout.head = link->last; 62898c2ecf20Sopenharmony_ci link->last->flags |= REQ_F_ARM_LTIMEOUT; 62908c2ecf20Sopenharmony_ci } 62918c2ecf20Sopenharmony_ci return 0; 62928c2ecf20Sopenharmony_ci} 62938c2ecf20Sopenharmony_ci 62948c2ecf20Sopenharmony_cistatic int io_timeout(struct io_kiocb *req, unsigned int issue_flags) 62958c2ecf20Sopenharmony_ci{ 62968c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 62978c2ecf20Sopenharmony_ci struct io_timeout_data *data = req->async_data; 62988c2ecf20Sopenharmony_ci struct list_head *entry; 62998c2ecf20Sopenharmony_ci u32 tail, off = req->timeout.off; 63008c2ecf20Sopenharmony_ci 63018c2ecf20Sopenharmony_ci spin_lock_irq(&ctx->timeout_lock); 63028c2ecf20Sopenharmony_ci 63038c2ecf20Sopenharmony_ci /* 63048c2ecf20Sopenharmony_ci * sqe->off holds how many events that need to occur for this 63058c2ecf20Sopenharmony_ci * timeout event to be satisfied. If it isn't set, then this is 63068c2ecf20Sopenharmony_ci * a pure timeout request, sequence isn't used. 
63078c2ecf20Sopenharmony_ci */ 63088c2ecf20Sopenharmony_ci if (io_is_timeout_noseq(req)) { 63098c2ecf20Sopenharmony_ci entry = ctx->timeout_list.prev; 63108c2ecf20Sopenharmony_ci goto add; 63118c2ecf20Sopenharmony_ci } 63128c2ecf20Sopenharmony_ci 63138c2ecf20Sopenharmony_ci tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); 63148c2ecf20Sopenharmony_ci req->timeout.target_seq = tail + off; 63158c2ecf20Sopenharmony_ci 63168c2ecf20Sopenharmony_ci /* Update the last seq here in case io_flush_timeouts() hasn't. 63178c2ecf20Sopenharmony_ci * This is safe because ->completion_lock is held, and submissions 63188c2ecf20Sopenharmony_ci * and completions are never mixed in the same ->completion_lock section. 63198c2ecf20Sopenharmony_ci */ 63208c2ecf20Sopenharmony_ci ctx->cq_last_tm_flush = tail; 63218c2ecf20Sopenharmony_ci 63228c2ecf20Sopenharmony_ci /* 63238c2ecf20Sopenharmony_ci * Insertion sort, ensuring the first entry in the list is always 63248c2ecf20Sopenharmony_ci * the one we need first. 
63258c2ecf20Sopenharmony_ci */ 63268c2ecf20Sopenharmony_ci list_for_each_prev(entry, &ctx->timeout_list) { 63278c2ecf20Sopenharmony_ci struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, 63288c2ecf20Sopenharmony_ci timeout.list); 63298c2ecf20Sopenharmony_ci 63308c2ecf20Sopenharmony_ci if (io_is_timeout_noseq(nxt)) 63318c2ecf20Sopenharmony_ci continue; 63328c2ecf20Sopenharmony_ci /* nxt.seq is behind @tail, otherwise would've been completed */ 63338c2ecf20Sopenharmony_ci if (off >= nxt->timeout.target_seq - tail) 63348c2ecf20Sopenharmony_ci break; 63358c2ecf20Sopenharmony_ci } 63368c2ecf20Sopenharmony_ciadd: 63378c2ecf20Sopenharmony_ci list_add(&req->timeout.list, entry); 63388c2ecf20Sopenharmony_ci data->timer.function = io_timeout_fn; 63398c2ecf20Sopenharmony_ci hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); 63408c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->timeout_lock); 63418c2ecf20Sopenharmony_ci return 0; 63428c2ecf20Sopenharmony_ci} 63438c2ecf20Sopenharmony_ci 63448c2ecf20Sopenharmony_cistruct io_cancel_data { 63458c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 63468c2ecf20Sopenharmony_ci u64 user_data; 63478c2ecf20Sopenharmony_ci}; 63488c2ecf20Sopenharmony_ci 63498c2ecf20Sopenharmony_cistatic bool io_cancel_cb(struct io_wq_work *work, void *data) 63508c2ecf20Sopenharmony_ci{ 63518c2ecf20Sopenharmony_ci struct io_kiocb *req = container_of(work, struct io_kiocb, work); 63528c2ecf20Sopenharmony_ci struct io_cancel_data *cd = data; 63538c2ecf20Sopenharmony_ci 63548c2ecf20Sopenharmony_ci return req->ctx == cd->ctx && req->user_data == cd->user_data; 63558c2ecf20Sopenharmony_ci} 63568c2ecf20Sopenharmony_ci 63578c2ecf20Sopenharmony_cistatic int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data, 63588c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx) 63598c2ecf20Sopenharmony_ci{ 63608c2ecf20Sopenharmony_ci struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, }; 63618c2ecf20Sopenharmony_ci enum io_wq_cancel 
cancel_ret; 63628c2ecf20Sopenharmony_ci int ret = 0; 63638c2ecf20Sopenharmony_ci 63648c2ecf20Sopenharmony_ci if (!tctx || !tctx->io_wq) 63658c2ecf20Sopenharmony_ci return -ENOENT; 63668c2ecf20Sopenharmony_ci 63678c2ecf20Sopenharmony_ci cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false); 63688c2ecf20Sopenharmony_ci switch (cancel_ret) { 63698c2ecf20Sopenharmony_ci case IO_WQ_CANCEL_OK: 63708c2ecf20Sopenharmony_ci ret = 0; 63718c2ecf20Sopenharmony_ci break; 63728c2ecf20Sopenharmony_ci case IO_WQ_CANCEL_RUNNING: 63738c2ecf20Sopenharmony_ci ret = -EALREADY; 63748c2ecf20Sopenharmony_ci break; 63758c2ecf20Sopenharmony_ci case IO_WQ_CANCEL_NOTFOUND: 63768c2ecf20Sopenharmony_ci ret = -ENOENT; 63778c2ecf20Sopenharmony_ci break; 63788c2ecf20Sopenharmony_ci } 63798c2ecf20Sopenharmony_ci 63808c2ecf20Sopenharmony_ci return ret; 63818c2ecf20Sopenharmony_ci} 63828c2ecf20Sopenharmony_ci 63838c2ecf20Sopenharmony_cistatic int io_try_cancel_userdata(struct io_kiocb *req, u64 sqe_addr) 63848c2ecf20Sopenharmony_ci{ 63858c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 63868c2ecf20Sopenharmony_ci int ret; 63878c2ecf20Sopenharmony_ci 63888c2ecf20Sopenharmony_ci WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current); 63898c2ecf20Sopenharmony_ci 63908c2ecf20Sopenharmony_ci ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx); 63918c2ecf20Sopenharmony_ci if (ret != -ENOENT) 63928c2ecf20Sopenharmony_ci return ret; 63938c2ecf20Sopenharmony_ci 63948c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 63958c2ecf20Sopenharmony_ci spin_lock_irq(&ctx->timeout_lock); 63968c2ecf20Sopenharmony_ci ret = io_timeout_cancel(ctx, sqe_addr); 63978c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->timeout_lock); 63988c2ecf20Sopenharmony_ci if (ret != -ENOENT) 63998c2ecf20Sopenharmony_ci goto out; 64008c2ecf20Sopenharmony_ci ret = io_poll_cancel(ctx, sqe_addr, false); 64018c2ecf20Sopenharmony_ciout: 64028c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 
64038c2ecf20Sopenharmony_ci return ret; 64048c2ecf20Sopenharmony_ci} 64058c2ecf20Sopenharmony_ci 64068c2ecf20Sopenharmony_cistatic int io_async_cancel_prep(struct io_kiocb *req, 64078c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 64088c2ecf20Sopenharmony_ci{ 64098c2ecf20Sopenharmony_ci if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) 64108c2ecf20Sopenharmony_ci return -EINVAL; 64118c2ecf20Sopenharmony_ci if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) 64128c2ecf20Sopenharmony_ci return -EINVAL; 64138c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->off || sqe->len || sqe->cancel_flags || 64148c2ecf20Sopenharmony_ci sqe->splice_fd_in) 64158c2ecf20Sopenharmony_ci return -EINVAL; 64168c2ecf20Sopenharmony_ci 64178c2ecf20Sopenharmony_ci req->cancel.addr = READ_ONCE(sqe->addr); 64188c2ecf20Sopenharmony_ci return 0; 64198c2ecf20Sopenharmony_ci} 64208c2ecf20Sopenharmony_ci 64218c2ecf20Sopenharmony_cistatic int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) 64228c2ecf20Sopenharmony_ci{ 64238c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 64248c2ecf20Sopenharmony_ci u64 sqe_addr = req->cancel.addr; 64258c2ecf20Sopenharmony_ci struct io_tctx_node *node; 64268c2ecf20Sopenharmony_ci int ret; 64278c2ecf20Sopenharmony_ci 64288c2ecf20Sopenharmony_ci ret = io_try_cancel_userdata(req, sqe_addr); 64298c2ecf20Sopenharmony_ci if (ret != -ENOENT) 64308c2ecf20Sopenharmony_ci goto done; 64318c2ecf20Sopenharmony_ci 64328c2ecf20Sopenharmony_ci /* slow path, try all io-wq's */ 64338c2ecf20Sopenharmony_ci io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); 64348c2ecf20Sopenharmony_ci ret = -ENOENT; 64358c2ecf20Sopenharmony_ci list_for_each_entry(node, &ctx->tctx_list, ctx_node) { 64368c2ecf20Sopenharmony_ci struct io_uring_task *tctx = node->task->io_uring; 64378c2ecf20Sopenharmony_ci 64388c2ecf20Sopenharmony_ci ret = io_async_cancel_one(tctx, req->cancel.addr, ctx); 64398c2ecf20Sopenharmony_ci if (ret != -ENOENT) 
64408c2ecf20Sopenharmony_ci break; 64418c2ecf20Sopenharmony_ci } 64428c2ecf20Sopenharmony_ci io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); 64438c2ecf20Sopenharmony_cidone: 64448c2ecf20Sopenharmony_ci if (ret < 0) 64458c2ecf20Sopenharmony_ci req_set_fail(req); 64468c2ecf20Sopenharmony_ci io_req_complete_post(req, ret, 0); 64478c2ecf20Sopenharmony_ci return 0; 64488c2ecf20Sopenharmony_ci} 64498c2ecf20Sopenharmony_ci 64508c2ecf20Sopenharmony_cistatic int io_rsrc_update_prep(struct io_kiocb *req, 64518c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 64528c2ecf20Sopenharmony_ci{ 64538c2ecf20Sopenharmony_ci if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) 64548c2ecf20Sopenharmony_ci return -EINVAL; 64558c2ecf20Sopenharmony_ci if (sqe->ioprio || sqe->rw_flags || sqe->splice_fd_in) 64568c2ecf20Sopenharmony_ci return -EINVAL; 64578c2ecf20Sopenharmony_ci 64588c2ecf20Sopenharmony_ci req->rsrc_update.offset = READ_ONCE(sqe->off); 64598c2ecf20Sopenharmony_ci req->rsrc_update.nr_args = READ_ONCE(sqe->len); 64608c2ecf20Sopenharmony_ci if (!req->rsrc_update.nr_args) 64618c2ecf20Sopenharmony_ci return -EINVAL; 64628c2ecf20Sopenharmony_ci req->rsrc_update.arg = READ_ONCE(sqe->addr); 64638c2ecf20Sopenharmony_ci return 0; 64648c2ecf20Sopenharmony_ci} 64658c2ecf20Sopenharmony_ci 64668c2ecf20Sopenharmony_cistatic int io_files_update(struct io_kiocb *req, unsigned int issue_flags) 64678c2ecf20Sopenharmony_ci{ 64688c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 64698c2ecf20Sopenharmony_ci struct io_uring_rsrc_update2 up; 64708c2ecf20Sopenharmony_ci int ret; 64718c2ecf20Sopenharmony_ci 64728c2ecf20Sopenharmony_ci up.offset = req->rsrc_update.offset; 64738c2ecf20Sopenharmony_ci up.data = req->rsrc_update.arg; 64748c2ecf20Sopenharmony_ci up.nr = 0; 64758c2ecf20Sopenharmony_ci up.tags = 0; 64768c2ecf20Sopenharmony_ci up.resv = 0; 64778c2ecf20Sopenharmony_ci up.resv2 = 0; 64788c2ecf20Sopenharmony_ci 64798c2ecf20Sopenharmony_ci 
io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); 64808c2ecf20Sopenharmony_ci ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, 64818c2ecf20Sopenharmony_ci &up, req->rsrc_update.nr_args); 64828c2ecf20Sopenharmony_ci io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); 64838c2ecf20Sopenharmony_ci 64848c2ecf20Sopenharmony_ci if (ret < 0) 64858c2ecf20Sopenharmony_ci req_set_fail(req); 64868c2ecf20Sopenharmony_ci __io_req_complete(req, issue_flags, ret, 0); 64878c2ecf20Sopenharmony_ci return 0; 64888c2ecf20Sopenharmony_ci} 64898c2ecf20Sopenharmony_ci 64908c2ecf20Sopenharmony_cistatic int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 64918c2ecf20Sopenharmony_ci{ 64928c2ecf20Sopenharmony_ci switch (req->opcode) { 64938c2ecf20Sopenharmony_ci case IORING_OP_NOP: 64948c2ecf20Sopenharmony_ci return 0; 64958c2ecf20Sopenharmony_ci case IORING_OP_READV: 64968c2ecf20Sopenharmony_ci case IORING_OP_READ_FIXED: 64978c2ecf20Sopenharmony_ci case IORING_OP_READ: 64988c2ecf20Sopenharmony_ci return io_read_prep(req, sqe); 64998c2ecf20Sopenharmony_ci case IORING_OP_WRITEV: 65008c2ecf20Sopenharmony_ci case IORING_OP_WRITE_FIXED: 65018c2ecf20Sopenharmony_ci case IORING_OP_WRITE: 65028c2ecf20Sopenharmony_ci return io_write_prep(req, sqe); 65038c2ecf20Sopenharmony_ci case IORING_OP_POLL_ADD: 65048c2ecf20Sopenharmony_ci return io_poll_add_prep(req, sqe); 65058c2ecf20Sopenharmony_ci case IORING_OP_POLL_REMOVE: 65068c2ecf20Sopenharmony_ci return io_poll_update_prep(req, sqe); 65078c2ecf20Sopenharmony_ci case IORING_OP_FSYNC: 65088c2ecf20Sopenharmony_ci return io_fsync_prep(req, sqe); 65098c2ecf20Sopenharmony_ci case IORING_OP_SYNC_FILE_RANGE: 65108c2ecf20Sopenharmony_ci return io_sfr_prep(req, sqe); 65118c2ecf20Sopenharmony_ci case IORING_OP_SENDMSG: 65128c2ecf20Sopenharmony_ci case IORING_OP_SEND: 65138c2ecf20Sopenharmony_ci return io_sendmsg_prep(req, sqe); 65148c2ecf20Sopenharmony_ci case IORING_OP_RECVMSG: 65158c2ecf20Sopenharmony_ci case 
IORING_OP_RECV: 65168c2ecf20Sopenharmony_ci return io_recvmsg_prep(req, sqe); 65178c2ecf20Sopenharmony_ci case IORING_OP_CONNECT: 65188c2ecf20Sopenharmony_ci return io_connect_prep(req, sqe); 65198c2ecf20Sopenharmony_ci case IORING_OP_TIMEOUT: 65208c2ecf20Sopenharmony_ci return io_timeout_prep(req, sqe, false); 65218c2ecf20Sopenharmony_ci case IORING_OP_TIMEOUT_REMOVE: 65228c2ecf20Sopenharmony_ci return io_timeout_remove_prep(req, sqe); 65238c2ecf20Sopenharmony_ci case IORING_OP_ASYNC_CANCEL: 65248c2ecf20Sopenharmony_ci return io_async_cancel_prep(req, sqe); 65258c2ecf20Sopenharmony_ci case IORING_OP_LINK_TIMEOUT: 65268c2ecf20Sopenharmony_ci return io_timeout_prep(req, sqe, true); 65278c2ecf20Sopenharmony_ci case IORING_OP_ACCEPT: 65288c2ecf20Sopenharmony_ci return io_accept_prep(req, sqe); 65298c2ecf20Sopenharmony_ci case IORING_OP_FALLOCATE: 65308c2ecf20Sopenharmony_ci return io_fallocate_prep(req, sqe); 65318c2ecf20Sopenharmony_ci case IORING_OP_OPENAT: 65328c2ecf20Sopenharmony_ci return io_openat_prep(req, sqe); 65338c2ecf20Sopenharmony_ci case IORING_OP_CLOSE: 65348c2ecf20Sopenharmony_ci return io_close_prep(req, sqe); 65358c2ecf20Sopenharmony_ci case IORING_OP_FILES_UPDATE: 65368c2ecf20Sopenharmony_ci return io_rsrc_update_prep(req, sqe); 65378c2ecf20Sopenharmony_ci case IORING_OP_STATX: 65388c2ecf20Sopenharmony_ci return io_statx_prep(req, sqe); 65398c2ecf20Sopenharmony_ci case IORING_OP_FADVISE: 65408c2ecf20Sopenharmony_ci return io_fadvise_prep(req, sqe); 65418c2ecf20Sopenharmony_ci case IORING_OP_MADVISE: 65428c2ecf20Sopenharmony_ci return io_madvise_prep(req, sqe); 65438c2ecf20Sopenharmony_ci case IORING_OP_OPENAT2: 65448c2ecf20Sopenharmony_ci return io_openat2_prep(req, sqe); 65458c2ecf20Sopenharmony_ci case IORING_OP_EPOLL_CTL: 65468c2ecf20Sopenharmony_ci return io_epoll_ctl_prep(req, sqe); 65478c2ecf20Sopenharmony_ci case IORING_OP_SPLICE: 65488c2ecf20Sopenharmony_ci return io_splice_prep(req, sqe); 65498c2ecf20Sopenharmony_ci case 
IORING_OP_PROVIDE_BUFFERS: 65508c2ecf20Sopenharmony_ci return io_provide_buffers_prep(req, sqe); 65518c2ecf20Sopenharmony_ci case IORING_OP_REMOVE_BUFFERS: 65528c2ecf20Sopenharmony_ci return io_remove_buffers_prep(req, sqe); 65538c2ecf20Sopenharmony_ci case IORING_OP_TEE: 65548c2ecf20Sopenharmony_ci return io_tee_prep(req, sqe); 65558c2ecf20Sopenharmony_ci case IORING_OP_SHUTDOWN: 65568c2ecf20Sopenharmony_ci return io_shutdown_prep(req, sqe); 65578c2ecf20Sopenharmony_ci case IORING_OP_RENAMEAT: 65588c2ecf20Sopenharmony_ci return io_renameat_prep(req, sqe); 65598c2ecf20Sopenharmony_ci case IORING_OP_UNLINKAT: 65608c2ecf20Sopenharmony_ci return io_unlinkat_prep(req, sqe); 65618c2ecf20Sopenharmony_ci } 65628c2ecf20Sopenharmony_ci 65638c2ecf20Sopenharmony_ci printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", 65648c2ecf20Sopenharmony_ci req->opcode); 65658c2ecf20Sopenharmony_ci return -EINVAL; 65668c2ecf20Sopenharmony_ci} 65678c2ecf20Sopenharmony_ci 65688c2ecf20Sopenharmony_cistatic int io_req_prep_async(struct io_kiocb *req) 65698c2ecf20Sopenharmony_ci{ 65708c2ecf20Sopenharmony_ci if (!io_op_defs[req->opcode].needs_async_setup) 65718c2ecf20Sopenharmony_ci return 0; 65728c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(req->async_data)) 65738c2ecf20Sopenharmony_ci return -EFAULT; 65748c2ecf20Sopenharmony_ci if (io_alloc_async_data(req)) 65758c2ecf20Sopenharmony_ci return -EAGAIN; 65768c2ecf20Sopenharmony_ci 65778c2ecf20Sopenharmony_ci switch (req->opcode) { 65788c2ecf20Sopenharmony_ci case IORING_OP_READV: 65798c2ecf20Sopenharmony_ci return io_rw_prep_async(req, READ); 65808c2ecf20Sopenharmony_ci case IORING_OP_WRITEV: 65818c2ecf20Sopenharmony_ci return io_rw_prep_async(req, WRITE); 65828c2ecf20Sopenharmony_ci case IORING_OP_SENDMSG: 65838c2ecf20Sopenharmony_ci return io_sendmsg_prep_async(req); 65848c2ecf20Sopenharmony_ci case IORING_OP_RECVMSG: 65858c2ecf20Sopenharmony_ci return io_recvmsg_prep_async(req); 65868c2ecf20Sopenharmony_ci case IORING_OP_CONNECT: 
65878c2ecf20Sopenharmony_ci return io_connect_prep_async(req); 65888c2ecf20Sopenharmony_ci } 65898c2ecf20Sopenharmony_ci printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n", 65908c2ecf20Sopenharmony_ci req->opcode); 65918c2ecf20Sopenharmony_ci return -EFAULT; 65928c2ecf20Sopenharmony_ci} 65938c2ecf20Sopenharmony_ci 65948c2ecf20Sopenharmony_cistatic u32 io_get_sequence(struct io_kiocb *req) 65958c2ecf20Sopenharmony_ci{ 65968c2ecf20Sopenharmony_ci u32 seq = req->ctx->cached_sq_head; 65978c2ecf20Sopenharmony_ci 65988c2ecf20Sopenharmony_ci /* need original cached_sq_head, but it was increased for each req */ 65998c2ecf20Sopenharmony_ci io_for_each_link(req, req) 66008c2ecf20Sopenharmony_ci seq--; 66018c2ecf20Sopenharmony_ci return seq; 66028c2ecf20Sopenharmony_ci} 66038c2ecf20Sopenharmony_ci 66048c2ecf20Sopenharmony_cistatic bool io_drain_req(struct io_kiocb *req) 66058c2ecf20Sopenharmony_ci{ 66068c2ecf20Sopenharmony_ci struct io_kiocb *pos; 66078c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 66088c2ecf20Sopenharmony_ci struct io_defer_entry *de; 66098c2ecf20Sopenharmony_ci int ret; 66108c2ecf20Sopenharmony_ci u32 seq; 66118c2ecf20Sopenharmony_ci 66128c2ecf20Sopenharmony_ci if (req->flags & REQ_F_FAIL) { 66138c2ecf20Sopenharmony_ci io_req_complete_fail_submit(req); 66148c2ecf20Sopenharmony_ci return true; 66158c2ecf20Sopenharmony_ci } 66168c2ecf20Sopenharmony_ci 66178c2ecf20Sopenharmony_ci /* 66188c2ecf20Sopenharmony_ci * If we need to drain a request in the middle of a link, drain the 66198c2ecf20Sopenharmony_ci * head request and the next request/link after the current link. 66208c2ecf20Sopenharmony_ci * Considering sequential execution of links, IOSQE_IO_DRAIN will be 66218c2ecf20Sopenharmony_ci * maintained for every request of our link. 
66228c2ecf20Sopenharmony_ci */ 66238c2ecf20Sopenharmony_ci if (ctx->drain_next) { 66248c2ecf20Sopenharmony_ci req->flags |= REQ_F_IO_DRAIN; 66258c2ecf20Sopenharmony_ci ctx->drain_next = false; 66268c2ecf20Sopenharmony_ci } 66278c2ecf20Sopenharmony_ci /* not interested in head, start from the first linked */ 66288c2ecf20Sopenharmony_ci io_for_each_link(pos, req->link) { 66298c2ecf20Sopenharmony_ci if (pos->flags & REQ_F_IO_DRAIN) { 66308c2ecf20Sopenharmony_ci ctx->drain_next = true; 66318c2ecf20Sopenharmony_ci req->flags |= REQ_F_IO_DRAIN; 66328c2ecf20Sopenharmony_ci break; 66338c2ecf20Sopenharmony_ci } 66348c2ecf20Sopenharmony_ci } 66358c2ecf20Sopenharmony_ci 66368c2ecf20Sopenharmony_ci /* Still need defer if there is pending req in defer list. */ 66378c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 66388c2ecf20Sopenharmony_ci if (likely(list_empty_careful(&ctx->defer_list) && 66398c2ecf20Sopenharmony_ci !(req->flags & REQ_F_IO_DRAIN))) { 66408c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 66418c2ecf20Sopenharmony_ci ctx->drain_active = false; 66428c2ecf20Sopenharmony_ci return false; 66438c2ecf20Sopenharmony_ci } 66448c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 66458c2ecf20Sopenharmony_ci 66468c2ecf20Sopenharmony_ci seq = io_get_sequence(req); 66478c2ecf20Sopenharmony_ci /* Still a chance to pass the sequence check */ 66488c2ecf20Sopenharmony_ci if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list)) 66498c2ecf20Sopenharmony_ci return false; 66508c2ecf20Sopenharmony_ci 66518c2ecf20Sopenharmony_ci ret = io_req_prep_async(req); 66528c2ecf20Sopenharmony_ci if (ret) 66538c2ecf20Sopenharmony_ci goto fail; 66548c2ecf20Sopenharmony_ci io_prep_async_link(req); 66558c2ecf20Sopenharmony_ci de = kmalloc(sizeof(*de), GFP_KERNEL); 66568c2ecf20Sopenharmony_ci if (!de) { 66578c2ecf20Sopenharmony_ci ret = -ENOMEM; 66588c2ecf20Sopenharmony_cifail: 66598c2ecf20Sopenharmony_ci io_req_complete_failed(req, ret); 
66608c2ecf20Sopenharmony_ci return true; 66618c2ecf20Sopenharmony_ci } 66628c2ecf20Sopenharmony_ci 66638c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 66648c2ecf20Sopenharmony_ci if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) { 66658c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 66668c2ecf20Sopenharmony_ci kfree(de); 66678c2ecf20Sopenharmony_ci io_queue_async_work(req, NULL); 66688c2ecf20Sopenharmony_ci return true; 66698c2ecf20Sopenharmony_ci } 66708c2ecf20Sopenharmony_ci 66718c2ecf20Sopenharmony_ci trace_io_uring_defer(ctx, req, req->user_data); 66728c2ecf20Sopenharmony_ci de->req = req; 66738c2ecf20Sopenharmony_ci de->seq = seq; 66748c2ecf20Sopenharmony_ci list_add_tail(&de->list, &ctx->defer_list); 66758c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 66768c2ecf20Sopenharmony_ci return true; 66778c2ecf20Sopenharmony_ci} 66788c2ecf20Sopenharmony_ci 66798c2ecf20Sopenharmony_cistatic void io_clean_op(struct io_kiocb *req) 66808c2ecf20Sopenharmony_ci{ 66818c2ecf20Sopenharmony_ci if (req->flags & REQ_F_BUFFER_SELECTED) { 66828c2ecf20Sopenharmony_ci switch (req->opcode) { 66838c2ecf20Sopenharmony_ci case IORING_OP_READV: 66848c2ecf20Sopenharmony_ci case IORING_OP_READ_FIXED: 66858c2ecf20Sopenharmony_ci case IORING_OP_READ: 66868c2ecf20Sopenharmony_ci kfree((void *)(unsigned long)req->rw.addr); 66878c2ecf20Sopenharmony_ci break; 66888c2ecf20Sopenharmony_ci case IORING_OP_RECVMSG: 66898c2ecf20Sopenharmony_ci case IORING_OP_RECV: 66908c2ecf20Sopenharmony_ci kfree(req->sr_msg.kbuf); 66918c2ecf20Sopenharmony_ci break; 66928c2ecf20Sopenharmony_ci } 66938c2ecf20Sopenharmony_ci } 66948c2ecf20Sopenharmony_ci 66958c2ecf20Sopenharmony_ci if (req->flags & REQ_F_NEED_CLEANUP) { 66968c2ecf20Sopenharmony_ci switch (req->opcode) { 66978c2ecf20Sopenharmony_ci case IORING_OP_READV: 66988c2ecf20Sopenharmony_ci case IORING_OP_READ_FIXED: 66998c2ecf20Sopenharmony_ci case IORING_OP_READ: 67008c2ecf20Sopenharmony_ci case IORING_OP_WRITEV: 
67018c2ecf20Sopenharmony_ci case IORING_OP_WRITE_FIXED: 67028c2ecf20Sopenharmony_ci case IORING_OP_WRITE: { 67038c2ecf20Sopenharmony_ci struct io_async_rw *io = req->async_data; 67048c2ecf20Sopenharmony_ci 67058c2ecf20Sopenharmony_ci kfree(io->free_iovec); 67068c2ecf20Sopenharmony_ci break; 67078c2ecf20Sopenharmony_ci } 67088c2ecf20Sopenharmony_ci case IORING_OP_RECVMSG: 67098c2ecf20Sopenharmony_ci case IORING_OP_SENDMSG: { 67108c2ecf20Sopenharmony_ci struct io_async_msghdr *io = req->async_data; 67118c2ecf20Sopenharmony_ci 67128c2ecf20Sopenharmony_ci kfree(io->free_iov); 67138c2ecf20Sopenharmony_ci break; 67148c2ecf20Sopenharmony_ci } 67158c2ecf20Sopenharmony_ci case IORING_OP_OPENAT: 67168c2ecf20Sopenharmony_ci case IORING_OP_OPENAT2: 67178c2ecf20Sopenharmony_ci if (req->open.filename) 67188c2ecf20Sopenharmony_ci putname(req->open.filename); 67198c2ecf20Sopenharmony_ci break; 67208c2ecf20Sopenharmony_ci case IORING_OP_RENAMEAT: 67218c2ecf20Sopenharmony_ci putname(req->rename.oldpath); 67228c2ecf20Sopenharmony_ci putname(req->rename.newpath); 67238c2ecf20Sopenharmony_ci break; 67248c2ecf20Sopenharmony_ci case IORING_OP_UNLINKAT: 67258c2ecf20Sopenharmony_ci putname(req->unlink.filename); 67268c2ecf20Sopenharmony_ci break; 67278c2ecf20Sopenharmony_ci } 67288c2ecf20Sopenharmony_ci } 67298c2ecf20Sopenharmony_ci if ((req->flags & REQ_F_POLLED) && req->apoll) { 67308c2ecf20Sopenharmony_ci kfree(req->apoll->double_poll); 67318c2ecf20Sopenharmony_ci kfree(req->apoll); 67328c2ecf20Sopenharmony_ci req->apoll = NULL; 67338c2ecf20Sopenharmony_ci } 67348c2ecf20Sopenharmony_ci if (req->flags & REQ_F_INFLIGHT) { 67358c2ecf20Sopenharmony_ci struct io_uring_task *tctx = req->task->io_uring; 67368c2ecf20Sopenharmony_ci 67378c2ecf20Sopenharmony_ci atomic_dec(&tctx->inflight_tracked); 67388c2ecf20Sopenharmony_ci } 67398c2ecf20Sopenharmony_ci if (req->flags & REQ_F_CREDS) 67408c2ecf20Sopenharmony_ci put_cred(req->creds); 67418c2ecf20Sopenharmony_ci 67428c2ecf20Sopenharmony_ci 
req->flags &= ~IO_REQ_CLEAN_FLAGS; 67438c2ecf20Sopenharmony_ci} 67448c2ecf20Sopenharmony_ci 67458c2ecf20Sopenharmony_cistatic int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) 67468c2ecf20Sopenharmony_ci{ 67478c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 67488c2ecf20Sopenharmony_ci const struct cred *creds = NULL; 67498c2ecf20Sopenharmony_ci int ret; 67508c2ecf20Sopenharmony_ci 67518c2ecf20Sopenharmony_ci if ((req->flags & REQ_F_CREDS) && req->creds != current_cred()) 67528c2ecf20Sopenharmony_ci creds = override_creds(req->creds); 67538c2ecf20Sopenharmony_ci 67548c2ecf20Sopenharmony_ci switch (req->opcode) { 67558c2ecf20Sopenharmony_ci case IORING_OP_NOP: 67568c2ecf20Sopenharmony_ci ret = io_nop(req, issue_flags); 67578c2ecf20Sopenharmony_ci break; 67588c2ecf20Sopenharmony_ci case IORING_OP_READV: 67598c2ecf20Sopenharmony_ci case IORING_OP_READ_FIXED: 67608c2ecf20Sopenharmony_ci case IORING_OP_READ: 67618c2ecf20Sopenharmony_ci ret = io_read(req, issue_flags); 67628c2ecf20Sopenharmony_ci break; 67638c2ecf20Sopenharmony_ci case IORING_OP_WRITEV: 67648c2ecf20Sopenharmony_ci case IORING_OP_WRITE_FIXED: 67658c2ecf20Sopenharmony_ci case IORING_OP_WRITE: 67668c2ecf20Sopenharmony_ci ret = io_write(req, issue_flags); 67678c2ecf20Sopenharmony_ci break; 67688c2ecf20Sopenharmony_ci case IORING_OP_FSYNC: 67698c2ecf20Sopenharmony_ci ret = io_fsync(req, issue_flags); 67708c2ecf20Sopenharmony_ci break; 67718c2ecf20Sopenharmony_ci case IORING_OP_POLL_ADD: 67728c2ecf20Sopenharmony_ci ret = io_poll_add(req, issue_flags); 67738c2ecf20Sopenharmony_ci break; 67748c2ecf20Sopenharmony_ci case IORING_OP_POLL_REMOVE: 67758c2ecf20Sopenharmony_ci ret = io_poll_update(req, issue_flags); 67768c2ecf20Sopenharmony_ci break; 67778c2ecf20Sopenharmony_ci case IORING_OP_SYNC_FILE_RANGE: 67788c2ecf20Sopenharmony_ci ret = io_sync_file_range(req, issue_flags); 67798c2ecf20Sopenharmony_ci break; 67808c2ecf20Sopenharmony_ci case IORING_OP_SENDMSG: 67818c2ecf20Sopenharmony_ci 
ret = io_sendmsg(req, issue_flags); 67828c2ecf20Sopenharmony_ci break; 67838c2ecf20Sopenharmony_ci case IORING_OP_SEND: 67848c2ecf20Sopenharmony_ci ret = io_send(req, issue_flags); 67858c2ecf20Sopenharmony_ci break; 67868c2ecf20Sopenharmony_ci case IORING_OP_RECVMSG: 67878c2ecf20Sopenharmony_ci ret = io_recvmsg(req, issue_flags); 67888c2ecf20Sopenharmony_ci break; 67898c2ecf20Sopenharmony_ci case IORING_OP_RECV: 67908c2ecf20Sopenharmony_ci ret = io_recv(req, issue_flags); 67918c2ecf20Sopenharmony_ci break; 67928c2ecf20Sopenharmony_ci case IORING_OP_TIMEOUT: 67938c2ecf20Sopenharmony_ci ret = io_timeout(req, issue_flags); 67948c2ecf20Sopenharmony_ci break; 67958c2ecf20Sopenharmony_ci case IORING_OP_TIMEOUT_REMOVE: 67968c2ecf20Sopenharmony_ci ret = io_timeout_remove(req, issue_flags); 67978c2ecf20Sopenharmony_ci break; 67988c2ecf20Sopenharmony_ci case IORING_OP_ACCEPT: 67998c2ecf20Sopenharmony_ci ret = io_accept(req, issue_flags); 68008c2ecf20Sopenharmony_ci break; 68018c2ecf20Sopenharmony_ci case IORING_OP_CONNECT: 68028c2ecf20Sopenharmony_ci ret = io_connect(req, issue_flags); 68038c2ecf20Sopenharmony_ci break; 68048c2ecf20Sopenharmony_ci case IORING_OP_ASYNC_CANCEL: 68058c2ecf20Sopenharmony_ci ret = io_async_cancel(req, issue_flags); 68068c2ecf20Sopenharmony_ci break; 68078c2ecf20Sopenharmony_ci case IORING_OP_FALLOCATE: 68088c2ecf20Sopenharmony_ci ret = io_fallocate(req, issue_flags); 68098c2ecf20Sopenharmony_ci break; 68108c2ecf20Sopenharmony_ci case IORING_OP_OPENAT: 68118c2ecf20Sopenharmony_ci ret = io_openat(req, issue_flags); 68128c2ecf20Sopenharmony_ci break; 68138c2ecf20Sopenharmony_ci case IORING_OP_CLOSE: 68148c2ecf20Sopenharmony_ci ret = io_close(req, issue_flags); 68158c2ecf20Sopenharmony_ci break; 68168c2ecf20Sopenharmony_ci case IORING_OP_FILES_UPDATE: 68178c2ecf20Sopenharmony_ci ret = io_files_update(req, issue_flags); 68188c2ecf20Sopenharmony_ci break; 68198c2ecf20Sopenharmony_ci case IORING_OP_STATX: 68208c2ecf20Sopenharmony_ci ret = io_statx(req, 
issue_flags); 68218c2ecf20Sopenharmony_ci break; 68228c2ecf20Sopenharmony_ci case IORING_OP_FADVISE: 68238c2ecf20Sopenharmony_ci ret = io_fadvise(req, issue_flags); 68248c2ecf20Sopenharmony_ci break; 68258c2ecf20Sopenharmony_ci case IORING_OP_MADVISE: 68268c2ecf20Sopenharmony_ci ret = io_madvise(req, issue_flags); 68278c2ecf20Sopenharmony_ci break; 68288c2ecf20Sopenharmony_ci case IORING_OP_OPENAT2: 68298c2ecf20Sopenharmony_ci ret = io_openat2(req, issue_flags); 68308c2ecf20Sopenharmony_ci break; 68318c2ecf20Sopenharmony_ci case IORING_OP_EPOLL_CTL: 68328c2ecf20Sopenharmony_ci ret = io_epoll_ctl(req, issue_flags); 68338c2ecf20Sopenharmony_ci break; 68348c2ecf20Sopenharmony_ci case IORING_OP_SPLICE: 68358c2ecf20Sopenharmony_ci ret = io_splice(req, issue_flags); 68368c2ecf20Sopenharmony_ci break; 68378c2ecf20Sopenharmony_ci case IORING_OP_PROVIDE_BUFFERS: 68388c2ecf20Sopenharmony_ci ret = io_provide_buffers(req, issue_flags); 68398c2ecf20Sopenharmony_ci break; 68408c2ecf20Sopenharmony_ci case IORING_OP_REMOVE_BUFFERS: 68418c2ecf20Sopenharmony_ci ret = io_remove_buffers(req, issue_flags); 68428c2ecf20Sopenharmony_ci break; 68438c2ecf20Sopenharmony_ci case IORING_OP_TEE: 68448c2ecf20Sopenharmony_ci ret = io_tee(req, issue_flags); 68458c2ecf20Sopenharmony_ci break; 68468c2ecf20Sopenharmony_ci case IORING_OP_SHUTDOWN: 68478c2ecf20Sopenharmony_ci ret = io_shutdown(req, issue_flags); 68488c2ecf20Sopenharmony_ci break; 68498c2ecf20Sopenharmony_ci case IORING_OP_RENAMEAT: 68508c2ecf20Sopenharmony_ci ret = io_renameat(req, issue_flags); 68518c2ecf20Sopenharmony_ci break; 68528c2ecf20Sopenharmony_ci case IORING_OP_UNLINKAT: 68538c2ecf20Sopenharmony_ci ret = io_unlinkat(req, issue_flags); 68548c2ecf20Sopenharmony_ci break; 68558c2ecf20Sopenharmony_ci default: 68568c2ecf20Sopenharmony_ci ret = -EINVAL; 68578c2ecf20Sopenharmony_ci break; 68588c2ecf20Sopenharmony_ci } 68598c2ecf20Sopenharmony_ci 68608c2ecf20Sopenharmony_ci if (creds) 68618c2ecf20Sopenharmony_ci 
revert_creds(creds); 68628c2ecf20Sopenharmony_ci if (ret) 68638c2ecf20Sopenharmony_ci return ret; 68648c2ecf20Sopenharmony_ci /* If the op doesn't have a file, we're not polling for it */ 68658c2ecf20Sopenharmony_ci if ((ctx->flags & IORING_SETUP_IOPOLL) && req->file) 68668c2ecf20Sopenharmony_ci io_iopoll_req_issued(req); 68678c2ecf20Sopenharmony_ci 68688c2ecf20Sopenharmony_ci return 0; 68698c2ecf20Sopenharmony_ci} 68708c2ecf20Sopenharmony_ci 68718c2ecf20Sopenharmony_cistatic struct io_wq_work *io_wq_free_work(struct io_wq_work *work) 68728c2ecf20Sopenharmony_ci{ 68738c2ecf20Sopenharmony_ci struct io_kiocb *req = container_of(work, struct io_kiocb, work); 68748c2ecf20Sopenharmony_ci 68758c2ecf20Sopenharmony_ci req = io_put_req_find_next(req); 68768c2ecf20Sopenharmony_ci return req ? &req->work : NULL; 68778c2ecf20Sopenharmony_ci} 68788c2ecf20Sopenharmony_ci 68798c2ecf20Sopenharmony_cistatic void io_wq_submit_work(struct io_wq_work *work) 68808c2ecf20Sopenharmony_ci{ 68818c2ecf20Sopenharmony_ci struct io_kiocb *req = container_of(work, struct io_kiocb, work); 68828c2ecf20Sopenharmony_ci struct io_kiocb *timeout; 68838c2ecf20Sopenharmony_ci int ret = 0; 68848c2ecf20Sopenharmony_ci 68858c2ecf20Sopenharmony_ci /* one will be dropped by ->io_free_work() after returning to io-wq */ 68868c2ecf20Sopenharmony_ci if (!(req->flags & REQ_F_REFCOUNT)) 68878c2ecf20Sopenharmony_ci __io_req_set_refcount(req, 2); 68888c2ecf20Sopenharmony_ci else 68898c2ecf20Sopenharmony_ci req_ref_get(req); 68908c2ecf20Sopenharmony_ci 68918c2ecf20Sopenharmony_ci timeout = io_prep_linked_timeout(req); 68928c2ecf20Sopenharmony_ci if (timeout) 68938c2ecf20Sopenharmony_ci io_queue_linked_timeout(timeout); 68948c2ecf20Sopenharmony_ci /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ 68958c2ecf20Sopenharmony_ci if (work->flags & IO_WQ_WORK_CANCEL) 68968c2ecf20Sopenharmony_ci ret = -ECANCELED; 68978c2ecf20Sopenharmony_ci 68988c2ecf20Sopenharmony_ci if (!ret) { 68998c2ecf20Sopenharmony_ci 
do { 69008c2ecf20Sopenharmony_ci ret = io_issue_sqe(req, 0); 69018c2ecf20Sopenharmony_ci /* 69028c2ecf20Sopenharmony_ci * We can get EAGAIN for polled IO even though we're 69038c2ecf20Sopenharmony_ci * forcing a sync submission from here, since we can't 69048c2ecf20Sopenharmony_ci * wait for request slots on the block side. 69058c2ecf20Sopenharmony_ci */ 69068c2ecf20Sopenharmony_ci if (ret != -EAGAIN || !(req->ctx->flags & IORING_SETUP_IOPOLL)) 69078c2ecf20Sopenharmony_ci break; 69088c2ecf20Sopenharmony_ci if (io_wq_worker_stopped()) 69098c2ecf20Sopenharmony_ci break; 69108c2ecf20Sopenharmony_ci /* 69118c2ecf20Sopenharmony_ci * If REQ_F_NOWAIT is set, then don't wait or retry with 69128c2ecf20Sopenharmony_ci * poll. -EAGAIN is final for that case. 69138c2ecf20Sopenharmony_ci */ 69148c2ecf20Sopenharmony_ci if (req->flags & REQ_F_NOWAIT) 69158c2ecf20Sopenharmony_ci break; 69168c2ecf20Sopenharmony_ci 69178c2ecf20Sopenharmony_ci cond_resched(); 69188c2ecf20Sopenharmony_ci } while (1); 69198c2ecf20Sopenharmony_ci } 69208c2ecf20Sopenharmony_ci 69218c2ecf20Sopenharmony_ci /* avoid locking problems by failing it from a clean context */ 69228c2ecf20Sopenharmony_ci if (ret) 69238c2ecf20Sopenharmony_ci io_req_task_queue_fail(req, ret); 69248c2ecf20Sopenharmony_ci} 69258c2ecf20Sopenharmony_ci 69268c2ecf20Sopenharmony_cistatic inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table, 69278c2ecf20Sopenharmony_ci unsigned i) 69288c2ecf20Sopenharmony_ci{ 69298c2ecf20Sopenharmony_ci return &table->files[i]; 69308c2ecf20Sopenharmony_ci} 69318c2ecf20Sopenharmony_ci 69328c2ecf20Sopenharmony_cistatic inline struct file *io_file_from_index(struct io_ring_ctx *ctx, 69338c2ecf20Sopenharmony_ci int index) 69348c2ecf20Sopenharmony_ci{ 69358c2ecf20Sopenharmony_ci struct io_fixed_file *slot = io_fixed_file_slot(&ctx->file_table, index); 69368c2ecf20Sopenharmony_ci 69378c2ecf20Sopenharmony_ci return (struct file *) (slot->file_ptr & FFS_MASK); 69388c2ecf20Sopenharmony_ci} 
69398c2ecf20Sopenharmony_ci 69408c2ecf20Sopenharmony_cistatic void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file) 69418c2ecf20Sopenharmony_ci{ 69428c2ecf20Sopenharmony_ci unsigned long file_ptr = (unsigned long) file; 69438c2ecf20Sopenharmony_ci 69448c2ecf20Sopenharmony_ci if (__io_file_supports_nowait(file, READ)) 69458c2ecf20Sopenharmony_ci file_ptr |= FFS_ASYNC_READ; 69468c2ecf20Sopenharmony_ci if (__io_file_supports_nowait(file, WRITE)) 69478c2ecf20Sopenharmony_ci file_ptr |= FFS_ASYNC_WRITE; 69488c2ecf20Sopenharmony_ci if (S_ISREG(file_inode(file)->i_mode)) 69498c2ecf20Sopenharmony_ci file_ptr |= FFS_ISREG; 69508c2ecf20Sopenharmony_ci file_slot->file_ptr = file_ptr; 69518c2ecf20Sopenharmony_ci} 69528c2ecf20Sopenharmony_ci 69538c2ecf20Sopenharmony_cistatic inline struct file *io_file_get_fixed(struct io_ring_ctx *ctx, 69548c2ecf20Sopenharmony_ci struct io_kiocb *req, int fd, 69558c2ecf20Sopenharmony_ci unsigned int issue_flags) 69568c2ecf20Sopenharmony_ci{ 69578c2ecf20Sopenharmony_ci struct file *file = NULL; 69588c2ecf20Sopenharmony_ci unsigned long file_ptr; 69598c2ecf20Sopenharmony_ci 69608c2ecf20Sopenharmony_ci io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); 69618c2ecf20Sopenharmony_ci 69628c2ecf20Sopenharmony_ci if (unlikely((unsigned int)fd >= ctx->nr_user_files)) 69638c2ecf20Sopenharmony_ci goto out; 69648c2ecf20Sopenharmony_ci fd = array_index_nospec(fd, ctx->nr_user_files); 69658c2ecf20Sopenharmony_ci file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; 69668c2ecf20Sopenharmony_ci file = (struct file *) (file_ptr & FFS_MASK); 69678c2ecf20Sopenharmony_ci file_ptr &= ~FFS_MASK; 69688c2ecf20Sopenharmony_ci /* mask in overlapping REQ_F and FFS bits */ 69698c2ecf20Sopenharmony_ci req->flags |= (file_ptr << REQ_F_NOWAIT_READ_BIT); 69708c2ecf20Sopenharmony_ci io_req_set_rsrc_node(req); 69718c2ecf20Sopenharmony_ciout: 69728c2ecf20Sopenharmony_ci io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); 
69738c2ecf20Sopenharmony_ci return file; 69748c2ecf20Sopenharmony_ci} 69758c2ecf20Sopenharmony_ci 69768c2ecf20Sopenharmony_cistatic struct file *io_file_get_normal(struct io_ring_ctx *ctx, 69778c2ecf20Sopenharmony_ci struct io_kiocb *req, int fd) 69788c2ecf20Sopenharmony_ci{ 69798c2ecf20Sopenharmony_ci struct file *file = fget(fd); 69808c2ecf20Sopenharmony_ci 69818c2ecf20Sopenharmony_ci trace_io_uring_file_get(ctx, fd); 69828c2ecf20Sopenharmony_ci 69838c2ecf20Sopenharmony_ci /* we don't allow fixed io_uring files */ 69848c2ecf20Sopenharmony_ci if (file && unlikely(file->f_op == &io_uring_fops)) 69858c2ecf20Sopenharmony_ci io_req_track_inflight(req); 69868c2ecf20Sopenharmony_ci return file; 69878c2ecf20Sopenharmony_ci} 69888c2ecf20Sopenharmony_ci 69898c2ecf20Sopenharmony_cistatic inline struct file *io_file_get(struct io_ring_ctx *ctx, 69908c2ecf20Sopenharmony_ci struct io_kiocb *req, int fd, bool fixed, 69918c2ecf20Sopenharmony_ci unsigned int issue_flags) 69928c2ecf20Sopenharmony_ci{ 69938c2ecf20Sopenharmony_ci if (fixed) 69948c2ecf20Sopenharmony_ci return io_file_get_fixed(ctx, req, fd, issue_flags); 69958c2ecf20Sopenharmony_ci else 69968c2ecf20Sopenharmony_ci return io_file_get_normal(ctx, req, fd); 69978c2ecf20Sopenharmony_ci} 69988c2ecf20Sopenharmony_ci 69998c2ecf20Sopenharmony_cistatic void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) 70008c2ecf20Sopenharmony_ci{ 70018c2ecf20Sopenharmony_ci struct io_kiocb *prev = req->timeout.prev; 70028c2ecf20Sopenharmony_ci int ret = -ENOENT; 70038c2ecf20Sopenharmony_ci 70048c2ecf20Sopenharmony_ci if (prev) { 70058c2ecf20Sopenharmony_ci if (!(req->task->flags & PF_EXITING)) 70068c2ecf20Sopenharmony_ci ret = io_try_cancel_userdata(req, prev->user_data); 70078c2ecf20Sopenharmony_ci io_req_complete_post(req, ret ?: -ETIME, 0); 70088c2ecf20Sopenharmony_ci io_put_req(prev); 70098c2ecf20Sopenharmony_ci } else { 70108c2ecf20Sopenharmony_ci io_req_complete_post(req, -ETIME, 0); 70118c2ecf20Sopenharmony_ci } 
70128c2ecf20Sopenharmony_ci} 70138c2ecf20Sopenharmony_ci 70148c2ecf20Sopenharmony_cistatic enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) 70158c2ecf20Sopenharmony_ci{ 70168c2ecf20Sopenharmony_ci struct io_timeout_data *data = container_of(timer, 70178c2ecf20Sopenharmony_ci struct io_timeout_data, timer); 70188c2ecf20Sopenharmony_ci struct io_kiocb *prev, *req = data->req; 70198c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 70208c2ecf20Sopenharmony_ci unsigned long flags; 70218c2ecf20Sopenharmony_ci 70228c2ecf20Sopenharmony_ci spin_lock_irqsave(&ctx->timeout_lock, flags); 70238c2ecf20Sopenharmony_ci prev = req->timeout.head; 70248c2ecf20Sopenharmony_ci req->timeout.head = NULL; 70258c2ecf20Sopenharmony_ci 70268c2ecf20Sopenharmony_ci /* 70278c2ecf20Sopenharmony_ci * We don't expect the list to be empty, that will only happen if we 70288c2ecf20Sopenharmony_ci * race with the completion of the linked work. 70298c2ecf20Sopenharmony_ci */ 70308c2ecf20Sopenharmony_ci if (prev) { 70318c2ecf20Sopenharmony_ci io_remove_next_linked(prev); 70328c2ecf20Sopenharmony_ci if (!req_ref_inc_not_zero(prev)) 70338c2ecf20Sopenharmony_ci prev = NULL; 70348c2ecf20Sopenharmony_ci } 70358c2ecf20Sopenharmony_ci list_del(&req->timeout.list); 70368c2ecf20Sopenharmony_ci req->timeout.prev = prev; 70378c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&ctx->timeout_lock, flags); 70388c2ecf20Sopenharmony_ci 70398c2ecf20Sopenharmony_ci req->io_task_work.func = io_req_task_link_timeout; 70408c2ecf20Sopenharmony_ci io_req_task_work_add(req); 70418c2ecf20Sopenharmony_ci return HRTIMER_NORESTART; 70428c2ecf20Sopenharmony_ci} 70438c2ecf20Sopenharmony_ci 70448c2ecf20Sopenharmony_cistatic void io_queue_linked_timeout(struct io_kiocb *req) 70458c2ecf20Sopenharmony_ci{ 70468c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 70478c2ecf20Sopenharmony_ci 70488c2ecf20Sopenharmony_ci spin_lock_irq(&ctx->timeout_lock); 70498c2ecf20Sopenharmony_ci /* 70508c2ecf20Sopenharmony_ci * 
If the back reference is NULL, then our linked request finished 70518c2ecf20Sopenharmony_ci * before we got a chance to setup the timer 70528c2ecf20Sopenharmony_ci */ 70538c2ecf20Sopenharmony_ci if (req->timeout.head) { 70548c2ecf20Sopenharmony_ci struct io_timeout_data *data = req->async_data; 70558c2ecf20Sopenharmony_ci 70568c2ecf20Sopenharmony_ci data->timer.function = io_link_timeout_fn; 70578c2ecf20Sopenharmony_ci hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), 70588c2ecf20Sopenharmony_ci data->mode); 70598c2ecf20Sopenharmony_ci list_add_tail(&req->timeout.list, &ctx->ltimeout_list); 70608c2ecf20Sopenharmony_ci } 70618c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->timeout_lock); 70628c2ecf20Sopenharmony_ci /* drop submission reference */ 70638c2ecf20Sopenharmony_ci io_put_req(req); 70648c2ecf20Sopenharmony_ci} 70658c2ecf20Sopenharmony_ci 70668c2ecf20Sopenharmony_cistatic void __io_queue_sqe(struct io_kiocb *req) 70678c2ecf20Sopenharmony_ci __must_hold(&req->ctx->uring_lock) 70688c2ecf20Sopenharmony_ci{ 70698c2ecf20Sopenharmony_ci struct io_kiocb *linked_timeout; 70708c2ecf20Sopenharmony_ci int ret; 70718c2ecf20Sopenharmony_ci 70728c2ecf20Sopenharmony_ciissue_sqe: 70738c2ecf20Sopenharmony_ci ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER); 70748c2ecf20Sopenharmony_ci 70758c2ecf20Sopenharmony_ci /* 70768c2ecf20Sopenharmony_ci * We async punt it if the file wasn't marked NOWAIT, or if the file 70778c2ecf20Sopenharmony_ci * doesn't support non-blocking read/write attempts 70788c2ecf20Sopenharmony_ci */ 70798c2ecf20Sopenharmony_ci if (likely(!ret)) { 70808c2ecf20Sopenharmony_ci if (req->flags & REQ_F_COMPLETE_INLINE) { 70818c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 70828c2ecf20Sopenharmony_ci struct io_submit_state *state = &ctx->submit_state; 70838c2ecf20Sopenharmony_ci 70848c2ecf20Sopenharmony_ci state->compl_reqs[state->compl_nr++] = req; 70858c2ecf20Sopenharmony_ci if (state->compl_nr == 
ARRAY_SIZE(state->compl_reqs)) 70868c2ecf20Sopenharmony_ci io_submit_flush_completions(ctx); 70878c2ecf20Sopenharmony_ci return; 70888c2ecf20Sopenharmony_ci } 70898c2ecf20Sopenharmony_ci 70908c2ecf20Sopenharmony_ci linked_timeout = io_prep_linked_timeout(req); 70918c2ecf20Sopenharmony_ci if (linked_timeout) 70928c2ecf20Sopenharmony_ci io_queue_linked_timeout(linked_timeout); 70938c2ecf20Sopenharmony_ci } else if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { 70948c2ecf20Sopenharmony_ci linked_timeout = io_prep_linked_timeout(req); 70958c2ecf20Sopenharmony_ci 70968c2ecf20Sopenharmony_ci switch (io_arm_poll_handler(req)) { 70978c2ecf20Sopenharmony_ci case IO_APOLL_READY: 70988c2ecf20Sopenharmony_ci if (linked_timeout) 70998c2ecf20Sopenharmony_ci io_queue_linked_timeout(linked_timeout); 71008c2ecf20Sopenharmony_ci goto issue_sqe; 71018c2ecf20Sopenharmony_ci case IO_APOLL_ABORTED: 71028c2ecf20Sopenharmony_ci /* 71038c2ecf20Sopenharmony_ci * Queued up for async execution, worker will release 71048c2ecf20Sopenharmony_ci * submit reference when the iocb is actually submitted. 
71058c2ecf20Sopenharmony_ci */ 71068c2ecf20Sopenharmony_ci io_queue_async_work(req, NULL); 71078c2ecf20Sopenharmony_ci break; 71088c2ecf20Sopenharmony_ci } 71098c2ecf20Sopenharmony_ci 71108c2ecf20Sopenharmony_ci if (linked_timeout) 71118c2ecf20Sopenharmony_ci io_queue_linked_timeout(linked_timeout); 71128c2ecf20Sopenharmony_ci } else { 71138c2ecf20Sopenharmony_ci io_req_complete_failed(req, ret); 71148c2ecf20Sopenharmony_ci } 71158c2ecf20Sopenharmony_ci} 71168c2ecf20Sopenharmony_ci 71178c2ecf20Sopenharmony_cistatic inline void io_queue_sqe(struct io_kiocb *req) 71188c2ecf20Sopenharmony_ci __must_hold(&req->ctx->uring_lock) 71198c2ecf20Sopenharmony_ci{ 71208c2ecf20Sopenharmony_ci if (unlikely(req->ctx->drain_active) && io_drain_req(req)) 71218c2ecf20Sopenharmony_ci return; 71228c2ecf20Sopenharmony_ci 71238c2ecf20Sopenharmony_ci if (likely(!(req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)))) { 71248c2ecf20Sopenharmony_ci __io_queue_sqe(req); 71258c2ecf20Sopenharmony_ci } else if (req->flags & REQ_F_FAIL) { 71268c2ecf20Sopenharmony_ci io_req_complete_fail_submit(req); 71278c2ecf20Sopenharmony_ci } else { 71288c2ecf20Sopenharmony_ci int ret = io_req_prep_async(req); 71298c2ecf20Sopenharmony_ci 71308c2ecf20Sopenharmony_ci if (unlikely(ret)) 71318c2ecf20Sopenharmony_ci io_req_complete_failed(req, ret); 71328c2ecf20Sopenharmony_ci else 71338c2ecf20Sopenharmony_ci io_queue_async_work(req, NULL); 71348c2ecf20Sopenharmony_ci } 71358c2ecf20Sopenharmony_ci} 71368c2ecf20Sopenharmony_ci 71378c2ecf20Sopenharmony_ci/* 71388c2ecf20Sopenharmony_ci * Check SQE restrictions (opcode and flags). 71398c2ecf20Sopenharmony_ci * 71408c2ecf20Sopenharmony_ci * Returns 'true' if SQE is allowed, 'false' otherwise. 
71418c2ecf20Sopenharmony_ci */ 71428c2ecf20Sopenharmony_cistatic inline bool io_check_restriction(struct io_ring_ctx *ctx, 71438c2ecf20Sopenharmony_ci struct io_kiocb *req, 71448c2ecf20Sopenharmony_ci unsigned int sqe_flags) 71458c2ecf20Sopenharmony_ci{ 71468c2ecf20Sopenharmony_ci if (likely(!ctx->restricted)) 71478c2ecf20Sopenharmony_ci return true; 71488c2ecf20Sopenharmony_ci 71498c2ecf20Sopenharmony_ci if (!test_bit(req->opcode, ctx->restrictions.sqe_op)) 71508c2ecf20Sopenharmony_ci return false; 71518c2ecf20Sopenharmony_ci 71528c2ecf20Sopenharmony_ci if ((sqe_flags & ctx->restrictions.sqe_flags_required) != 71538c2ecf20Sopenharmony_ci ctx->restrictions.sqe_flags_required) 71548c2ecf20Sopenharmony_ci return false; 71558c2ecf20Sopenharmony_ci 71568c2ecf20Sopenharmony_ci if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed | 71578c2ecf20Sopenharmony_ci ctx->restrictions.sqe_flags_required)) 71588c2ecf20Sopenharmony_ci return false; 71598c2ecf20Sopenharmony_ci 71608c2ecf20Sopenharmony_ci return true; 71618c2ecf20Sopenharmony_ci} 71628c2ecf20Sopenharmony_ci 71638c2ecf20Sopenharmony_cistatic int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, 71648c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 71658c2ecf20Sopenharmony_ci __must_hold(&ctx->uring_lock) 71668c2ecf20Sopenharmony_ci{ 71678c2ecf20Sopenharmony_ci struct io_submit_state *state; 71688c2ecf20Sopenharmony_ci unsigned int sqe_flags; 71698c2ecf20Sopenharmony_ci int personality, ret = 0; 71708c2ecf20Sopenharmony_ci 71718c2ecf20Sopenharmony_ci /* req is partially pre-initialised, see io_preinit_req() */ 71728c2ecf20Sopenharmony_ci req->opcode = READ_ONCE(sqe->opcode); 71738c2ecf20Sopenharmony_ci /* same numerical values with corresponding REQ_F_*, safe to copy */ 71748c2ecf20Sopenharmony_ci req->flags = sqe_flags = READ_ONCE(sqe->flags); 71758c2ecf20Sopenharmony_ci req->user_data = READ_ONCE(sqe->user_data); 71768c2ecf20Sopenharmony_ci req->file = NULL; 71778c2ecf20Sopenharmony_ci 
req->fixed_rsrc_refs = NULL; 71788c2ecf20Sopenharmony_ci req->task = current; 71798c2ecf20Sopenharmony_ci 71808c2ecf20Sopenharmony_ci /* enforce forwards compatibility on users */ 71818c2ecf20Sopenharmony_ci if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) 71828c2ecf20Sopenharmony_ci return -EINVAL; 71838c2ecf20Sopenharmony_ci if (unlikely(req->opcode >= IORING_OP_LAST)) 71848c2ecf20Sopenharmony_ci return -EINVAL; 71858c2ecf20Sopenharmony_ci if (!io_check_restriction(ctx, req, sqe_flags)) 71868c2ecf20Sopenharmony_ci return -EACCES; 71878c2ecf20Sopenharmony_ci 71888c2ecf20Sopenharmony_ci if ((sqe_flags & IOSQE_BUFFER_SELECT) && 71898c2ecf20Sopenharmony_ci !io_op_defs[req->opcode].buffer_select) 71908c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 71918c2ecf20Sopenharmony_ci if (unlikely(sqe_flags & IOSQE_IO_DRAIN)) 71928c2ecf20Sopenharmony_ci ctx->drain_active = true; 71938c2ecf20Sopenharmony_ci 71948c2ecf20Sopenharmony_ci personality = READ_ONCE(sqe->personality); 71958c2ecf20Sopenharmony_ci if (personality) { 71968c2ecf20Sopenharmony_ci req->creds = xa_load(&ctx->personalities, personality); 71978c2ecf20Sopenharmony_ci if (!req->creds) 71988c2ecf20Sopenharmony_ci return -EINVAL; 71998c2ecf20Sopenharmony_ci get_cred(req->creds); 72008c2ecf20Sopenharmony_ci req->flags |= REQ_F_CREDS; 72018c2ecf20Sopenharmony_ci } 72028c2ecf20Sopenharmony_ci state = &ctx->submit_state; 72038c2ecf20Sopenharmony_ci 72048c2ecf20Sopenharmony_ci /* 72058c2ecf20Sopenharmony_ci * Plug now if we have more than 1 IO left after this, and the target 72068c2ecf20Sopenharmony_ci * is potentially a read/write to block based storage. 
72078c2ecf20Sopenharmony_ci */ 72088c2ecf20Sopenharmony_ci if (!state->plug_started && state->ios_left > 1 && 72098c2ecf20Sopenharmony_ci io_op_defs[req->opcode].plug) { 72108c2ecf20Sopenharmony_ci blk_start_plug(&state->plug); 72118c2ecf20Sopenharmony_ci state->plug_started = true; 72128c2ecf20Sopenharmony_ci } 72138c2ecf20Sopenharmony_ci 72148c2ecf20Sopenharmony_ci if (io_op_defs[req->opcode].needs_file) { 72158c2ecf20Sopenharmony_ci req->file = io_file_get(ctx, req, READ_ONCE(sqe->fd), 72168c2ecf20Sopenharmony_ci (sqe_flags & IOSQE_FIXED_FILE), 72178c2ecf20Sopenharmony_ci IO_URING_F_NONBLOCK); 72188c2ecf20Sopenharmony_ci if (unlikely(!req->file)) 72198c2ecf20Sopenharmony_ci ret = -EBADF; 72208c2ecf20Sopenharmony_ci } 72218c2ecf20Sopenharmony_ci 72228c2ecf20Sopenharmony_ci state->ios_left--; 72238c2ecf20Sopenharmony_ci return ret; 72248c2ecf20Sopenharmony_ci} 72258c2ecf20Sopenharmony_ci 72268c2ecf20Sopenharmony_cistatic int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, 72278c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe) 72288c2ecf20Sopenharmony_ci __must_hold(&ctx->uring_lock) 72298c2ecf20Sopenharmony_ci{ 72308c2ecf20Sopenharmony_ci struct io_submit_link *link = &ctx->submit_state.link; 72318c2ecf20Sopenharmony_ci int ret; 72328c2ecf20Sopenharmony_ci 72338c2ecf20Sopenharmony_ci ret = io_init_req(ctx, req, sqe); 72348c2ecf20Sopenharmony_ci if (unlikely(ret)) { 72358c2ecf20Sopenharmony_cifail_req: 72368c2ecf20Sopenharmony_ci /* fail even hard links since we don't submit */ 72378c2ecf20Sopenharmony_ci if (link->head) { 72388c2ecf20Sopenharmony_ci /* 72398c2ecf20Sopenharmony_ci * we can judge a link req is failed or cancelled by if 72408c2ecf20Sopenharmony_ci * REQ_F_FAIL is set, but the head is an exception since 72418c2ecf20Sopenharmony_ci * it may be set REQ_F_FAIL because of other req's failure 72428c2ecf20Sopenharmony_ci * so let's leverage req->result to distinguish if a head 72438c2ecf20Sopenharmony_ci * is set REQ_F_FAIL because of its 
failure or other req's 72448c2ecf20Sopenharmony_ci * failure so that we can set the correct ret code for it. 72458c2ecf20Sopenharmony_ci * init result here to avoid affecting the normal path. 72468c2ecf20Sopenharmony_ci */ 72478c2ecf20Sopenharmony_ci if (!(link->head->flags & REQ_F_FAIL)) 72488c2ecf20Sopenharmony_ci req_fail_link_node(link->head, -ECANCELED); 72498c2ecf20Sopenharmony_ci } else if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) { 72508c2ecf20Sopenharmony_ci /* 72518c2ecf20Sopenharmony_ci * the current req is a normal req, we should return 72528c2ecf20Sopenharmony_ci * error and thus break the submittion loop. 72538c2ecf20Sopenharmony_ci */ 72548c2ecf20Sopenharmony_ci io_req_complete_failed(req, ret); 72558c2ecf20Sopenharmony_ci return ret; 72568c2ecf20Sopenharmony_ci } 72578c2ecf20Sopenharmony_ci req_fail_link_node(req, ret); 72588c2ecf20Sopenharmony_ci } else { 72598c2ecf20Sopenharmony_ci ret = io_req_prep(req, sqe); 72608c2ecf20Sopenharmony_ci if (unlikely(ret)) 72618c2ecf20Sopenharmony_ci goto fail_req; 72628c2ecf20Sopenharmony_ci } 72638c2ecf20Sopenharmony_ci 72648c2ecf20Sopenharmony_ci /* don't need @sqe from now on */ 72658c2ecf20Sopenharmony_ci trace_io_uring_submit_sqe(ctx, req, req->opcode, req->user_data, 72668c2ecf20Sopenharmony_ci req->flags, true, 72678c2ecf20Sopenharmony_ci ctx->flags & IORING_SETUP_SQPOLL); 72688c2ecf20Sopenharmony_ci 72698c2ecf20Sopenharmony_ci /* 72708c2ecf20Sopenharmony_ci * If we already have a head request, queue this one for async 72718c2ecf20Sopenharmony_ci * submittal once the head completes. If we don't have a head but 72728c2ecf20Sopenharmony_ci * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be 72738c2ecf20Sopenharmony_ci * submitted sync once the chain is complete. If none of those 72748c2ecf20Sopenharmony_ci * conditions are true (normal request), then just queue it. 
72758c2ecf20Sopenharmony_ci */ 72768c2ecf20Sopenharmony_ci if (link->head) { 72778c2ecf20Sopenharmony_ci struct io_kiocb *head = link->head; 72788c2ecf20Sopenharmony_ci 72798c2ecf20Sopenharmony_ci if (!(req->flags & REQ_F_FAIL)) { 72808c2ecf20Sopenharmony_ci ret = io_req_prep_async(req); 72818c2ecf20Sopenharmony_ci if (unlikely(ret)) { 72828c2ecf20Sopenharmony_ci req_fail_link_node(req, ret); 72838c2ecf20Sopenharmony_ci if (!(head->flags & REQ_F_FAIL)) 72848c2ecf20Sopenharmony_ci req_fail_link_node(head, -ECANCELED); 72858c2ecf20Sopenharmony_ci } 72868c2ecf20Sopenharmony_ci } 72878c2ecf20Sopenharmony_ci trace_io_uring_link(ctx, req, head); 72888c2ecf20Sopenharmony_ci link->last->link = req; 72898c2ecf20Sopenharmony_ci link->last = req; 72908c2ecf20Sopenharmony_ci 72918c2ecf20Sopenharmony_ci /* last request of a link, enqueue the link */ 72928c2ecf20Sopenharmony_ci if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) { 72938c2ecf20Sopenharmony_ci link->head = NULL; 72948c2ecf20Sopenharmony_ci io_queue_sqe(head); 72958c2ecf20Sopenharmony_ci } 72968c2ecf20Sopenharmony_ci } else { 72978c2ecf20Sopenharmony_ci if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) { 72988c2ecf20Sopenharmony_ci link->head = req; 72998c2ecf20Sopenharmony_ci link->last = req; 73008c2ecf20Sopenharmony_ci } else { 73018c2ecf20Sopenharmony_ci io_queue_sqe(req); 73028c2ecf20Sopenharmony_ci } 73038c2ecf20Sopenharmony_ci } 73048c2ecf20Sopenharmony_ci 73058c2ecf20Sopenharmony_ci return 0; 73068c2ecf20Sopenharmony_ci} 73078c2ecf20Sopenharmony_ci 73088c2ecf20Sopenharmony_ci/* 73098c2ecf20Sopenharmony_ci * Batched submission is done, ensure local IO is flushed out. 
73108c2ecf20Sopenharmony_ci */ 73118c2ecf20Sopenharmony_cistatic void io_submit_state_end(struct io_submit_state *state, 73128c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx) 73138c2ecf20Sopenharmony_ci{ 73148c2ecf20Sopenharmony_ci if (state->link.head) 73158c2ecf20Sopenharmony_ci io_queue_sqe(state->link.head); 73168c2ecf20Sopenharmony_ci if (state->compl_nr) 73178c2ecf20Sopenharmony_ci io_submit_flush_completions(ctx); 73188c2ecf20Sopenharmony_ci if (state->plug_started) 73198c2ecf20Sopenharmony_ci blk_finish_plug(&state->plug); 73208c2ecf20Sopenharmony_ci} 73218c2ecf20Sopenharmony_ci 73228c2ecf20Sopenharmony_ci/* 73238c2ecf20Sopenharmony_ci * Start submission side cache. 73248c2ecf20Sopenharmony_ci */ 73258c2ecf20Sopenharmony_cistatic void io_submit_state_start(struct io_submit_state *state, 73268c2ecf20Sopenharmony_ci unsigned int max_ios) 73278c2ecf20Sopenharmony_ci{ 73288c2ecf20Sopenharmony_ci state->plug_started = false; 73298c2ecf20Sopenharmony_ci state->ios_left = max_ios; 73308c2ecf20Sopenharmony_ci /* set only head, no need to init link_last in advance */ 73318c2ecf20Sopenharmony_ci state->link.head = NULL; 73328c2ecf20Sopenharmony_ci} 73338c2ecf20Sopenharmony_ci 73348c2ecf20Sopenharmony_cistatic void io_commit_sqring(struct io_ring_ctx *ctx) 73358c2ecf20Sopenharmony_ci{ 73368c2ecf20Sopenharmony_ci struct io_rings *rings = ctx->rings; 73378c2ecf20Sopenharmony_ci 73388c2ecf20Sopenharmony_ci /* 73398c2ecf20Sopenharmony_ci * Ensure any loads from the SQEs are done at this point, 73408c2ecf20Sopenharmony_ci * since once we write the new head, the application could 73418c2ecf20Sopenharmony_ci * write new data to them. 73428c2ecf20Sopenharmony_ci */ 73438c2ecf20Sopenharmony_ci smp_store_release(&rings->sq.head, ctx->cached_sq_head); 73448c2ecf20Sopenharmony_ci} 73458c2ecf20Sopenharmony_ci 73468c2ecf20Sopenharmony_ci/* 73478c2ecf20Sopenharmony_ci * Fetch an sqe, if one is available. 
Note this returns a pointer to memory 73488c2ecf20Sopenharmony_ci * that is mapped by userspace. This means that care needs to be taken to 73498c2ecf20Sopenharmony_ci * ensure that reads are stable, as we cannot rely on userspace always 73508c2ecf20Sopenharmony_ci * being a good citizen. If members of the sqe are validated and then later 73518c2ecf20Sopenharmony_ci * used, it's important that those reads are done through READ_ONCE() to 73528c2ecf20Sopenharmony_ci * prevent a re-load down the line. 73538c2ecf20Sopenharmony_ci */ 73548c2ecf20Sopenharmony_cistatic const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) 73558c2ecf20Sopenharmony_ci{ 73568c2ecf20Sopenharmony_ci unsigned head, mask = ctx->sq_entries - 1; 73578c2ecf20Sopenharmony_ci unsigned sq_idx = ctx->cached_sq_head++ & mask; 73588c2ecf20Sopenharmony_ci 73598c2ecf20Sopenharmony_ci /* 73608c2ecf20Sopenharmony_ci * The cached sq head (or cq tail) serves two purposes: 73618c2ecf20Sopenharmony_ci * 73628c2ecf20Sopenharmony_ci * 1) allows us to batch the cost of updating the user visible 73638c2ecf20Sopenharmony_ci * head updates. 73648c2ecf20Sopenharmony_ci * 2) allows the kernel side to track the head on its own, even 73658c2ecf20Sopenharmony_ci * though the application is the one updating it. 
73668c2ecf20Sopenharmony_ci */ 73678c2ecf20Sopenharmony_ci head = READ_ONCE(ctx->sq_array[sq_idx]); 73688c2ecf20Sopenharmony_ci if (likely(head < ctx->sq_entries)) 73698c2ecf20Sopenharmony_ci return &ctx->sq_sqes[head]; 73708c2ecf20Sopenharmony_ci 73718c2ecf20Sopenharmony_ci /* drop invalid entries */ 73728c2ecf20Sopenharmony_ci ctx->cq_extra--; 73738c2ecf20Sopenharmony_ci WRITE_ONCE(ctx->rings->sq_dropped, 73748c2ecf20Sopenharmony_ci READ_ONCE(ctx->rings->sq_dropped) + 1); 73758c2ecf20Sopenharmony_ci return NULL; 73768c2ecf20Sopenharmony_ci} 73778c2ecf20Sopenharmony_ci 73788c2ecf20Sopenharmony_cistatic int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) 73798c2ecf20Sopenharmony_ci __must_hold(&ctx->uring_lock) 73808c2ecf20Sopenharmony_ci{ 73818c2ecf20Sopenharmony_ci int submitted = 0; 73828c2ecf20Sopenharmony_ci 73838c2ecf20Sopenharmony_ci /* make sure SQ entry isn't read before tail */ 73848c2ecf20Sopenharmony_ci nr = min3(nr, ctx->sq_entries, io_sqring_entries(ctx)); 73858c2ecf20Sopenharmony_ci if (!percpu_ref_tryget_many(&ctx->refs, nr)) 73868c2ecf20Sopenharmony_ci return -EAGAIN; 73878c2ecf20Sopenharmony_ci io_get_task_refs(nr); 73888c2ecf20Sopenharmony_ci 73898c2ecf20Sopenharmony_ci io_submit_state_start(&ctx->submit_state, nr); 73908c2ecf20Sopenharmony_ci while (submitted < nr) { 73918c2ecf20Sopenharmony_ci const struct io_uring_sqe *sqe; 73928c2ecf20Sopenharmony_ci struct io_kiocb *req; 73938c2ecf20Sopenharmony_ci 73948c2ecf20Sopenharmony_ci req = io_alloc_req(ctx); 73958c2ecf20Sopenharmony_ci if (unlikely(!req)) { 73968c2ecf20Sopenharmony_ci if (!submitted) 73978c2ecf20Sopenharmony_ci submitted = -EAGAIN; 73988c2ecf20Sopenharmony_ci break; 73998c2ecf20Sopenharmony_ci } 74008c2ecf20Sopenharmony_ci sqe = io_get_sqe(ctx); 74018c2ecf20Sopenharmony_ci if (unlikely(!sqe)) { 74028c2ecf20Sopenharmony_ci list_add(&req->inflight_entry, &ctx->submit_state.free_list); 74038c2ecf20Sopenharmony_ci break; 74048c2ecf20Sopenharmony_ci } 74058c2ecf20Sopenharmony_ci 
/* will complete beyond this point, count as submitted */ 74068c2ecf20Sopenharmony_ci submitted++; 74078c2ecf20Sopenharmony_ci if (io_submit_sqe(ctx, req, sqe)) 74088c2ecf20Sopenharmony_ci break; 74098c2ecf20Sopenharmony_ci } 74108c2ecf20Sopenharmony_ci 74118c2ecf20Sopenharmony_ci if (unlikely(submitted != nr)) { 74128c2ecf20Sopenharmony_ci int ref_used = (submitted == -EAGAIN) ? 0 : submitted; 74138c2ecf20Sopenharmony_ci int unused = nr - ref_used; 74148c2ecf20Sopenharmony_ci 74158c2ecf20Sopenharmony_ci current->io_uring->cached_refs += unused; 74168c2ecf20Sopenharmony_ci percpu_ref_put_many(&ctx->refs, unused); 74178c2ecf20Sopenharmony_ci } 74188c2ecf20Sopenharmony_ci 74198c2ecf20Sopenharmony_ci io_submit_state_end(&ctx->submit_state, ctx); 74208c2ecf20Sopenharmony_ci /* Commit SQ ring head once we've consumed and submitted all SQEs */ 74218c2ecf20Sopenharmony_ci io_commit_sqring(ctx); 74228c2ecf20Sopenharmony_ci 74238c2ecf20Sopenharmony_ci return submitted; 74248c2ecf20Sopenharmony_ci} 74258c2ecf20Sopenharmony_ci 74268c2ecf20Sopenharmony_cistatic inline bool io_sqd_events_pending(struct io_sq_data *sqd) 74278c2ecf20Sopenharmony_ci{ 74288c2ecf20Sopenharmony_ci return READ_ONCE(sqd->state); 74298c2ecf20Sopenharmony_ci} 74308c2ecf20Sopenharmony_ci 74318c2ecf20Sopenharmony_cistatic inline void io_ring_set_wakeup_flag(struct io_ring_ctx *ctx) 74328c2ecf20Sopenharmony_ci{ 74338c2ecf20Sopenharmony_ci /* Tell userspace we may need a wakeup call */ 74348c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 74358c2ecf20Sopenharmony_ci WRITE_ONCE(ctx->rings->sq_flags, 74368c2ecf20Sopenharmony_ci ctx->rings->sq_flags | IORING_SQ_NEED_WAKEUP); 74378c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 74388c2ecf20Sopenharmony_ci} 74398c2ecf20Sopenharmony_ci 74408c2ecf20Sopenharmony_cistatic inline void io_ring_clear_wakeup_flag(struct io_ring_ctx *ctx) 74418c2ecf20Sopenharmony_ci{ 74428c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 
74438c2ecf20Sopenharmony_ci WRITE_ONCE(ctx->rings->sq_flags, 74448c2ecf20Sopenharmony_ci ctx->rings->sq_flags & ~IORING_SQ_NEED_WAKEUP); 74458c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 74468c2ecf20Sopenharmony_ci} 74478c2ecf20Sopenharmony_ci 74488c2ecf20Sopenharmony_cistatic int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) 74498c2ecf20Sopenharmony_ci{ 74508c2ecf20Sopenharmony_ci unsigned int to_submit; 74518c2ecf20Sopenharmony_ci int ret = 0; 74528c2ecf20Sopenharmony_ci 74538c2ecf20Sopenharmony_ci to_submit = io_sqring_entries(ctx); 74548c2ecf20Sopenharmony_ci /* if we're handling multiple rings, cap submit size for fairness */ 74558c2ecf20Sopenharmony_ci if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE) 74568c2ecf20Sopenharmony_ci to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE; 74578c2ecf20Sopenharmony_ci 74588c2ecf20Sopenharmony_ci if (!list_empty(&ctx->iopoll_list) || to_submit) { 74598c2ecf20Sopenharmony_ci unsigned nr_events = 0; 74608c2ecf20Sopenharmony_ci const struct cred *creds = NULL; 74618c2ecf20Sopenharmony_ci 74628c2ecf20Sopenharmony_ci if (ctx->sq_creds != current_cred()) 74638c2ecf20Sopenharmony_ci creds = override_creds(ctx->sq_creds); 74648c2ecf20Sopenharmony_ci 74658c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 74668c2ecf20Sopenharmony_ci if (!list_empty(&ctx->iopoll_list)) 74678c2ecf20Sopenharmony_ci io_do_iopoll(ctx, &nr_events, 0); 74688c2ecf20Sopenharmony_ci 74698c2ecf20Sopenharmony_ci /* 74708c2ecf20Sopenharmony_ci * Don't submit if refs are dying, good for io_uring_register(), 74718c2ecf20Sopenharmony_ci * but also it is relied upon by io_ring_exit_work() 74728c2ecf20Sopenharmony_ci */ 74738c2ecf20Sopenharmony_ci if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) && 74748c2ecf20Sopenharmony_ci !(ctx->flags & IORING_SETUP_R_DISABLED)) 74758c2ecf20Sopenharmony_ci ret = io_submit_sqes(ctx, to_submit); 74768c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 74778c2ecf20Sopenharmony_ci 
74788c2ecf20Sopenharmony_ci if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait)) 74798c2ecf20Sopenharmony_ci wake_up(&ctx->sqo_sq_wait); 74808c2ecf20Sopenharmony_ci if (creds) 74818c2ecf20Sopenharmony_ci revert_creds(creds); 74828c2ecf20Sopenharmony_ci } 74838c2ecf20Sopenharmony_ci 74848c2ecf20Sopenharmony_ci return ret; 74858c2ecf20Sopenharmony_ci} 74868c2ecf20Sopenharmony_ci 74878c2ecf20Sopenharmony_cistatic void io_sqd_update_thread_idle(struct io_sq_data *sqd) 74888c2ecf20Sopenharmony_ci{ 74898c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 74908c2ecf20Sopenharmony_ci unsigned sq_thread_idle = 0; 74918c2ecf20Sopenharmony_ci 74928c2ecf20Sopenharmony_ci list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) 74938c2ecf20Sopenharmony_ci sq_thread_idle = max(sq_thread_idle, ctx->sq_thread_idle); 74948c2ecf20Sopenharmony_ci sqd->sq_thread_idle = sq_thread_idle; 74958c2ecf20Sopenharmony_ci} 74968c2ecf20Sopenharmony_ci 74978c2ecf20Sopenharmony_cistatic bool io_sqd_handle_event(struct io_sq_data *sqd) 74988c2ecf20Sopenharmony_ci{ 74998c2ecf20Sopenharmony_ci bool did_sig = false; 75008c2ecf20Sopenharmony_ci struct ksignal ksig; 75018c2ecf20Sopenharmony_ci 75028c2ecf20Sopenharmony_ci if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) || 75038c2ecf20Sopenharmony_ci signal_pending(current)) { 75048c2ecf20Sopenharmony_ci mutex_unlock(&sqd->lock); 75058c2ecf20Sopenharmony_ci if (signal_pending(current)) 75068c2ecf20Sopenharmony_ci did_sig = get_signal(&ksig); 75078c2ecf20Sopenharmony_ci cond_resched(); 75088c2ecf20Sopenharmony_ci mutex_lock(&sqd->lock); 75098c2ecf20Sopenharmony_ci } 75108c2ecf20Sopenharmony_ci return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); 75118c2ecf20Sopenharmony_ci} 75128c2ecf20Sopenharmony_ci 75138c2ecf20Sopenharmony_cistatic int io_sq_thread(void *data) 75148c2ecf20Sopenharmony_ci{ 75158c2ecf20Sopenharmony_ci struct io_sq_data *sqd = data; 75168c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 75178c2ecf20Sopenharmony_ci unsigned long 
timeout = 0; 75188c2ecf20Sopenharmony_ci char buf[TASK_COMM_LEN]; 75198c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 75208c2ecf20Sopenharmony_ci 75218c2ecf20Sopenharmony_ci snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid); 75228c2ecf20Sopenharmony_ci set_task_comm(current, buf); 75238c2ecf20Sopenharmony_ci 75248c2ecf20Sopenharmony_ci if (sqd->sq_cpu != -1) 75258c2ecf20Sopenharmony_ci set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu)); 75268c2ecf20Sopenharmony_ci else 75278c2ecf20Sopenharmony_ci set_cpus_allowed_ptr(current, cpu_online_mask); 75288c2ecf20Sopenharmony_ci current->flags |= PF_NO_SETAFFINITY; 75298c2ecf20Sopenharmony_ci 75308c2ecf20Sopenharmony_ci mutex_lock(&sqd->lock); 75318c2ecf20Sopenharmony_ci while (1) { 75328c2ecf20Sopenharmony_ci bool cap_entries, sqt_spin = false; 75338c2ecf20Sopenharmony_ci 75348c2ecf20Sopenharmony_ci if (io_sqd_events_pending(sqd) || signal_pending(current)) { 75358c2ecf20Sopenharmony_ci if (io_sqd_handle_event(sqd)) 75368c2ecf20Sopenharmony_ci break; 75378c2ecf20Sopenharmony_ci timeout = jiffies + sqd->sq_thread_idle; 75388c2ecf20Sopenharmony_ci } 75398c2ecf20Sopenharmony_ci 75408c2ecf20Sopenharmony_ci cap_entries = !list_is_singular(&sqd->ctx_list); 75418c2ecf20Sopenharmony_ci list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { 75428c2ecf20Sopenharmony_ci int ret = __io_sq_thread(ctx, cap_entries); 75438c2ecf20Sopenharmony_ci 75448c2ecf20Sopenharmony_ci if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list))) 75458c2ecf20Sopenharmony_ci sqt_spin = true; 75468c2ecf20Sopenharmony_ci } 75478c2ecf20Sopenharmony_ci if (io_run_task_work()) 75488c2ecf20Sopenharmony_ci sqt_spin = true; 75498c2ecf20Sopenharmony_ci 75508c2ecf20Sopenharmony_ci if (sqt_spin || !time_after(jiffies, timeout)) { 75518c2ecf20Sopenharmony_ci cond_resched(); 75528c2ecf20Sopenharmony_ci if (sqt_spin) 75538c2ecf20Sopenharmony_ci timeout = jiffies + sqd->sq_thread_idle; 75548c2ecf20Sopenharmony_ci continue; 75558c2ecf20Sopenharmony_ci } 
75568c2ecf20Sopenharmony_ci 75578c2ecf20Sopenharmony_ci prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE); 75588c2ecf20Sopenharmony_ci if (!io_sqd_events_pending(sqd) && !current->task_works) { 75598c2ecf20Sopenharmony_ci bool needs_sched = true; 75608c2ecf20Sopenharmony_ci 75618c2ecf20Sopenharmony_ci list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { 75628c2ecf20Sopenharmony_ci io_ring_set_wakeup_flag(ctx); 75638c2ecf20Sopenharmony_ci 75648c2ecf20Sopenharmony_ci if ((ctx->flags & IORING_SETUP_IOPOLL) && 75658c2ecf20Sopenharmony_ci !list_empty_careful(&ctx->iopoll_list)) { 75668c2ecf20Sopenharmony_ci needs_sched = false; 75678c2ecf20Sopenharmony_ci break; 75688c2ecf20Sopenharmony_ci } 75698c2ecf20Sopenharmony_ci if (io_sqring_entries(ctx)) { 75708c2ecf20Sopenharmony_ci needs_sched = false; 75718c2ecf20Sopenharmony_ci break; 75728c2ecf20Sopenharmony_ci } 75738c2ecf20Sopenharmony_ci } 75748c2ecf20Sopenharmony_ci 75758c2ecf20Sopenharmony_ci if (needs_sched) { 75768c2ecf20Sopenharmony_ci mutex_unlock(&sqd->lock); 75778c2ecf20Sopenharmony_ci schedule(); 75788c2ecf20Sopenharmony_ci mutex_lock(&sqd->lock); 75798c2ecf20Sopenharmony_ci } 75808c2ecf20Sopenharmony_ci list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) 75818c2ecf20Sopenharmony_ci io_ring_clear_wakeup_flag(ctx); 75828c2ecf20Sopenharmony_ci } 75838c2ecf20Sopenharmony_ci 75848c2ecf20Sopenharmony_ci finish_wait(&sqd->wait, &wait); 75858c2ecf20Sopenharmony_ci timeout = jiffies + sqd->sq_thread_idle; 75868c2ecf20Sopenharmony_ci } 75878c2ecf20Sopenharmony_ci 75888c2ecf20Sopenharmony_ci io_uring_cancel_generic(true, sqd); 75898c2ecf20Sopenharmony_ci sqd->thread = NULL; 75908c2ecf20Sopenharmony_ci list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) 75918c2ecf20Sopenharmony_ci io_ring_set_wakeup_flag(ctx); 75928c2ecf20Sopenharmony_ci io_run_task_work(); 75938c2ecf20Sopenharmony_ci mutex_unlock(&sqd->lock); 75948c2ecf20Sopenharmony_ci 75958c2ecf20Sopenharmony_ci complete(&sqd->exited); 75968c2ecf20Sopenharmony_ci 
do_exit(0); 75978c2ecf20Sopenharmony_ci} 75988c2ecf20Sopenharmony_ci 75998c2ecf20Sopenharmony_cistruct io_wait_queue { 76008c2ecf20Sopenharmony_ci struct wait_queue_entry wq; 76018c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 76028c2ecf20Sopenharmony_ci unsigned cq_tail; 76038c2ecf20Sopenharmony_ci unsigned nr_timeouts; 76048c2ecf20Sopenharmony_ci}; 76058c2ecf20Sopenharmony_ci 76068c2ecf20Sopenharmony_cistatic inline bool io_should_wake(struct io_wait_queue *iowq) 76078c2ecf20Sopenharmony_ci{ 76088c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = iowq->ctx; 76098c2ecf20Sopenharmony_ci int dist = ctx->cached_cq_tail - (int) iowq->cq_tail; 76108c2ecf20Sopenharmony_ci 76118c2ecf20Sopenharmony_ci /* 76128c2ecf20Sopenharmony_ci * Wake up if we have enough events, or if a timeout occurred since we 76138c2ecf20Sopenharmony_ci * started waiting. For timeouts, we always want to return to userspace, 76148c2ecf20Sopenharmony_ci * regardless of event count. 76158c2ecf20Sopenharmony_ci */ 76168c2ecf20Sopenharmony_ci return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; 76178c2ecf20Sopenharmony_ci} 76188c2ecf20Sopenharmony_ci 76198c2ecf20Sopenharmony_cistatic int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, 76208c2ecf20Sopenharmony_ci int wake_flags, void *key) 76218c2ecf20Sopenharmony_ci{ 76228c2ecf20Sopenharmony_ci struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, 76238c2ecf20Sopenharmony_ci wq); 76248c2ecf20Sopenharmony_ci 76258c2ecf20Sopenharmony_ci /* 76268c2ecf20Sopenharmony_ci * Cannot safely flush overflowed CQEs from here, ensure we wake up 76278c2ecf20Sopenharmony_ci * the task, and the next invocation will do it. 
76288c2ecf20Sopenharmony_ci */ 76298c2ecf20Sopenharmony_ci if (io_should_wake(iowq) || test_bit(0, &iowq->ctx->check_cq_overflow)) 76308c2ecf20Sopenharmony_ci return autoremove_wake_function(curr, mode, wake_flags, key); 76318c2ecf20Sopenharmony_ci return -1; 76328c2ecf20Sopenharmony_ci} 76338c2ecf20Sopenharmony_ci 76348c2ecf20Sopenharmony_cistatic int io_run_task_work_sig(void) 76358c2ecf20Sopenharmony_ci{ 76368c2ecf20Sopenharmony_ci if (io_run_task_work()) 76378c2ecf20Sopenharmony_ci return 1; 76388c2ecf20Sopenharmony_ci if (!signal_pending(current)) 76398c2ecf20Sopenharmony_ci return 0; 76408c2ecf20Sopenharmony_ci if (test_thread_flag(TIF_NOTIFY_SIGNAL)) 76418c2ecf20Sopenharmony_ci return -ERESTARTSYS; 76428c2ecf20Sopenharmony_ci return -EINTR; 76438c2ecf20Sopenharmony_ci} 76448c2ecf20Sopenharmony_ci 76458c2ecf20Sopenharmony_cistatic bool current_pending_io(void) 76468c2ecf20Sopenharmony_ci{ 76478c2ecf20Sopenharmony_ci struct io_uring_task *tctx = current->io_uring; 76488c2ecf20Sopenharmony_ci 76498c2ecf20Sopenharmony_ci if (!tctx) 76508c2ecf20Sopenharmony_ci return false; 76518c2ecf20Sopenharmony_ci return percpu_counter_read_positive(&tctx->inflight); 76528c2ecf20Sopenharmony_ci} 76538c2ecf20Sopenharmony_ci 76548c2ecf20Sopenharmony_ci/* when returns >0, the caller should retry */ 76558c2ecf20Sopenharmony_cistatic inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, 76568c2ecf20Sopenharmony_ci struct io_wait_queue *iowq, 76578c2ecf20Sopenharmony_ci ktime_t *timeout) 76588c2ecf20Sopenharmony_ci{ 76598c2ecf20Sopenharmony_ci int io_wait, ret; 76608c2ecf20Sopenharmony_ci 76618c2ecf20Sopenharmony_ci /* make sure we run task_work before checking for signals */ 76628c2ecf20Sopenharmony_ci ret = io_run_task_work_sig(); 76638c2ecf20Sopenharmony_ci if (ret || io_should_wake(iowq)) 76648c2ecf20Sopenharmony_ci return ret; 76658c2ecf20Sopenharmony_ci /* let the caller flush overflows, retry */ 76668c2ecf20Sopenharmony_ci if (test_bit(0, &ctx->check_cq_overflow)) 
76678c2ecf20Sopenharmony_ci return 1; 76688c2ecf20Sopenharmony_ci 76698c2ecf20Sopenharmony_ci /* 76708c2ecf20Sopenharmony_ci * Mark us as being in io_wait if we have pending requests, so cpufreq 76718c2ecf20Sopenharmony_ci * can take into account that the task is waiting for IO - turns out 76728c2ecf20Sopenharmony_ci * to be important for low QD IO. 76738c2ecf20Sopenharmony_ci */ 76748c2ecf20Sopenharmony_ci io_wait = current->in_iowait; 76758c2ecf20Sopenharmony_ci if (current_pending_io()) 76768c2ecf20Sopenharmony_ci current->in_iowait = 1; 76778c2ecf20Sopenharmony_ci ret = 1; 76788c2ecf20Sopenharmony_ci if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS)) 76798c2ecf20Sopenharmony_ci ret = -ETIME; 76808c2ecf20Sopenharmony_ci current->in_iowait = io_wait; 76818c2ecf20Sopenharmony_ci return ret; 76828c2ecf20Sopenharmony_ci} 76838c2ecf20Sopenharmony_ci 76848c2ecf20Sopenharmony_ci/* 76858c2ecf20Sopenharmony_ci * Wait until events become available, if we don't already have some. The 76868c2ecf20Sopenharmony_ci * application must reap them itself, as they reside on the shared cq ring. 
76878c2ecf20Sopenharmony_ci */ 76888c2ecf20Sopenharmony_cistatic int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, 76898c2ecf20Sopenharmony_ci const sigset_t __user *sig, size_t sigsz, 76908c2ecf20Sopenharmony_ci struct __kernel_timespec __user *uts) 76918c2ecf20Sopenharmony_ci{ 76928c2ecf20Sopenharmony_ci struct io_wait_queue iowq; 76938c2ecf20Sopenharmony_ci struct io_rings *rings = ctx->rings; 76948c2ecf20Sopenharmony_ci ktime_t timeout = KTIME_MAX; 76958c2ecf20Sopenharmony_ci int ret; 76968c2ecf20Sopenharmony_ci 76978c2ecf20Sopenharmony_ci do { 76988c2ecf20Sopenharmony_ci io_cqring_overflow_flush(ctx); 76998c2ecf20Sopenharmony_ci if (io_cqring_events(ctx) >= min_events) 77008c2ecf20Sopenharmony_ci return 0; 77018c2ecf20Sopenharmony_ci if (!io_run_task_work()) 77028c2ecf20Sopenharmony_ci break; 77038c2ecf20Sopenharmony_ci } while (1); 77048c2ecf20Sopenharmony_ci 77058c2ecf20Sopenharmony_ci if (uts) { 77068c2ecf20Sopenharmony_ci struct timespec64 ts; 77078c2ecf20Sopenharmony_ci 77088c2ecf20Sopenharmony_ci if (get_timespec64(&ts, uts)) 77098c2ecf20Sopenharmony_ci return -EFAULT; 77108c2ecf20Sopenharmony_ci timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); 77118c2ecf20Sopenharmony_ci } 77128c2ecf20Sopenharmony_ci 77138c2ecf20Sopenharmony_ci if (sig) { 77148c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 77158c2ecf20Sopenharmony_ci if (in_compat_syscall()) 77168c2ecf20Sopenharmony_ci ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, 77178c2ecf20Sopenharmony_ci sigsz); 77188c2ecf20Sopenharmony_ci else 77198c2ecf20Sopenharmony_ci#endif 77208c2ecf20Sopenharmony_ci ret = set_user_sigmask(sig, sigsz); 77218c2ecf20Sopenharmony_ci 77228c2ecf20Sopenharmony_ci if (ret) 77238c2ecf20Sopenharmony_ci return ret; 77248c2ecf20Sopenharmony_ci } 77258c2ecf20Sopenharmony_ci 77268c2ecf20Sopenharmony_ci init_waitqueue_func_entry(&iowq.wq, io_wake_function); 77278c2ecf20Sopenharmony_ci iowq.wq.private = current; 77288c2ecf20Sopenharmony_ci 
INIT_LIST_HEAD(&iowq.wq.entry); 77298c2ecf20Sopenharmony_ci iowq.ctx = ctx; 77308c2ecf20Sopenharmony_ci iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); 77318c2ecf20Sopenharmony_ci iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; 77328c2ecf20Sopenharmony_ci 77338c2ecf20Sopenharmony_ci trace_io_uring_cqring_wait(ctx, min_events); 77348c2ecf20Sopenharmony_ci do { 77358c2ecf20Sopenharmony_ci /* if we can't even flush overflow, don't wait for more */ 77368c2ecf20Sopenharmony_ci if (!io_cqring_overflow_flush(ctx)) { 77378c2ecf20Sopenharmony_ci ret = -EBUSY; 77388c2ecf20Sopenharmony_ci break; 77398c2ecf20Sopenharmony_ci } 77408c2ecf20Sopenharmony_ci prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, 77418c2ecf20Sopenharmony_ci TASK_INTERRUPTIBLE); 77428c2ecf20Sopenharmony_ci ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); 77438c2ecf20Sopenharmony_ci finish_wait(&ctx->cq_wait, &iowq.wq); 77448c2ecf20Sopenharmony_ci cond_resched(); 77458c2ecf20Sopenharmony_ci } while (ret > 0); 77468c2ecf20Sopenharmony_ci 77478c2ecf20Sopenharmony_ci restore_saved_sigmask_unless(ret == -EINTR); 77488c2ecf20Sopenharmony_ci 77498c2ecf20Sopenharmony_ci return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? 
ret : 0; 77508c2ecf20Sopenharmony_ci} 77518c2ecf20Sopenharmony_ci 77528c2ecf20Sopenharmony_cistatic void io_free_page_table(void **table, size_t size) 77538c2ecf20Sopenharmony_ci{ 77548c2ecf20Sopenharmony_ci unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); 77558c2ecf20Sopenharmony_ci 77568c2ecf20Sopenharmony_ci for (i = 0; i < nr_tables; i++) 77578c2ecf20Sopenharmony_ci kfree(table[i]); 77588c2ecf20Sopenharmony_ci kfree(table); 77598c2ecf20Sopenharmony_ci} 77608c2ecf20Sopenharmony_ci 77618c2ecf20Sopenharmony_cistatic void **io_alloc_page_table(size_t size) 77628c2ecf20Sopenharmony_ci{ 77638c2ecf20Sopenharmony_ci unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); 77648c2ecf20Sopenharmony_ci size_t init_size = size; 77658c2ecf20Sopenharmony_ci void **table; 77668c2ecf20Sopenharmony_ci 77678c2ecf20Sopenharmony_ci table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT); 77688c2ecf20Sopenharmony_ci if (!table) 77698c2ecf20Sopenharmony_ci return NULL; 77708c2ecf20Sopenharmony_ci 77718c2ecf20Sopenharmony_ci for (i = 0; i < nr_tables; i++) { 77728c2ecf20Sopenharmony_ci unsigned int this_size = min_t(size_t, size, PAGE_SIZE); 77738c2ecf20Sopenharmony_ci 77748c2ecf20Sopenharmony_ci table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT); 77758c2ecf20Sopenharmony_ci if (!table[i]) { 77768c2ecf20Sopenharmony_ci io_free_page_table(table, init_size); 77778c2ecf20Sopenharmony_ci return NULL; 77788c2ecf20Sopenharmony_ci } 77798c2ecf20Sopenharmony_ci size -= this_size; 77808c2ecf20Sopenharmony_ci } 77818c2ecf20Sopenharmony_ci return table; 77828c2ecf20Sopenharmony_ci} 77838c2ecf20Sopenharmony_ci 77848c2ecf20Sopenharmony_cistatic void io_rsrc_node_destroy(struct io_rsrc_node *ref_node) 77858c2ecf20Sopenharmony_ci{ 77868c2ecf20Sopenharmony_ci percpu_ref_exit(&ref_node->refs); 77878c2ecf20Sopenharmony_ci kfree(ref_node); 77888c2ecf20Sopenharmony_ci} 77898c2ecf20Sopenharmony_ci 77908c2ecf20Sopenharmony_cistatic void io_rsrc_node_ref_zero(struct percpu_ref *ref) 
77918c2ecf20Sopenharmony_ci{ 77928c2ecf20Sopenharmony_ci struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs); 77938c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = node->rsrc_data->ctx; 77948c2ecf20Sopenharmony_ci unsigned long flags; 77958c2ecf20Sopenharmony_ci bool first_add = false; 77968c2ecf20Sopenharmony_ci unsigned long delay = HZ; 77978c2ecf20Sopenharmony_ci 77988c2ecf20Sopenharmony_ci spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); 77998c2ecf20Sopenharmony_ci node->done = true; 78008c2ecf20Sopenharmony_ci 78018c2ecf20Sopenharmony_ci /* if we are mid-quiesce then do not delay */ 78028c2ecf20Sopenharmony_ci if (node->rsrc_data->quiesce) 78038c2ecf20Sopenharmony_ci delay = 0; 78048c2ecf20Sopenharmony_ci 78058c2ecf20Sopenharmony_ci while (!list_empty(&ctx->rsrc_ref_list)) { 78068c2ecf20Sopenharmony_ci node = list_first_entry(&ctx->rsrc_ref_list, 78078c2ecf20Sopenharmony_ci struct io_rsrc_node, node); 78088c2ecf20Sopenharmony_ci /* recycle ref nodes in order */ 78098c2ecf20Sopenharmony_ci if (!node->done) 78108c2ecf20Sopenharmony_ci break; 78118c2ecf20Sopenharmony_ci list_del(&node->node); 78128c2ecf20Sopenharmony_ci first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist); 78138c2ecf20Sopenharmony_ci } 78148c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); 78158c2ecf20Sopenharmony_ci 78168c2ecf20Sopenharmony_ci if (first_add) 78178c2ecf20Sopenharmony_ci mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); 78188c2ecf20Sopenharmony_ci} 78198c2ecf20Sopenharmony_ci 78208c2ecf20Sopenharmony_cistatic struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx) 78218c2ecf20Sopenharmony_ci{ 78228c2ecf20Sopenharmony_ci struct io_rsrc_node *ref_node; 78238c2ecf20Sopenharmony_ci 78248c2ecf20Sopenharmony_ci ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); 78258c2ecf20Sopenharmony_ci if (!ref_node) 78268c2ecf20Sopenharmony_ci return NULL; 78278c2ecf20Sopenharmony_ci 78288c2ecf20Sopenharmony_ci if 
(percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero, 78298c2ecf20Sopenharmony_ci 0, GFP_KERNEL)) { 78308c2ecf20Sopenharmony_ci kfree(ref_node); 78318c2ecf20Sopenharmony_ci return NULL; 78328c2ecf20Sopenharmony_ci } 78338c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ref_node->node); 78348c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ref_node->rsrc_list); 78358c2ecf20Sopenharmony_ci ref_node->done = false; 78368c2ecf20Sopenharmony_ci return ref_node; 78378c2ecf20Sopenharmony_ci} 78388c2ecf20Sopenharmony_ci 78398c2ecf20Sopenharmony_cistatic void io_rsrc_node_switch(struct io_ring_ctx *ctx, 78408c2ecf20Sopenharmony_ci struct io_rsrc_data *data_to_kill) 78418c2ecf20Sopenharmony_ci{ 78428c2ecf20Sopenharmony_ci WARN_ON_ONCE(!ctx->rsrc_backup_node); 78438c2ecf20Sopenharmony_ci WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node); 78448c2ecf20Sopenharmony_ci 78458c2ecf20Sopenharmony_ci if (data_to_kill) { 78468c2ecf20Sopenharmony_ci struct io_rsrc_node *rsrc_node = ctx->rsrc_node; 78478c2ecf20Sopenharmony_ci 78488c2ecf20Sopenharmony_ci rsrc_node->rsrc_data = data_to_kill; 78498c2ecf20Sopenharmony_ci spin_lock_irq(&ctx->rsrc_ref_lock); 78508c2ecf20Sopenharmony_ci list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list); 78518c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->rsrc_ref_lock); 78528c2ecf20Sopenharmony_ci 78538c2ecf20Sopenharmony_ci atomic_inc(&data_to_kill->refs); 78548c2ecf20Sopenharmony_ci percpu_ref_kill(&rsrc_node->refs); 78558c2ecf20Sopenharmony_ci ctx->rsrc_node = NULL; 78568c2ecf20Sopenharmony_ci } 78578c2ecf20Sopenharmony_ci 78588c2ecf20Sopenharmony_ci if (!ctx->rsrc_node) { 78598c2ecf20Sopenharmony_ci ctx->rsrc_node = ctx->rsrc_backup_node; 78608c2ecf20Sopenharmony_ci ctx->rsrc_backup_node = NULL; 78618c2ecf20Sopenharmony_ci } 78628c2ecf20Sopenharmony_ci} 78638c2ecf20Sopenharmony_ci 78648c2ecf20Sopenharmony_cistatic int io_rsrc_node_switch_start(struct io_ring_ctx *ctx) 78658c2ecf20Sopenharmony_ci{ 78668c2ecf20Sopenharmony_ci if (ctx->rsrc_backup_node) 
78678c2ecf20Sopenharmony_ci return 0; 78688c2ecf20Sopenharmony_ci ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx); 78698c2ecf20Sopenharmony_ci return ctx->rsrc_backup_node ? 0 : -ENOMEM; 78708c2ecf20Sopenharmony_ci} 78718c2ecf20Sopenharmony_ci 78728c2ecf20Sopenharmony_cistatic int io_rsrc_ref_quiesce(struct io_rsrc_data *data, struct io_ring_ctx *ctx) 78738c2ecf20Sopenharmony_ci{ 78748c2ecf20Sopenharmony_ci int ret; 78758c2ecf20Sopenharmony_ci 78768c2ecf20Sopenharmony_ci /* As we may drop ->uring_lock, other task may have started quiesce */ 78778c2ecf20Sopenharmony_ci if (data->quiesce) 78788c2ecf20Sopenharmony_ci return -ENXIO; 78798c2ecf20Sopenharmony_ci 78808c2ecf20Sopenharmony_ci data->quiesce = true; 78818c2ecf20Sopenharmony_ci do { 78828c2ecf20Sopenharmony_ci ret = io_rsrc_node_switch_start(ctx); 78838c2ecf20Sopenharmony_ci if (ret) 78848c2ecf20Sopenharmony_ci break; 78858c2ecf20Sopenharmony_ci io_rsrc_node_switch(ctx, data); 78868c2ecf20Sopenharmony_ci 78878c2ecf20Sopenharmony_ci /* kill initial ref, already quiesced if zero */ 78888c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&data->refs)) 78898c2ecf20Sopenharmony_ci break; 78908c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 78918c2ecf20Sopenharmony_ci flush_delayed_work(&ctx->rsrc_put_work); 78928c2ecf20Sopenharmony_ci ret = wait_for_completion_interruptible(&data->done); 78938c2ecf20Sopenharmony_ci if (!ret) { 78948c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 78958c2ecf20Sopenharmony_ci if (atomic_read(&data->refs) > 0) { 78968c2ecf20Sopenharmony_ci /* 78978c2ecf20Sopenharmony_ci * it has been revived by another thread while 78988c2ecf20Sopenharmony_ci * we were unlocked 78998c2ecf20Sopenharmony_ci */ 79008c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 79018c2ecf20Sopenharmony_ci } else { 79028c2ecf20Sopenharmony_ci break; 79038c2ecf20Sopenharmony_ci } 79048c2ecf20Sopenharmony_ci } 79058c2ecf20Sopenharmony_ci 79068c2ecf20Sopenharmony_ci atomic_inc(&data->refs); 
79078c2ecf20Sopenharmony_ci /* wait for all works potentially completing data->done */ 79088c2ecf20Sopenharmony_ci flush_delayed_work(&ctx->rsrc_put_work); 79098c2ecf20Sopenharmony_ci reinit_completion(&data->done); 79108c2ecf20Sopenharmony_ci 79118c2ecf20Sopenharmony_ci ret = io_run_task_work_sig(); 79128c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 79138c2ecf20Sopenharmony_ci } while (ret >= 0); 79148c2ecf20Sopenharmony_ci data->quiesce = false; 79158c2ecf20Sopenharmony_ci 79168c2ecf20Sopenharmony_ci return ret; 79178c2ecf20Sopenharmony_ci} 79188c2ecf20Sopenharmony_ci 79198c2ecf20Sopenharmony_cistatic u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx) 79208c2ecf20Sopenharmony_ci{ 79218c2ecf20Sopenharmony_ci unsigned int off = idx & IO_RSRC_TAG_TABLE_MASK; 79228c2ecf20Sopenharmony_ci unsigned int table_idx = idx >> IO_RSRC_TAG_TABLE_SHIFT; 79238c2ecf20Sopenharmony_ci 79248c2ecf20Sopenharmony_ci return &data->tags[table_idx][off]; 79258c2ecf20Sopenharmony_ci} 79268c2ecf20Sopenharmony_ci 79278c2ecf20Sopenharmony_cistatic void io_rsrc_data_free(struct io_rsrc_data *data) 79288c2ecf20Sopenharmony_ci{ 79298c2ecf20Sopenharmony_ci size_t size = data->nr * sizeof(data->tags[0][0]); 79308c2ecf20Sopenharmony_ci 79318c2ecf20Sopenharmony_ci if (data->tags) 79328c2ecf20Sopenharmony_ci io_free_page_table((void **)data->tags, size); 79338c2ecf20Sopenharmony_ci kfree(data); 79348c2ecf20Sopenharmony_ci} 79358c2ecf20Sopenharmony_ci 79368c2ecf20Sopenharmony_cistatic int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put, 79378c2ecf20Sopenharmony_ci u64 __user *utags, unsigned nr, 79388c2ecf20Sopenharmony_ci struct io_rsrc_data **pdata) 79398c2ecf20Sopenharmony_ci{ 79408c2ecf20Sopenharmony_ci struct io_rsrc_data *data; 79418c2ecf20Sopenharmony_ci int ret = -ENOMEM; 79428c2ecf20Sopenharmony_ci unsigned i; 79438c2ecf20Sopenharmony_ci 79448c2ecf20Sopenharmony_ci data = kzalloc(sizeof(*data), GFP_KERNEL); 79458c2ecf20Sopenharmony_ci if (!data) 
79468c2ecf20Sopenharmony_ci return -ENOMEM; 79478c2ecf20Sopenharmony_ci data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0])); 79488c2ecf20Sopenharmony_ci if (!data->tags) { 79498c2ecf20Sopenharmony_ci kfree(data); 79508c2ecf20Sopenharmony_ci return -ENOMEM; 79518c2ecf20Sopenharmony_ci } 79528c2ecf20Sopenharmony_ci 79538c2ecf20Sopenharmony_ci data->nr = nr; 79548c2ecf20Sopenharmony_ci data->ctx = ctx; 79558c2ecf20Sopenharmony_ci data->do_put = do_put; 79568c2ecf20Sopenharmony_ci if (utags) { 79578c2ecf20Sopenharmony_ci ret = -EFAULT; 79588c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) { 79598c2ecf20Sopenharmony_ci u64 *tag_slot = io_get_tag_slot(data, i); 79608c2ecf20Sopenharmony_ci 79618c2ecf20Sopenharmony_ci if (copy_from_user(tag_slot, &utags[i], 79628c2ecf20Sopenharmony_ci sizeof(*tag_slot))) 79638c2ecf20Sopenharmony_ci goto fail; 79648c2ecf20Sopenharmony_ci } 79658c2ecf20Sopenharmony_ci } 79668c2ecf20Sopenharmony_ci 79678c2ecf20Sopenharmony_ci atomic_set(&data->refs, 1); 79688c2ecf20Sopenharmony_ci init_completion(&data->done); 79698c2ecf20Sopenharmony_ci *pdata = data; 79708c2ecf20Sopenharmony_ci return 0; 79718c2ecf20Sopenharmony_cifail: 79728c2ecf20Sopenharmony_ci io_rsrc_data_free(data); 79738c2ecf20Sopenharmony_ci return ret; 79748c2ecf20Sopenharmony_ci} 79758c2ecf20Sopenharmony_ci 79768c2ecf20Sopenharmony_cistatic bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files) 79778c2ecf20Sopenharmony_ci{ 79788c2ecf20Sopenharmony_ci table->files = kvcalloc(nr_files, sizeof(table->files[0]), 79798c2ecf20Sopenharmony_ci GFP_KERNEL_ACCOUNT); 79808c2ecf20Sopenharmony_ci return !!table->files; 79818c2ecf20Sopenharmony_ci} 79828c2ecf20Sopenharmony_ci 79838c2ecf20Sopenharmony_cistatic void io_free_file_tables(struct io_file_table *table) 79848c2ecf20Sopenharmony_ci{ 79858c2ecf20Sopenharmony_ci kvfree(table->files); 79868c2ecf20Sopenharmony_ci table->files = NULL; 79878c2ecf20Sopenharmony_ci} 79888c2ecf20Sopenharmony_ci 
79898c2ecf20Sopenharmony_cistatic void __io_sqe_files_unregister(struct io_ring_ctx *ctx) 79908c2ecf20Sopenharmony_ci{ 79918c2ecf20Sopenharmony_ci#if defined(CONFIG_UNIX) 79928c2ecf20Sopenharmony_ci if (ctx->ring_sock) { 79938c2ecf20Sopenharmony_ci struct sock *sock = ctx->ring_sock->sk; 79948c2ecf20Sopenharmony_ci struct sk_buff *skb; 79958c2ecf20Sopenharmony_ci 79968c2ecf20Sopenharmony_ci while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) 79978c2ecf20Sopenharmony_ci kfree_skb(skb); 79988c2ecf20Sopenharmony_ci } 79998c2ecf20Sopenharmony_ci#else 80008c2ecf20Sopenharmony_ci int i; 80018c2ecf20Sopenharmony_ci 80028c2ecf20Sopenharmony_ci for (i = 0; i < ctx->nr_user_files; i++) { 80038c2ecf20Sopenharmony_ci struct file *file; 80048c2ecf20Sopenharmony_ci 80058c2ecf20Sopenharmony_ci file = io_file_from_index(ctx, i); 80068c2ecf20Sopenharmony_ci if (file) 80078c2ecf20Sopenharmony_ci fput(file); 80088c2ecf20Sopenharmony_ci } 80098c2ecf20Sopenharmony_ci#endif 80108c2ecf20Sopenharmony_ci io_free_file_tables(&ctx->file_table); 80118c2ecf20Sopenharmony_ci io_rsrc_data_free(ctx->file_data); 80128c2ecf20Sopenharmony_ci ctx->file_data = NULL; 80138c2ecf20Sopenharmony_ci ctx->nr_user_files = 0; 80148c2ecf20Sopenharmony_ci} 80158c2ecf20Sopenharmony_ci 80168c2ecf20Sopenharmony_cistatic int io_sqe_files_unregister(struct io_ring_ctx *ctx) 80178c2ecf20Sopenharmony_ci{ 80188c2ecf20Sopenharmony_ci unsigned nr = ctx->nr_user_files; 80198c2ecf20Sopenharmony_ci int ret; 80208c2ecf20Sopenharmony_ci 80218c2ecf20Sopenharmony_ci if (!ctx->file_data) 80228c2ecf20Sopenharmony_ci return -ENXIO; 80238c2ecf20Sopenharmony_ci 80248c2ecf20Sopenharmony_ci /* 80258c2ecf20Sopenharmony_ci * Quiesce may unlock ->uring_lock, and while it's not held 80268c2ecf20Sopenharmony_ci * prevent new requests using the table. 
80278c2ecf20Sopenharmony_ci */ 80288c2ecf20Sopenharmony_ci ctx->nr_user_files = 0; 80298c2ecf20Sopenharmony_ci ret = io_rsrc_ref_quiesce(ctx->file_data, ctx); 80308c2ecf20Sopenharmony_ci ctx->nr_user_files = nr; 80318c2ecf20Sopenharmony_ci if (!ret) 80328c2ecf20Sopenharmony_ci __io_sqe_files_unregister(ctx); 80338c2ecf20Sopenharmony_ci return ret; 80348c2ecf20Sopenharmony_ci} 80358c2ecf20Sopenharmony_ci 80368c2ecf20Sopenharmony_cistatic void io_sq_thread_unpark(struct io_sq_data *sqd) 80378c2ecf20Sopenharmony_ci __releases(&sqd->lock) 80388c2ecf20Sopenharmony_ci{ 80398c2ecf20Sopenharmony_ci WARN_ON_ONCE(sqd->thread == current); 80408c2ecf20Sopenharmony_ci 80418c2ecf20Sopenharmony_ci /* 80428c2ecf20Sopenharmony_ci * Do the dance but not conditional clear_bit() because it'd race with 80438c2ecf20Sopenharmony_ci * other threads incrementing park_pending and setting the bit. 80448c2ecf20Sopenharmony_ci */ 80458c2ecf20Sopenharmony_ci clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); 80468c2ecf20Sopenharmony_ci if (atomic_dec_return(&sqd->park_pending)) 80478c2ecf20Sopenharmony_ci set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); 80488c2ecf20Sopenharmony_ci mutex_unlock(&sqd->lock); 80498c2ecf20Sopenharmony_ci} 80508c2ecf20Sopenharmony_ci 80518c2ecf20Sopenharmony_cistatic void io_sq_thread_park(struct io_sq_data *sqd) 80528c2ecf20Sopenharmony_ci __acquires(&sqd->lock) 80538c2ecf20Sopenharmony_ci{ 80548c2ecf20Sopenharmony_ci WARN_ON_ONCE(sqd->thread == current); 80558c2ecf20Sopenharmony_ci 80568c2ecf20Sopenharmony_ci atomic_inc(&sqd->park_pending); 80578c2ecf20Sopenharmony_ci set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); 80588c2ecf20Sopenharmony_ci mutex_lock(&sqd->lock); 80598c2ecf20Sopenharmony_ci if (sqd->thread) 80608c2ecf20Sopenharmony_ci wake_up_process(sqd->thread); 80618c2ecf20Sopenharmony_ci} 80628c2ecf20Sopenharmony_ci 80638c2ecf20Sopenharmony_cistatic void io_sq_thread_stop(struct io_sq_data *sqd) 80648c2ecf20Sopenharmony_ci{ 80658c2ecf20Sopenharmony_ci 
WARN_ON_ONCE(sqd->thread == current); 80668c2ecf20Sopenharmony_ci WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); 80678c2ecf20Sopenharmony_ci 80688c2ecf20Sopenharmony_ci set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); 80698c2ecf20Sopenharmony_ci mutex_lock(&sqd->lock); 80708c2ecf20Sopenharmony_ci if (sqd->thread) 80718c2ecf20Sopenharmony_ci wake_up_process(sqd->thread); 80728c2ecf20Sopenharmony_ci mutex_unlock(&sqd->lock); 80738c2ecf20Sopenharmony_ci wait_for_completion(&sqd->exited); 80748c2ecf20Sopenharmony_ci} 80758c2ecf20Sopenharmony_ci 80768c2ecf20Sopenharmony_cistatic void io_put_sq_data(struct io_sq_data *sqd) 80778c2ecf20Sopenharmony_ci{ 80788c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&sqd->refs)) { 80798c2ecf20Sopenharmony_ci WARN_ON_ONCE(atomic_read(&sqd->park_pending)); 80808c2ecf20Sopenharmony_ci 80818c2ecf20Sopenharmony_ci io_sq_thread_stop(sqd); 80828c2ecf20Sopenharmony_ci kfree(sqd); 80838c2ecf20Sopenharmony_ci } 80848c2ecf20Sopenharmony_ci} 80858c2ecf20Sopenharmony_ci 80868c2ecf20Sopenharmony_cistatic void io_sq_thread_finish(struct io_ring_ctx *ctx) 80878c2ecf20Sopenharmony_ci{ 80888c2ecf20Sopenharmony_ci struct io_sq_data *sqd = ctx->sq_data; 80898c2ecf20Sopenharmony_ci 80908c2ecf20Sopenharmony_ci if (sqd) { 80918c2ecf20Sopenharmony_ci io_sq_thread_park(sqd); 80928c2ecf20Sopenharmony_ci list_del_init(&ctx->sqd_list); 80938c2ecf20Sopenharmony_ci io_sqd_update_thread_idle(sqd); 80948c2ecf20Sopenharmony_ci io_sq_thread_unpark(sqd); 80958c2ecf20Sopenharmony_ci 80968c2ecf20Sopenharmony_ci io_put_sq_data(sqd); 80978c2ecf20Sopenharmony_ci ctx->sq_data = NULL; 80988c2ecf20Sopenharmony_ci } 80998c2ecf20Sopenharmony_ci} 81008c2ecf20Sopenharmony_ci 81018c2ecf20Sopenharmony_cistatic struct io_sq_data *io_attach_sq_data(struct io_uring_params *p) 81028c2ecf20Sopenharmony_ci{ 81038c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx_attach; 81048c2ecf20Sopenharmony_ci struct io_sq_data *sqd; 81058c2ecf20Sopenharmony_ci struct fd f; 
81068c2ecf20Sopenharmony_ci 81078c2ecf20Sopenharmony_ci f = fdget(p->wq_fd); 81088c2ecf20Sopenharmony_ci if (!f.file) 81098c2ecf20Sopenharmony_ci return ERR_PTR(-ENXIO); 81108c2ecf20Sopenharmony_ci if (f.file->f_op != &io_uring_fops) { 81118c2ecf20Sopenharmony_ci fdput(f); 81128c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 81138c2ecf20Sopenharmony_ci } 81148c2ecf20Sopenharmony_ci 81158c2ecf20Sopenharmony_ci ctx_attach = f.file->private_data; 81168c2ecf20Sopenharmony_ci sqd = ctx_attach->sq_data; 81178c2ecf20Sopenharmony_ci if (!sqd) { 81188c2ecf20Sopenharmony_ci fdput(f); 81198c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 81208c2ecf20Sopenharmony_ci } 81218c2ecf20Sopenharmony_ci if (sqd->task_tgid != current->tgid) { 81228c2ecf20Sopenharmony_ci fdput(f); 81238c2ecf20Sopenharmony_ci return ERR_PTR(-EPERM); 81248c2ecf20Sopenharmony_ci } 81258c2ecf20Sopenharmony_ci 81268c2ecf20Sopenharmony_ci refcount_inc(&sqd->refs); 81278c2ecf20Sopenharmony_ci fdput(f); 81288c2ecf20Sopenharmony_ci return sqd; 81298c2ecf20Sopenharmony_ci} 81308c2ecf20Sopenharmony_ci 81318c2ecf20Sopenharmony_cistatic struct io_sq_data *io_get_sq_data(struct io_uring_params *p, 81328c2ecf20Sopenharmony_ci bool *attached) 81338c2ecf20Sopenharmony_ci{ 81348c2ecf20Sopenharmony_ci struct io_sq_data *sqd; 81358c2ecf20Sopenharmony_ci 81368c2ecf20Sopenharmony_ci *attached = false; 81378c2ecf20Sopenharmony_ci if (p->flags & IORING_SETUP_ATTACH_WQ) { 81388c2ecf20Sopenharmony_ci sqd = io_attach_sq_data(p); 81398c2ecf20Sopenharmony_ci if (!IS_ERR(sqd)) { 81408c2ecf20Sopenharmony_ci *attached = true; 81418c2ecf20Sopenharmony_ci return sqd; 81428c2ecf20Sopenharmony_ci } 81438c2ecf20Sopenharmony_ci /* fall through for EPERM case, setup new sqd/task */ 81448c2ecf20Sopenharmony_ci if (PTR_ERR(sqd) != -EPERM) 81458c2ecf20Sopenharmony_ci return sqd; 81468c2ecf20Sopenharmony_ci } 81478c2ecf20Sopenharmony_ci 81488c2ecf20Sopenharmony_ci sqd = kzalloc(sizeof(*sqd), GFP_KERNEL); 81498c2ecf20Sopenharmony_ci if (!sqd) 
81508c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 81518c2ecf20Sopenharmony_ci 81528c2ecf20Sopenharmony_ci atomic_set(&sqd->park_pending, 0); 81538c2ecf20Sopenharmony_ci refcount_set(&sqd->refs, 1); 81548c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&sqd->ctx_list); 81558c2ecf20Sopenharmony_ci mutex_init(&sqd->lock); 81568c2ecf20Sopenharmony_ci init_waitqueue_head(&sqd->wait); 81578c2ecf20Sopenharmony_ci init_completion(&sqd->exited); 81588c2ecf20Sopenharmony_ci return sqd; 81598c2ecf20Sopenharmony_ci} 81608c2ecf20Sopenharmony_ci 81618c2ecf20Sopenharmony_ci#if defined(CONFIG_UNIX) 81628c2ecf20Sopenharmony_ci/* 81638c2ecf20Sopenharmony_ci * Ensure the UNIX gc is aware of our file set, so we are certain that 81648c2ecf20Sopenharmony_ci * the io_uring can be safely unregistered on process exit, even if we have 81658c2ecf20Sopenharmony_ci * loops in the file referencing. 81668c2ecf20Sopenharmony_ci */ 81678c2ecf20Sopenharmony_cistatic int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset) 81688c2ecf20Sopenharmony_ci{ 81698c2ecf20Sopenharmony_ci struct sock *sk = ctx->ring_sock->sk; 81708c2ecf20Sopenharmony_ci struct scm_fp_list *fpl; 81718c2ecf20Sopenharmony_ci struct sk_buff *skb; 81728c2ecf20Sopenharmony_ci int i, nr_files; 81738c2ecf20Sopenharmony_ci 81748c2ecf20Sopenharmony_ci fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); 81758c2ecf20Sopenharmony_ci if (!fpl) 81768c2ecf20Sopenharmony_ci return -ENOMEM; 81778c2ecf20Sopenharmony_ci 81788c2ecf20Sopenharmony_ci skb = alloc_skb(0, GFP_KERNEL); 81798c2ecf20Sopenharmony_ci if (!skb) { 81808c2ecf20Sopenharmony_ci kfree(fpl); 81818c2ecf20Sopenharmony_ci return -ENOMEM; 81828c2ecf20Sopenharmony_ci } 81838c2ecf20Sopenharmony_ci 81848c2ecf20Sopenharmony_ci skb->sk = sk; 81858c2ecf20Sopenharmony_ci skb->scm_io_uring = 1; 81868c2ecf20Sopenharmony_ci 81878c2ecf20Sopenharmony_ci nr_files = 0; 81888c2ecf20Sopenharmony_ci fpl->user = get_uid(current_user()); 81898c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) { 
81908c2ecf20Sopenharmony_ci struct file *file = io_file_from_index(ctx, i + offset); 81918c2ecf20Sopenharmony_ci 81928c2ecf20Sopenharmony_ci if (!file) 81938c2ecf20Sopenharmony_ci continue; 81948c2ecf20Sopenharmony_ci fpl->fp[nr_files] = get_file(file); 81958c2ecf20Sopenharmony_ci unix_inflight(fpl->user, fpl->fp[nr_files]); 81968c2ecf20Sopenharmony_ci nr_files++; 81978c2ecf20Sopenharmony_ci } 81988c2ecf20Sopenharmony_ci 81998c2ecf20Sopenharmony_ci if (nr_files) { 82008c2ecf20Sopenharmony_ci fpl->max = SCM_MAX_FD; 82018c2ecf20Sopenharmony_ci fpl->count = nr_files; 82028c2ecf20Sopenharmony_ci UNIXCB(skb).fp = fpl; 82038c2ecf20Sopenharmony_ci skb->destructor = unix_destruct_scm; 82048c2ecf20Sopenharmony_ci refcount_add(skb->truesize, &sk->sk_wmem_alloc); 82058c2ecf20Sopenharmony_ci skb_queue_head(&sk->sk_receive_queue, skb); 82068c2ecf20Sopenharmony_ci 82078c2ecf20Sopenharmony_ci for (i = 0; i < nr; i++) { 82088c2ecf20Sopenharmony_ci struct file *file = io_file_from_index(ctx, i + offset); 82098c2ecf20Sopenharmony_ci 82108c2ecf20Sopenharmony_ci if (file) 82118c2ecf20Sopenharmony_ci fput(file); 82128c2ecf20Sopenharmony_ci } 82138c2ecf20Sopenharmony_ci } else { 82148c2ecf20Sopenharmony_ci kfree_skb(skb); 82158c2ecf20Sopenharmony_ci free_uid(fpl->user); 82168c2ecf20Sopenharmony_ci kfree(fpl); 82178c2ecf20Sopenharmony_ci } 82188c2ecf20Sopenharmony_ci 82198c2ecf20Sopenharmony_ci return 0; 82208c2ecf20Sopenharmony_ci} 82218c2ecf20Sopenharmony_ci 82228c2ecf20Sopenharmony_ci/* 82238c2ecf20Sopenharmony_ci * If UNIX sockets are enabled, fd passing can cause a reference cycle which 82248c2ecf20Sopenharmony_ci * causes regular reference counting to break down. We rely on the UNIX 82258c2ecf20Sopenharmony_ci * garbage collection to take care of this problem for us. 
82268c2ecf20Sopenharmony_ci */ 82278c2ecf20Sopenharmony_cistatic int io_sqe_files_scm(struct io_ring_ctx *ctx) 82288c2ecf20Sopenharmony_ci{ 82298c2ecf20Sopenharmony_ci unsigned left, total; 82308c2ecf20Sopenharmony_ci int ret = 0; 82318c2ecf20Sopenharmony_ci 82328c2ecf20Sopenharmony_ci total = 0; 82338c2ecf20Sopenharmony_ci left = ctx->nr_user_files; 82348c2ecf20Sopenharmony_ci while (left) { 82358c2ecf20Sopenharmony_ci unsigned this_files = min_t(unsigned, left, SCM_MAX_FD); 82368c2ecf20Sopenharmony_ci 82378c2ecf20Sopenharmony_ci ret = __io_sqe_files_scm(ctx, this_files, total); 82388c2ecf20Sopenharmony_ci if (ret) 82398c2ecf20Sopenharmony_ci break; 82408c2ecf20Sopenharmony_ci left -= this_files; 82418c2ecf20Sopenharmony_ci total += this_files; 82428c2ecf20Sopenharmony_ci } 82438c2ecf20Sopenharmony_ci 82448c2ecf20Sopenharmony_ci if (!ret) 82458c2ecf20Sopenharmony_ci return 0; 82468c2ecf20Sopenharmony_ci 82478c2ecf20Sopenharmony_ci while (total < ctx->nr_user_files) { 82488c2ecf20Sopenharmony_ci struct file *file = io_file_from_index(ctx, total); 82498c2ecf20Sopenharmony_ci 82508c2ecf20Sopenharmony_ci if (file) 82518c2ecf20Sopenharmony_ci fput(file); 82528c2ecf20Sopenharmony_ci total++; 82538c2ecf20Sopenharmony_ci } 82548c2ecf20Sopenharmony_ci 82558c2ecf20Sopenharmony_ci return ret; 82568c2ecf20Sopenharmony_ci} 82578c2ecf20Sopenharmony_ci#else 82588c2ecf20Sopenharmony_cistatic int io_sqe_files_scm(struct io_ring_ctx *ctx) 82598c2ecf20Sopenharmony_ci{ 82608c2ecf20Sopenharmony_ci return 0; 82618c2ecf20Sopenharmony_ci} 82628c2ecf20Sopenharmony_ci#endif 82638c2ecf20Sopenharmony_ci 82648c2ecf20Sopenharmony_cistatic void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) 82658c2ecf20Sopenharmony_ci{ 82668c2ecf20Sopenharmony_ci struct file *file = prsrc->file; 82678c2ecf20Sopenharmony_ci#if defined(CONFIG_UNIX) 82688c2ecf20Sopenharmony_ci struct sock *sock = ctx->ring_sock->sk; 82698c2ecf20Sopenharmony_ci struct sk_buff_head list, *head = 
&sock->sk_receive_queue; 82708c2ecf20Sopenharmony_ci struct sk_buff *skb; 82718c2ecf20Sopenharmony_ci int i; 82728c2ecf20Sopenharmony_ci 82738c2ecf20Sopenharmony_ci __skb_queue_head_init(&list); 82748c2ecf20Sopenharmony_ci 82758c2ecf20Sopenharmony_ci /* 82768c2ecf20Sopenharmony_ci * Find the skb that holds this file in its SCM_RIGHTS. When found, 82778c2ecf20Sopenharmony_ci * remove this entry and rearrange the file array. 82788c2ecf20Sopenharmony_ci */ 82798c2ecf20Sopenharmony_ci skb = skb_dequeue(head); 82808c2ecf20Sopenharmony_ci while (skb) { 82818c2ecf20Sopenharmony_ci struct scm_fp_list *fp; 82828c2ecf20Sopenharmony_ci 82838c2ecf20Sopenharmony_ci fp = UNIXCB(skb).fp; 82848c2ecf20Sopenharmony_ci for (i = 0; i < fp->count; i++) { 82858c2ecf20Sopenharmony_ci int left; 82868c2ecf20Sopenharmony_ci 82878c2ecf20Sopenharmony_ci if (fp->fp[i] != file) 82888c2ecf20Sopenharmony_ci continue; 82898c2ecf20Sopenharmony_ci 82908c2ecf20Sopenharmony_ci unix_notinflight(fp->user, fp->fp[i]); 82918c2ecf20Sopenharmony_ci left = fp->count - 1 - i; 82928c2ecf20Sopenharmony_ci if (left) { 82938c2ecf20Sopenharmony_ci memmove(&fp->fp[i], &fp->fp[i + 1], 82948c2ecf20Sopenharmony_ci left * sizeof(struct file *)); 82958c2ecf20Sopenharmony_ci } 82968c2ecf20Sopenharmony_ci fp->count--; 82978c2ecf20Sopenharmony_ci if (!fp->count) { 82988c2ecf20Sopenharmony_ci kfree_skb(skb); 82998c2ecf20Sopenharmony_ci skb = NULL; 83008c2ecf20Sopenharmony_ci } else { 83018c2ecf20Sopenharmony_ci __skb_queue_tail(&list, skb); 83028c2ecf20Sopenharmony_ci } 83038c2ecf20Sopenharmony_ci fput(file); 83048c2ecf20Sopenharmony_ci file = NULL; 83058c2ecf20Sopenharmony_ci break; 83068c2ecf20Sopenharmony_ci } 83078c2ecf20Sopenharmony_ci 83088c2ecf20Sopenharmony_ci if (!file) 83098c2ecf20Sopenharmony_ci break; 83108c2ecf20Sopenharmony_ci 83118c2ecf20Sopenharmony_ci __skb_queue_tail(&list, skb); 83128c2ecf20Sopenharmony_ci 83138c2ecf20Sopenharmony_ci skb = skb_dequeue(head); 83148c2ecf20Sopenharmony_ci } 
83158c2ecf20Sopenharmony_ci 83168c2ecf20Sopenharmony_ci if (skb_peek(&list)) { 83178c2ecf20Sopenharmony_ci spin_lock_irq(&head->lock); 83188c2ecf20Sopenharmony_ci while ((skb = __skb_dequeue(&list)) != NULL) 83198c2ecf20Sopenharmony_ci __skb_queue_tail(head, skb); 83208c2ecf20Sopenharmony_ci spin_unlock_irq(&head->lock); 83218c2ecf20Sopenharmony_ci } 83228c2ecf20Sopenharmony_ci#else 83238c2ecf20Sopenharmony_ci fput(file); 83248c2ecf20Sopenharmony_ci#endif 83258c2ecf20Sopenharmony_ci} 83268c2ecf20Sopenharmony_ci 83278c2ecf20Sopenharmony_cistatic void __io_rsrc_put_work(struct io_rsrc_node *ref_node) 83288c2ecf20Sopenharmony_ci{ 83298c2ecf20Sopenharmony_ci struct io_rsrc_data *rsrc_data = ref_node->rsrc_data; 83308c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = rsrc_data->ctx; 83318c2ecf20Sopenharmony_ci struct io_rsrc_put *prsrc, *tmp; 83328c2ecf20Sopenharmony_ci 83338c2ecf20Sopenharmony_ci list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) { 83348c2ecf20Sopenharmony_ci list_del(&prsrc->list); 83358c2ecf20Sopenharmony_ci 83368c2ecf20Sopenharmony_ci if (prsrc->tag) { 83378c2ecf20Sopenharmony_ci bool lock_ring = ctx->flags & IORING_SETUP_IOPOLL; 83388c2ecf20Sopenharmony_ci 83398c2ecf20Sopenharmony_ci io_ring_submit_lock(ctx, lock_ring); 83408c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 83418c2ecf20Sopenharmony_ci io_fill_cqe_aux(ctx, prsrc->tag, 0, 0); 83428c2ecf20Sopenharmony_ci io_commit_cqring(ctx); 83438c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 83448c2ecf20Sopenharmony_ci io_cqring_ev_posted(ctx); 83458c2ecf20Sopenharmony_ci io_ring_submit_unlock(ctx, lock_ring); 83468c2ecf20Sopenharmony_ci } 83478c2ecf20Sopenharmony_ci 83488c2ecf20Sopenharmony_ci rsrc_data->do_put(ctx, prsrc); 83498c2ecf20Sopenharmony_ci kfree(prsrc); 83508c2ecf20Sopenharmony_ci } 83518c2ecf20Sopenharmony_ci 83528c2ecf20Sopenharmony_ci io_rsrc_node_destroy(ref_node); 83538c2ecf20Sopenharmony_ci if (atomic_dec_and_test(&rsrc_data->refs)) 
83548c2ecf20Sopenharmony_ci complete(&rsrc_data->done); 83558c2ecf20Sopenharmony_ci} 83568c2ecf20Sopenharmony_ci 83578c2ecf20Sopenharmony_cistatic void io_rsrc_put_work(struct work_struct *work) 83588c2ecf20Sopenharmony_ci{ 83598c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 83608c2ecf20Sopenharmony_ci struct llist_node *node; 83618c2ecf20Sopenharmony_ci 83628c2ecf20Sopenharmony_ci ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work); 83638c2ecf20Sopenharmony_ci node = llist_del_all(&ctx->rsrc_put_llist); 83648c2ecf20Sopenharmony_ci 83658c2ecf20Sopenharmony_ci while (node) { 83668c2ecf20Sopenharmony_ci struct io_rsrc_node *ref_node; 83678c2ecf20Sopenharmony_ci struct llist_node *next = node->next; 83688c2ecf20Sopenharmony_ci 83698c2ecf20Sopenharmony_ci ref_node = llist_entry(node, struct io_rsrc_node, llist); 83708c2ecf20Sopenharmony_ci __io_rsrc_put_work(ref_node); 83718c2ecf20Sopenharmony_ci node = next; 83728c2ecf20Sopenharmony_ci } 83738c2ecf20Sopenharmony_ci} 83748c2ecf20Sopenharmony_ci 83758c2ecf20Sopenharmony_cistatic int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, 83768c2ecf20Sopenharmony_ci unsigned nr_args, u64 __user *tags) 83778c2ecf20Sopenharmony_ci{ 83788c2ecf20Sopenharmony_ci __s32 __user *fds = (__s32 __user *) arg; 83798c2ecf20Sopenharmony_ci struct file *file; 83808c2ecf20Sopenharmony_ci int fd, ret; 83818c2ecf20Sopenharmony_ci unsigned i; 83828c2ecf20Sopenharmony_ci 83838c2ecf20Sopenharmony_ci if (ctx->file_data) 83848c2ecf20Sopenharmony_ci return -EBUSY; 83858c2ecf20Sopenharmony_ci if (!nr_args) 83868c2ecf20Sopenharmony_ci return -EINVAL; 83878c2ecf20Sopenharmony_ci if (nr_args > IORING_MAX_FIXED_FILES) 83888c2ecf20Sopenharmony_ci return -EMFILE; 83898c2ecf20Sopenharmony_ci if (nr_args > rlimit(RLIMIT_NOFILE)) 83908c2ecf20Sopenharmony_ci return -EMFILE; 83918c2ecf20Sopenharmony_ci ret = io_rsrc_node_switch_start(ctx); 83928c2ecf20Sopenharmony_ci if (ret) 83938c2ecf20Sopenharmony_ci return ret; 
83948c2ecf20Sopenharmony_ci ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args, 83958c2ecf20Sopenharmony_ci &ctx->file_data); 83968c2ecf20Sopenharmony_ci if (ret) 83978c2ecf20Sopenharmony_ci return ret; 83988c2ecf20Sopenharmony_ci 83998c2ecf20Sopenharmony_ci ret = -ENOMEM; 84008c2ecf20Sopenharmony_ci if (!io_alloc_file_tables(&ctx->file_table, nr_args)) 84018c2ecf20Sopenharmony_ci goto out_free; 84028c2ecf20Sopenharmony_ci 84038c2ecf20Sopenharmony_ci for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { 84048c2ecf20Sopenharmony_ci if (copy_from_user(&fd, &fds[i], sizeof(fd))) { 84058c2ecf20Sopenharmony_ci ret = -EFAULT; 84068c2ecf20Sopenharmony_ci goto out_fput; 84078c2ecf20Sopenharmony_ci } 84088c2ecf20Sopenharmony_ci /* allow sparse sets */ 84098c2ecf20Sopenharmony_ci if (fd == -1) { 84108c2ecf20Sopenharmony_ci ret = -EINVAL; 84118c2ecf20Sopenharmony_ci if (unlikely(*io_get_tag_slot(ctx->file_data, i))) 84128c2ecf20Sopenharmony_ci goto out_fput; 84138c2ecf20Sopenharmony_ci continue; 84148c2ecf20Sopenharmony_ci } 84158c2ecf20Sopenharmony_ci 84168c2ecf20Sopenharmony_ci file = fget(fd); 84178c2ecf20Sopenharmony_ci ret = -EBADF; 84188c2ecf20Sopenharmony_ci if (unlikely(!file)) 84198c2ecf20Sopenharmony_ci goto out_fput; 84208c2ecf20Sopenharmony_ci 84218c2ecf20Sopenharmony_ci /* 84228c2ecf20Sopenharmony_ci * Don't allow io_uring instances to be registered. If UNIX 84238c2ecf20Sopenharmony_ci * isn't enabled, then this causes a reference cycle and this 84248c2ecf20Sopenharmony_ci * instance can never get freed. If UNIX is enabled we'll 84258c2ecf20Sopenharmony_ci * handle it just fine, but there's still no point in allowing 84268c2ecf20Sopenharmony_ci * a ring fd as it doesn't support regular read/write anyway. 
84278c2ecf20Sopenharmony_ci */ 84288c2ecf20Sopenharmony_ci if (file->f_op == &io_uring_fops) { 84298c2ecf20Sopenharmony_ci fput(file); 84308c2ecf20Sopenharmony_ci goto out_fput; 84318c2ecf20Sopenharmony_ci } 84328c2ecf20Sopenharmony_ci io_fixed_file_set(io_fixed_file_slot(&ctx->file_table, i), file); 84338c2ecf20Sopenharmony_ci } 84348c2ecf20Sopenharmony_ci 84358c2ecf20Sopenharmony_ci ret = io_sqe_files_scm(ctx); 84368c2ecf20Sopenharmony_ci if (ret) { 84378c2ecf20Sopenharmony_ci __io_sqe_files_unregister(ctx); 84388c2ecf20Sopenharmony_ci return ret; 84398c2ecf20Sopenharmony_ci } 84408c2ecf20Sopenharmony_ci 84418c2ecf20Sopenharmony_ci io_rsrc_node_switch(ctx, NULL); 84428c2ecf20Sopenharmony_ci return ret; 84438c2ecf20Sopenharmony_ciout_fput: 84448c2ecf20Sopenharmony_ci for (i = 0; i < ctx->nr_user_files; i++) { 84458c2ecf20Sopenharmony_ci file = io_file_from_index(ctx, i); 84468c2ecf20Sopenharmony_ci if (file) 84478c2ecf20Sopenharmony_ci fput(file); 84488c2ecf20Sopenharmony_ci } 84498c2ecf20Sopenharmony_ci io_free_file_tables(&ctx->file_table); 84508c2ecf20Sopenharmony_ci ctx->nr_user_files = 0; 84518c2ecf20Sopenharmony_ciout_free: 84528c2ecf20Sopenharmony_ci io_rsrc_data_free(ctx->file_data); 84538c2ecf20Sopenharmony_ci ctx->file_data = NULL; 84548c2ecf20Sopenharmony_ci return ret; 84558c2ecf20Sopenharmony_ci} 84568c2ecf20Sopenharmony_ci 84578c2ecf20Sopenharmony_cistatic int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, 84588c2ecf20Sopenharmony_ci struct io_rsrc_node *node, void *rsrc) 84598c2ecf20Sopenharmony_ci{ 84608c2ecf20Sopenharmony_ci u64 *tag_slot = io_get_tag_slot(data, idx); 84618c2ecf20Sopenharmony_ci struct io_rsrc_put *prsrc; 84628c2ecf20Sopenharmony_ci 84638c2ecf20Sopenharmony_ci prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); 84648c2ecf20Sopenharmony_ci if (!prsrc) 84658c2ecf20Sopenharmony_ci return -ENOMEM; 84668c2ecf20Sopenharmony_ci 84678c2ecf20Sopenharmony_ci prsrc->tag = *tag_slot; 84688c2ecf20Sopenharmony_ci *tag_slot = 0; 
84698c2ecf20Sopenharmony_ci prsrc->rsrc = rsrc; 84708c2ecf20Sopenharmony_ci list_add(&prsrc->list, &node->rsrc_list); 84718c2ecf20Sopenharmony_ci return 0; 84728c2ecf20Sopenharmony_ci} 84738c2ecf20Sopenharmony_ci 84748c2ecf20Sopenharmony_cistatic int io_install_fixed_file(struct io_kiocb *req, struct file *file, 84758c2ecf20Sopenharmony_ci unsigned int issue_flags, u32 slot_index) 84768c2ecf20Sopenharmony_ci{ 84778c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 84788c2ecf20Sopenharmony_ci bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; 84798c2ecf20Sopenharmony_ci bool needs_switch = false; 84808c2ecf20Sopenharmony_ci struct io_fixed_file *file_slot; 84818c2ecf20Sopenharmony_ci int ret = -EBADF; 84828c2ecf20Sopenharmony_ci 84838c2ecf20Sopenharmony_ci io_ring_submit_lock(ctx, !force_nonblock); 84848c2ecf20Sopenharmony_ci if (file->f_op == &io_uring_fops) 84858c2ecf20Sopenharmony_ci goto err; 84868c2ecf20Sopenharmony_ci ret = -ENXIO; 84878c2ecf20Sopenharmony_ci if (!ctx->file_data) 84888c2ecf20Sopenharmony_ci goto err; 84898c2ecf20Sopenharmony_ci ret = -EINVAL; 84908c2ecf20Sopenharmony_ci if (slot_index >= ctx->nr_user_files) 84918c2ecf20Sopenharmony_ci goto err; 84928c2ecf20Sopenharmony_ci 84938c2ecf20Sopenharmony_ci slot_index = array_index_nospec(slot_index, ctx->nr_user_files); 84948c2ecf20Sopenharmony_ci file_slot = io_fixed_file_slot(&ctx->file_table, slot_index); 84958c2ecf20Sopenharmony_ci 84968c2ecf20Sopenharmony_ci if (file_slot->file_ptr) { 84978c2ecf20Sopenharmony_ci struct file *old_file; 84988c2ecf20Sopenharmony_ci 84998c2ecf20Sopenharmony_ci ret = io_rsrc_node_switch_start(ctx); 85008c2ecf20Sopenharmony_ci if (ret) 85018c2ecf20Sopenharmony_ci goto err; 85028c2ecf20Sopenharmony_ci 85038c2ecf20Sopenharmony_ci old_file = (struct file *)(file_slot->file_ptr & FFS_MASK); 85048c2ecf20Sopenharmony_ci ret = io_queue_rsrc_removal(ctx->file_data, slot_index, 85058c2ecf20Sopenharmony_ci ctx->rsrc_node, old_file); 85068c2ecf20Sopenharmony_ci if 
(ret) 85078c2ecf20Sopenharmony_ci goto err; 85088c2ecf20Sopenharmony_ci file_slot->file_ptr = 0; 85098c2ecf20Sopenharmony_ci needs_switch = true; 85108c2ecf20Sopenharmony_ci } 85118c2ecf20Sopenharmony_ci 85128c2ecf20Sopenharmony_ci *io_get_tag_slot(ctx->file_data, slot_index) = 0; 85138c2ecf20Sopenharmony_ci io_fixed_file_set(file_slot, file); 85148c2ecf20Sopenharmony_ci ret = 0; 85158c2ecf20Sopenharmony_cierr: 85168c2ecf20Sopenharmony_ci if (needs_switch) 85178c2ecf20Sopenharmony_ci io_rsrc_node_switch(ctx, ctx->file_data); 85188c2ecf20Sopenharmony_ci io_ring_submit_unlock(ctx, !force_nonblock); 85198c2ecf20Sopenharmony_ci if (ret) 85208c2ecf20Sopenharmony_ci fput(file); 85218c2ecf20Sopenharmony_ci return ret; 85228c2ecf20Sopenharmony_ci} 85238c2ecf20Sopenharmony_ci 85248c2ecf20Sopenharmony_cistatic int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) 85258c2ecf20Sopenharmony_ci{ 85268c2ecf20Sopenharmony_ci unsigned int offset = req->close.file_slot - 1; 85278c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 85288c2ecf20Sopenharmony_ci struct io_fixed_file *file_slot; 85298c2ecf20Sopenharmony_ci struct file *file; 85308c2ecf20Sopenharmony_ci int ret; 85318c2ecf20Sopenharmony_ci 85328c2ecf20Sopenharmony_ci io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); 85338c2ecf20Sopenharmony_ci ret = -ENXIO; 85348c2ecf20Sopenharmony_ci if (unlikely(!ctx->file_data)) 85358c2ecf20Sopenharmony_ci goto out; 85368c2ecf20Sopenharmony_ci ret = -EINVAL; 85378c2ecf20Sopenharmony_ci if (offset >= ctx->nr_user_files) 85388c2ecf20Sopenharmony_ci goto out; 85398c2ecf20Sopenharmony_ci ret = io_rsrc_node_switch_start(ctx); 85408c2ecf20Sopenharmony_ci if (ret) 85418c2ecf20Sopenharmony_ci goto out; 85428c2ecf20Sopenharmony_ci 85438c2ecf20Sopenharmony_ci offset = array_index_nospec(offset, ctx->nr_user_files); 85448c2ecf20Sopenharmony_ci file_slot = io_fixed_file_slot(&ctx->file_table, offset); 85458c2ecf20Sopenharmony_ci ret = -EBADF; 
85468c2ecf20Sopenharmony_ci if (!file_slot->file_ptr) 85478c2ecf20Sopenharmony_ci goto out; 85488c2ecf20Sopenharmony_ci 85498c2ecf20Sopenharmony_ci file = (struct file *)(file_slot->file_ptr & FFS_MASK); 85508c2ecf20Sopenharmony_ci ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file); 85518c2ecf20Sopenharmony_ci if (ret) 85528c2ecf20Sopenharmony_ci goto out; 85538c2ecf20Sopenharmony_ci 85548c2ecf20Sopenharmony_ci file_slot->file_ptr = 0; 85558c2ecf20Sopenharmony_ci io_rsrc_node_switch(ctx, ctx->file_data); 85568c2ecf20Sopenharmony_ci ret = 0; 85578c2ecf20Sopenharmony_ciout: 85588c2ecf20Sopenharmony_ci io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK)); 85598c2ecf20Sopenharmony_ci return ret; 85608c2ecf20Sopenharmony_ci} 85618c2ecf20Sopenharmony_ci 85628c2ecf20Sopenharmony_cistatic int __io_sqe_files_update(struct io_ring_ctx *ctx, 85638c2ecf20Sopenharmony_ci struct io_uring_rsrc_update2 *up, 85648c2ecf20Sopenharmony_ci unsigned nr_args) 85658c2ecf20Sopenharmony_ci{ 85668c2ecf20Sopenharmony_ci u64 __user *tags = u64_to_user_ptr(up->tags); 85678c2ecf20Sopenharmony_ci __s32 __user *fds = u64_to_user_ptr(up->data); 85688c2ecf20Sopenharmony_ci struct io_rsrc_data *data = ctx->file_data; 85698c2ecf20Sopenharmony_ci struct io_fixed_file *file_slot; 85708c2ecf20Sopenharmony_ci struct file *file; 85718c2ecf20Sopenharmony_ci int fd, i, err = 0; 85728c2ecf20Sopenharmony_ci unsigned int done; 85738c2ecf20Sopenharmony_ci bool needs_switch = false; 85748c2ecf20Sopenharmony_ci 85758c2ecf20Sopenharmony_ci if (!ctx->file_data) 85768c2ecf20Sopenharmony_ci return -ENXIO; 85778c2ecf20Sopenharmony_ci if (up->offset + nr_args > ctx->nr_user_files) 85788c2ecf20Sopenharmony_ci return -EINVAL; 85798c2ecf20Sopenharmony_ci 85808c2ecf20Sopenharmony_ci for (done = 0; done < nr_args; done++) { 85818c2ecf20Sopenharmony_ci u64 tag = 0; 85828c2ecf20Sopenharmony_ci 85838c2ecf20Sopenharmony_ci if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) || 
85848c2ecf20Sopenharmony_ci copy_from_user(&fd, &fds[done], sizeof(fd))) { 85858c2ecf20Sopenharmony_ci err = -EFAULT; 85868c2ecf20Sopenharmony_ci break; 85878c2ecf20Sopenharmony_ci } 85888c2ecf20Sopenharmony_ci if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) { 85898c2ecf20Sopenharmony_ci err = -EINVAL; 85908c2ecf20Sopenharmony_ci break; 85918c2ecf20Sopenharmony_ci } 85928c2ecf20Sopenharmony_ci if (fd == IORING_REGISTER_FILES_SKIP) 85938c2ecf20Sopenharmony_ci continue; 85948c2ecf20Sopenharmony_ci 85958c2ecf20Sopenharmony_ci i = array_index_nospec(up->offset + done, ctx->nr_user_files); 85968c2ecf20Sopenharmony_ci file_slot = io_fixed_file_slot(&ctx->file_table, i); 85978c2ecf20Sopenharmony_ci 85988c2ecf20Sopenharmony_ci if (file_slot->file_ptr) { 85998c2ecf20Sopenharmony_ci file = (struct file *)(file_slot->file_ptr & FFS_MASK); 86008c2ecf20Sopenharmony_ci err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file); 86018c2ecf20Sopenharmony_ci if (err) 86028c2ecf20Sopenharmony_ci break; 86038c2ecf20Sopenharmony_ci file_slot->file_ptr = 0; 86048c2ecf20Sopenharmony_ci needs_switch = true; 86058c2ecf20Sopenharmony_ci } 86068c2ecf20Sopenharmony_ci if (fd != -1) { 86078c2ecf20Sopenharmony_ci file = fget(fd); 86088c2ecf20Sopenharmony_ci if (!file) { 86098c2ecf20Sopenharmony_ci err = -EBADF; 86108c2ecf20Sopenharmony_ci break; 86118c2ecf20Sopenharmony_ci } 86128c2ecf20Sopenharmony_ci /* 86138c2ecf20Sopenharmony_ci * Don't allow io_uring instances to be registered. If 86148c2ecf20Sopenharmony_ci * UNIX isn't enabled, then this causes a reference 86158c2ecf20Sopenharmony_ci * cycle and this instance can never get freed. If UNIX 86168c2ecf20Sopenharmony_ci * is enabled we'll handle it just fine, but there's 86178c2ecf20Sopenharmony_ci * still no point in allowing a ring fd as it doesn't 86188c2ecf20Sopenharmony_ci * support regular read/write anyway. 
86198c2ecf20Sopenharmony_ci */ 86208c2ecf20Sopenharmony_ci if (file->f_op == &io_uring_fops) { 86218c2ecf20Sopenharmony_ci fput(file); 86228c2ecf20Sopenharmony_ci err = -EBADF; 86238c2ecf20Sopenharmony_ci break; 86248c2ecf20Sopenharmony_ci } 86258c2ecf20Sopenharmony_ci *io_get_tag_slot(data, i) = tag; 86268c2ecf20Sopenharmony_ci io_fixed_file_set(file_slot, file); 86278c2ecf20Sopenharmony_ci } 86288c2ecf20Sopenharmony_ci } 86298c2ecf20Sopenharmony_ci 86308c2ecf20Sopenharmony_ci if (needs_switch) 86318c2ecf20Sopenharmony_ci io_rsrc_node_switch(ctx, data); 86328c2ecf20Sopenharmony_ci return done ? done : err; 86338c2ecf20Sopenharmony_ci} 86348c2ecf20Sopenharmony_ci 86358c2ecf20Sopenharmony_cistatic struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, 86368c2ecf20Sopenharmony_ci struct task_struct *task) 86378c2ecf20Sopenharmony_ci{ 86388c2ecf20Sopenharmony_ci struct io_wq_hash *hash; 86398c2ecf20Sopenharmony_ci struct io_wq_data data; 86408c2ecf20Sopenharmony_ci unsigned int concurrency; 86418c2ecf20Sopenharmony_ci 86428c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 86438c2ecf20Sopenharmony_ci hash = ctx->hash_map; 86448c2ecf20Sopenharmony_ci if (!hash) { 86458c2ecf20Sopenharmony_ci hash = kzalloc(sizeof(*hash), GFP_KERNEL); 86468c2ecf20Sopenharmony_ci if (!hash) { 86478c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 86488c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 86498c2ecf20Sopenharmony_ci } 86508c2ecf20Sopenharmony_ci refcount_set(&hash->refs, 1); 86518c2ecf20Sopenharmony_ci init_waitqueue_head(&hash->wait); 86528c2ecf20Sopenharmony_ci ctx->hash_map = hash; 86538c2ecf20Sopenharmony_ci } 86548c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 86558c2ecf20Sopenharmony_ci 86568c2ecf20Sopenharmony_ci data.hash = hash; 86578c2ecf20Sopenharmony_ci data.task = task; 86588c2ecf20Sopenharmony_ci data.free_work = io_wq_free_work; 86598c2ecf20Sopenharmony_ci data.do_work = io_wq_submit_work; 86608c2ecf20Sopenharmony_ci 86618c2ecf20Sopenharmony_ci 
/* Do QD, or 4 * CPUS, whatever is smallest */ 86628c2ecf20Sopenharmony_ci concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); 86638c2ecf20Sopenharmony_ci 86648c2ecf20Sopenharmony_ci return io_wq_create(concurrency, &data); 86658c2ecf20Sopenharmony_ci} 86668c2ecf20Sopenharmony_ci 86678c2ecf20Sopenharmony_cistatic int io_uring_alloc_task_context(struct task_struct *task, 86688c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx) 86698c2ecf20Sopenharmony_ci{ 86708c2ecf20Sopenharmony_ci struct io_uring_task *tctx; 86718c2ecf20Sopenharmony_ci int ret; 86728c2ecf20Sopenharmony_ci 86738c2ecf20Sopenharmony_ci tctx = kzalloc(sizeof(*tctx), GFP_KERNEL); 86748c2ecf20Sopenharmony_ci if (unlikely(!tctx)) 86758c2ecf20Sopenharmony_ci return -ENOMEM; 86768c2ecf20Sopenharmony_ci 86778c2ecf20Sopenharmony_ci ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL); 86788c2ecf20Sopenharmony_ci if (unlikely(ret)) { 86798c2ecf20Sopenharmony_ci kfree(tctx); 86808c2ecf20Sopenharmony_ci return ret; 86818c2ecf20Sopenharmony_ci } 86828c2ecf20Sopenharmony_ci 86838c2ecf20Sopenharmony_ci tctx->io_wq = io_init_wq_offload(ctx, task); 86848c2ecf20Sopenharmony_ci if (IS_ERR(tctx->io_wq)) { 86858c2ecf20Sopenharmony_ci ret = PTR_ERR(tctx->io_wq); 86868c2ecf20Sopenharmony_ci percpu_counter_destroy(&tctx->inflight); 86878c2ecf20Sopenharmony_ci kfree(tctx); 86888c2ecf20Sopenharmony_ci return ret; 86898c2ecf20Sopenharmony_ci } 86908c2ecf20Sopenharmony_ci 86918c2ecf20Sopenharmony_ci xa_init(&tctx->xa); 86928c2ecf20Sopenharmony_ci init_waitqueue_head(&tctx->wait); 86938c2ecf20Sopenharmony_ci atomic_set(&tctx->in_idle, 0); 86948c2ecf20Sopenharmony_ci atomic_set(&tctx->inflight_tracked, 0); 86958c2ecf20Sopenharmony_ci task->io_uring = tctx; 86968c2ecf20Sopenharmony_ci spin_lock_init(&tctx->task_lock); 86978c2ecf20Sopenharmony_ci INIT_WQ_LIST(&tctx->task_list); 86988c2ecf20Sopenharmony_ci init_task_work(&tctx->task_work, tctx_task_work); 86998c2ecf20Sopenharmony_ci return 0; 87008c2ecf20Sopenharmony_ci} 
87018c2ecf20Sopenharmony_ci 87028c2ecf20Sopenharmony_civoid __io_uring_free(struct task_struct *tsk) 87038c2ecf20Sopenharmony_ci{ 87048c2ecf20Sopenharmony_ci struct io_uring_task *tctx = tsk->io_uring; 87058c2ecf20Sopenharmony_ci 87068c2ecf20Sopenharmony_ci WARN_ON_ONCE(!xa_empty(&tctx->xa)); 87078c2ecf20Sopenharmony_ci WARN_ON_ONCE(tctx->io_wq); 87088c2ecf20Sopenharmony_ci WARN_ON_ONCE(tctx->cached_refs); 87098c2ecf20Sopenharmony_ci 87108c2ecf20Sopenharmony_ci percpu_counter_destroy(&tctx->inflight); 87118c2ecf20Sopenharmony_ci kfree(tctx); 87128c2ecf20Sopenharmony_ci tsk->io_uring = NULL; 87138c2ecf20Sopenharmony_ci} 87148c2ecf20Sopenharmony_ci 87158c2ecf20Sopenharmony_cistatic int io_sq_offload_create(struct io_ring_ctx *ctx, 87168c2ecf20Sopenharmony_ci struct io_uring_params *p) 87178c2ecf20Sopenharmony_ci{ 87188c2ecf20Sopenharmony_ci int ret; 87198c2ecf20Sopenharmony_ci 87208c2ecf20Sopenharmony_ci /* Retain compatibility with failing for an invalid attach attempt */ 87218c2ecf20Sopenharmony_ci if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) == 87228c2ecf20Sopenharmony_ci IORING_SETUP_ATTACH_WQ) { 87238c2ecf20Sopenharmony_ci struct fd f; 87248c2ecf20Sopenharmony_ci 87258c2ecf20Sopenharmony_ci f = fdget(p->wq_fd); 87268c2ecf20Sopenharmony_ci if (!f.file) 87278c2ecf20Sopenharmony_ci return -ENXIO; 87288c2ecf20Sopenharmony_ci if (f.file->f_op != &io_uring_fops) { 87298c2ecf20Sopenharmony_ci fdput(f); 87308c2ecf20Sopenharmony_ci return -EINVAL; 87318c2ecf20Sopenharmony_ci } 87328c2ecf20Sopenharmony_ci fdput(f); 87338c2ecf20Sopenharmony_ci } 87348c2ecf20Sopenharmony_ci if (ctx->flags & IORING_SETUP_SQPOLL) { 87358c2ecf20Sopenharmony_ci struct task_struct *tsk; 87368c2ecf20Sopenharmony_ci struct io_sq_data *sqd; 87378c2ecf20Sopenharmony_ci bool attached; 87388c2ecf20Sopenharmony_ci 87398c2ecf20Sopenharmony_ci sqd = io_get_sq_data(p, &attached); 87408c2ecf20Sopenharmony_ci if (IS_ERR(sqd)) { 87418c2ecf20Sopenharmony_ci ret = PTR_ERR(sqd); 
87428c2ecf20Sopenharmony_ci goto err; 87438c2ecf20Sopenharmony_ci } 87448c2ecf20Sopenharmony_ci 87458c2ecf20Sopenharmony_ci ctx->sq_creds = get_current_cred(); 87468c2ecf20Sopenharmony_ci ctx->sq_data = sqd; 87478c2ecf20Sopenharmony_ci ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); 87488c2ecf20Sopenharmony_ci if (!ctx->sq_thread_idle) 87498c2ecf20Sopenharmony_ci ctx->sq_thread_idle = HZ; 87508c2ecf20Sopenharmony_ci 87518c2ecf20Sopenharmony_ci io_sq_thread_park(sqd); 87528c2ecf20Sopenharmony_ci list_add(&ctx->sqd_list, &sqd->ctx_list); 87538c2ecf20Sopenharmony_ci io_sqd_update_thread_idle(sqd); 87548c2ecf20Sopenharmony_ci /* don't attach to a dying SQPOLL thread, would be racy */ 87558c2ecf20Sopenharmony_ci ret = (attached && !sqd->thread) ? -ENXIO : 0; 87568c2ecf20Sopenharmony_ci io_sq_thread_unpark(sqd); 87578c2ecf20Sopenharmony_ci 87588c2ecf20Sopenharmony_ci if (ret < 0) 87598c2ecf20Sopenharmony_ci goto err; 87608c2ecf20Sopenharmony_ci if (attached) 87618c2ecf20Sopenharmony_ci return 0; 87628c2ecf20Sopenharmony_ci 87638c2ecf20Sopenharmony_ci if (p->flags & IORING_SETUP_SQ_AFF) { 87648c2ecf20Sopenharmony_ci int cpu = p->sq_thread_cpu; 87658c2ecf20Sopenharmony_ci 87668c2ecf20Sopenharmony_ci ret = -EINVAL; 87678c2ecf20Sopenharmony_ci if (cpu >= nr_cpu_ids || !cpu_online(cpu)) 87688c2ecf20Sopenharmony_ci goto err_sqpoll; 87698c2ecf20Sopenharmony_ci sqd->sq_cpu = cpu; 87708c2ecf20Sopenharmony_ci } else { 87718c2ecf20Sopenharmony_ci sqd->sq_cpu = -1; 87728c2ecf20Sopenharmony_ci } 87738c2ecf20Sopenharmony_ci 87748c2ecf20Sopenharmony_ci sqd->task_pid = current->pid; 87758c2ecf20Sopenharmony_ci sqd->task_tgid = current->tgid; 87768c2ecf20Sopenharmony_ci tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); 87778c2ecf20Sopenharmony_ci if (IS_ERR(tsk)) { 87788c2ecf20Sopenharmony_ci ret = PTR_ERR(tsk); 87798c2ecf20Sopenharmony_ci goto err_sqpoll; 87808c2ecf20Sopenharmony_ci } 87818c2ecf20Sopenharmony_ci 87828c2ecf20Sopenharmony_ci sqd->thread = tsk; 
87838c2ecf20Sopenharmony_ci ret = io_uring_alloc_task_context(tsk, ctx); 87848c2ecf20Sopenharmony_ci wake_up_new_task(tsk); 87858c2ecf20Sopenharmony_ci if (ret) 87868c2ecf20Sopenharmony_ci goto err; 87878c2ecf20Sopenharmony_ci } else if (p->flags & IORING_SETUP_SQ_AFF) { 87888c2ecf20Sopenharmony_ci /* Can't have SQ_AFF without SQPOLL */ 87898c2ecf20Sopenharmony_ci ret = -EINVAL; 87908c2ecf20Sopenharmony_ci goto err; 87918c2ecf20Sopenharmony_ci } 87928c2ecf20Sopenharmony_ci 87938c2ecf20Sopenharmony_ci return 0; 87948c2ecf20Sopenharmony_cierr_sqpoll: 87958c2ecf20Sopenharmony_ci complete(&ctx->sq_data->exited); 87968c2ecf20Sopenharmony_cierr: 87978c2ecf20Sopenharmony_ci io_sq_thread_finish(ctx); 87988c2ecf20Sopenharmony_ci return ret; 87998c2ecf20Sopenharmony_ci} 88008c2ecf20Sopenharmony_ci 88018c2ecf20Sopenharmony_cistatic inline void __io_unaccount_mem(struct user_struct *user, 88028c2ecf20Sopenharmony_ci unsigned long nr_pages) 88038c2ecf20Sopenharmony_ci{ 88048c2ecf20Sopenharmony_ci atomic_long_sub(nr_pages, &user->locked_vm); 88058c2ecf20Sopenharmony_ci} 88068c2ecf20Sopenharmony_ci 88078c2ecf20Sopenharmony_cistatic inline int __io_account_mem(struct user_struct *user, 88088c2ecf20Sopenharmony_ci unsigned long nr_pages) 88098c2ecf20Sopenharmony_ci{ 88108c2ecf20Sopenharmony_ci unsigned long page_limit, cur_pages, new_pages; 88118c2ecf20Sopenharmony_ci 88128c2ecf20Sopenharmony_ci /* Don't allow more pages than we can safely lock */ 88138c2ecf20Sopenharmony_ci page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 88148c2ecf20Sopenharmony_ci 88158c2ecf20Sopenharmony_ci do { 88168c2ecf20Sopenharmony_ci cur_pages = atomic_long_read(&user->locked_vm); 88178c2ecf20Sopenharmony_ci new_pages = cur_pages + nr_pages; 88188c2ecf20Sopenharmony_ci if (new_pages > page_limit) 88198c2ecf20Sopenharmony_ci return -ENOMEM; 88208c2ecf20Sopenharmony_ci } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages, 88218c2ecf20Sopenharmony_ci new_pages) != cur_pages); 
88228c2ecf20Sopenharmony_ci 88238c2ecf20Sopenharmony_ci return 0; 88248c2ecf20Sopenharmony_ci} 88258c2ecf20Sopenharmony_ci 88268c2ecf20Sopenharmony_cistatic void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) 88278c2ecf20Sopenharmony_ci{ 88288c2ecf20Sopenharmony_ci if (ctx->user) 88298c2ecf20Sopenharmony_ci __io_unaccount_mem(ctx->user, nr_pages); 88308c2ecf20Sopenharmony_ci 88318c2ecf20Sopenharmony_ci if (ctx->mm_account) 88328c2ecf20Sopenharmony_ci atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); 88338c2ecf20Sopenharmony_ci} 88348c2ecf20Sopenharmony_ci 88358c2ecf20Sopenharmony_cistatic int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) 88368c2ecf20Sopenharmony_ci{ 88378c2ecf20Sopenharmony_ci int ret; 88388c2ecf20Sopenharmony_ci 88398c2ecf20Sopenharmony_ci if (ctx->user) { 88408c2ecf20Sopenharmony_ci ret = __io_account_mem(ctx->user, nr_pages); 88418c2ecf20Sopenharmony_ci if (ret) 88428c2ecf20Sopenharmony_ci return ret; 88438c2ecf20Sopenharmony_ci } 88448c2ecf20Sopenharmony_ci 88458c2ecf20Sopenharmony_ci if (ctx->mm_account) 88468c2ecf20Sopenharmony_ci atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); 88478c2ecf20Sopenharmony_ci 88488c2ecf20Sopenharmony_ci return 0; 88498c2ecf20Sopenharmony_ci} 88508c2ecf20Sopenharmony_ci 88518c2ecf20Sopenharmony_cistatic void io_mem_free(void *ptr) 88528c2ecf20Sopenharmony_ci{ 88538c2ecf20Sopenharmony_ci struct page *page; 88548c2ecf20Sopenharmony_ci 88558c2ecf20Sopenharmony_ci if (!ptr) 88568c2ecf20Sopenharmony_ci return; 88578c2ecf20Sopenharmony_ci 88588c2ecf20Sopenharmony_ci page = virt_to_head_page(ptr); 88598c2ecf20Sopenharmony_ci if (put_page_testzero(page)) 88608c2ecf20Sopenharmony_ci free_compound_page(page); 88618c2ecf20Sopenharmony_ci} 88628c2ecf20Sopenharmony_ci 88638c2ecf20Sopenharmony_cistatic void *io_mem_alloc(size_t size) 88648c2ecf20Sopenharmony_ci{ 88658c2ecf20Sopenharmony_ci gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; 
88668c2ecf20Sopenharmony_ci 88678c2ecf20Sopenharmony_ci return (void *) __get_free_pages(gfp, get_order(size)); 88688c2ecf20Sopenharmony_ci} 88698c2ecf20Sopenharmony_ci 88708c2ecf20Sopenharmony_cistatic unsigned long rings_size(unsigned sq_entries, unsigned cq_entries, 88718c2ecf20Sopenharmony_ci size_t *sq_offset) 88728c2ecf20Sopenharmony_ci{ 88738c2ecf20Sopenharmony_ci struct io_rings *rings; 88748c2ecf20Sopenharmony_ci size_t off, sq_array_size; 88758c2ecf20Sopenharmony_ci 88768c2ecf20Sopenharmony_ci off = struct_size(rings, cqes, cq_entries); 88778c2ecf20Sopenharmony_ci if (off == SIZE_MAX) 88788c2ecf20Sopenharmony_ci return SIZE_MAX; 88798c2ecf20Sopenharmony_ci 88808c2ecf20Sopenharmony_ci#ifdef CONFIG_SMP 88818c2ecf20Sopenharmony_ci off = ALIGN(off, SMP_CACHE_BYTES); 88828c2ecf20Sopenharmony_ci if (off == 0) 88838c2ecf20Sopenharmony_ci return SIZE_MAX; 88848c2ecf20Sopenharmony_ci#endif 88858c2ecf20Sopenharmony_ci 88868c2ecf20Sopenharmony_ci if (sq_offset) 88878c2ecf20Sopenharmony_ci *sq_offset = off; 88888c2ecf20Sopenharmony_ci 88898c2ecf20Sopenharmony_ci sq_array_size = array_size(sizeof(u32), sq_entries); 88908c2ecf20Sopenharmony_ci if (sq_array_size == SIZE_MAX) 88918c2ecf20Sopenharmony_ci return SIZE_MAX; 88928c2ecf20Sopenharmony_ci 88938c2ecf20Sopenharmony_ci if (check_add_overflow(off, sq_array_size, &off)) 88948c2ecf20Sopenharmony_ci return SIZE_MAX; 88958c2ecf20Sopenharmony_ci 88968c2ecf20Sopenharmony_ci return off; 88978c2ecf20Sopenharmony_ci} 88988c2ecf20Sopenharmony_ci 88998c2ecf20Sopenharmony_cistatic void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot) 89008c2ecf20Sopenharmony_ci{ 89018c2ecf20Sopenharmony_ci struct io_mapped_ubuf *imu = *slot; 89028c2ecf20Sopenharmony_ci unsigned int i; 89038c2ecf20Sopenharmony_ci 89048c2ecf20Sopenharmony_ci if (imu != ctx->dummy_ubuf) { 89058c2ecf20Sopenharmony_ci for (i = 0; i < imu->nr_bvecs; i++) 89068c2ecf20Sopenharmony_ci unpin_user_page(imu->bvec[i].bv_page); 
89078c2ecf20Sopenharmony_ci if (imu->acct_pages) 89088c2ecf20Sopenharmony_ci io_unaccount_mem(ctx, imu->acct_pages); 89098c2ecf20Sopenharmony_ci kvfree(imu); 89108c2ecf20Sopenharmony_ci } 89118c2ecf20Sopenharmony_ci *slot = NULL; 89128c2ecf20Sopenharmony_ci} 89138c2ecf20Sopenharmony_ci 89148c2ecf20Sopenharmony_cistatic void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) 89158c2ecf20Sopenharmony_ci{ 89168c2ecf20Sopenharmony_ci io_buffer_unmap(ctx, &prsrc->buf); 89178c2ecf20Sopenharmony_ci prsrc->buf = NULL; 89188c2ecf20Sopenharmony_ci} 89198c2ecf20Sopenharmony_ci 89208c2ecf20Sopenharmony_cistatic void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx) 89218c2ecf20Sopenharmony_ci{ 89228c2ecf20Sopenharmony_ci unsigned int i; 89238c2ecf20Sopenharmony_ci 89248c2ecf20Sopenharmony_ci for (i = 0; i < ctx->nr_user_bufs; i++) 89258c2ecf20Sopenharmony_ci io_buffer_unmap(ctx, &ctx->user_bufs[i]); 89268c2ecf20Sopenharmony_ci kfree(ctx->user_bufs); 89278c2ecf20Sopenharmony_ci io_rsrc_data_free(ctx->buf_data); 89288c2ecf20Sopenharmony_ci ctx->user_bufs = NULL; 89298c2ecf20Sopenharmony_ci ctx->buf_data = NULL; 89308c2ecf20Sopenharmony_ci ctx->nr_user_bufs = 0; 89318c2ecf20Sopenharmony_ci} 89328c2ecf20Sopenharmony_ci 89338c2ecf20Sopenharmony_cistatic int io_sqe_buffers_unregister(struct io_ring_ctx *ctx) 89348c2ecf20Sopenharmony_ci{ 89358c2ecf20Sopenharmony_ci unsigned nr = ctx->nr_user_bufs; 89368c2ecf20Sopenharmony_ci int ret; 89378c2ecf20Sopenharmony_ci 89388c2ecf20Sopenharmony_ci if (!ctx->buf_data) 89398c2ecf20Sopenharmony_ci return -ENXIO; 89408c2ecf20Sopenharmony_ci 89418c2ecf20Sopenharmony_ci /* 89428c2ecf20Sopenharmony_ci * Quiesce may unlock ->uring_lock, and while it's not held 89438c2ecf20Sopenharmony_ci * prevent new requests using the table. 
89448c2ecf20Sopenharmony_ci */ 89458c2ecf20Sopenharmony_ci ctx->nr_user_bufs = 0; 89468c2ecf20Sopenharmony_ci ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx); 89478c2ecf20Sopenharmony_ci ctx->nr_user_bufs = nr; 89488c2ecf20Sopenharmony_ci if (!ret) 89498c2ecf20Sopenharmony_ci __io_sqe_buffers_unregister(ctx); 89508c2ecf20Sopenharmony_ci return ret; 89518c2ecf20Sopenharmony_ci} 89528c2ecf20Sopenharmony_ci 89538c2ecf20Sopenharmony_cistatic int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst, 89548c2ecf20Sopenharmony_ci void __user *arg, unsigned index) 89558c2ecf20Sopenharmony_ci{ 89568c2ecf20Sopenharmony_ci struct iovec __user *src; 89578c2ecf20Sopenharmony_ci 89588c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 89598c2ecf20Sopenharmony_ci if (ctx->compat) { 89608c2ecf20Sopenharmony_ci struct compat_iovec __user *ciovs; 89618c2ecf20Sopenharmony_ci struct compat_iovec ciov; 89628c2ecf20Sopenharmony_ci 89638c2ecf20Sopenharmony_ci ciovs = (struct compat_iovec __user *) arg; 89648c2ecf20Sopenharmony_ci if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov))) 89658c2ecf20Sopenharmony_ci return -EFAULT; 89668c2ecf20Sopenharmony_ci 89678c2ecf20Sopenharmony_ci dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base); 89688c2ecf20Sopenharmony_ci dst->iov_len = ciov.iov_len; 89698c2ecf20Sopenharmony_ci return 0; 89708c2ecf20Sopenharmony_ci } 89718c2ecf20Sopenharmony_ci#endif 89728c2ecf20Sopenharmony_ci src = (struct iovec __user *) arg; 89738c2ecf20Sopenharmony_ci if (copy_from_user(dst, &src[index], sizeof(*dst))) 89748c2ecf20Sopenharmony_ci return -EFAULT; 89758c2ecf20Sopenharmony_ci return 0; 89768c2ecf20Sopenharmony_ci} 89778c2ecf20Sopenharmony_ci 89788c2ecf20Sopenharmony_ci/* 89798c2ecf20Sopenharmony_ci * Not super efficient, but this is just a registration time. And we do cache 89808c2ecf20Sopenharmony_ci * the last compound head, so generally we'll only do a full search if we don't 89818c2ecf20Sopenharmony_ci * match that one. 
89828c2ecf20Sopenharmony_ci * 89838c2ecf20Sopenharmony_ci * We check if the given compound head page has already been accounted, to 89848c2ecf20Sopenharmony_ci * avoid double accounting it. This allows us to account the full size of the 89858c2ecf20Sopenharmony_ci * page, not just the constituent pages of a huge page. 89868c2ecf20Sopenharmony_ci */ 89878c2ecf20Sopenharmony_cistatic bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages, 89888c2ecf20Sopenharmony_ci int nr_pages, struct page *hpage) 89898c2ecf20Sopenharmony_ci{ 89908c2ecf20Sopenharmony_ci int i, j; 89918c2ecf20Sopenharmony_ci 89928c2ecf20Sopenharmony_ci /* check current page array */ 89938c2ecf20Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 89948c2ecf20Sopenharmony_ci if (!PageCompound(pages[i])) 89958c2ecf20Sopenharmony_ci continue; 89968c2ecf20Sopenharmony_ci if (compound_head(pages[i]) == hpage) 89978c2ecf20Sopenharmony_ci return true; 89988c2ecf20Sopenharmony_ci } 89998c2ecf20Sopenharmony_ci 90008c2ecf20Sopenharmony_ci /* check previously registered pages */ 90018c2ecf20Sopenharmony_ci for (i = 0; i < ctx->nr_user_bufs; i++) { 90028c2ecf20Sopenharmony_ci struct io_mapped_ubuf *imu = ctx->user_bufs[i]; 90038c2ecf20Sopenharmony_ci 90048c2ecf20Sopenharmony_ci for (j = 0; j < imu->nr_bvecs; j++) { 90058c2ecf20Sopenharmony_ci if (!PageCompound(imu->bvec[j].bv_page)) 90068c2ecf20Sopenharmony_ci continue; 90078c2ecf20Sopenharmony_ci if (compound_head(imu->bvec[j].bv_page) == hpage) 90088c2ecf20Sopenharmony_ci return true; 90098c2ecf20Sopenharmony_ci } 90108c2ecf20Sopenharmony_ci } 90118c2ecf20Sopenharmony_ci 90128c2ecf20Sopenharmony_ci return false; 90138c2ecf20Sopenharmony_ci} 90148c2ecf20Sopenharmony_ci 90158c2ecf20Sopenharmony_cistatic int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages, 90168c2ecf20Sopenharmony_ci int nr_pages, struct io_mapped_ubuf *imu, 90178c2ecf20Sopenharmony_ci struct page **last_hpage) 90188c2ecf20Sopenharmony_ci{ 
90198c2ecf20Sopenharmony_ci int i, ret; 90208c2ecf20Sopenharmony_ci 90218c2ecf20Sopenharmony_ci imu->acct_pages = 0; 90228c2ecf20Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 90238c2ecf20Sopenharmony_ci if (!PageCompound(pages[i])) { 90248c2ecf20Sopenharmony_ci imu->acct_pages++; 90258c2ecf20Sopenharmony_ci } else { 90268c2ecf20Sopenharmony_ci struct page *hpage; 90278c2ecf20Sopenharmony_ci 90288c2ecf20Sopenharmony_ci hpage = compound_head(pages[i]); 90298c2ecf20Sopenharmony_ci if (hpage == *last_hpage) 90308c2ecf20Sopenharmony_ci continue; 90318c2ecf20Sopenharmony_ci *last_hpage = hpage; 90328c2ecf20Sopenharmony_ci if (headpage_already_acct(ctx, pages, i, hpage)) 90338c2ecf20Sopenharmony_ci continue; 90348c2ecf20Sopenharmony_ci imu->acct_pages += page_size(hpage) >> PAGE_SHIFT; 90358c2ecf20Sopenharmony_ci } 90368c2ecf20Sopenharmony_ci } 90378c2ecf20Sopenharmony_ci 90388c2ecf20Sopenharmony_ci if (!imu->acct_pages) 90398c2ecf20Sopenharmony_ci return 0; 90408c2ecf20Sopenharmony_ci 90418c2ecf20Sopenharmony_ci ret = io_account_mem(ctx, imu->acct_pages); 90428c2ecf20Sopenharmony_ci if (ret) 90438c2ecf20Sopenharmony_ci imu->acct_pages = 0; 90448c2ecf20Sopenharmony_ci return ret; 90458c2ecf20Sopenharmony_ci} 90468c2ecf20Sopenharmony_ci 90478c2ecf20Sopenharmony_cistatic int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, 90488c2ecf20Sopenharmony_ci struct io_mapped_ubuf **pimu, 90498c2ecf20Sopenharmony_ci struct page **last_hpage) 90508c2ecf20Sopenharmony_ci{ 90518c2ecf20Sopenharmony_ci struct io_mapped_ubuf *imu = NULL; 90528c2ecf20Sopenharmony_ci struct vm_area_struct **vmas = NULL; 90538c2ecf20Sopenharmony_ci struct page **pages = NULL; 90548c2ecf20Sopenharmony_ci unsigned long off, start, end, ubuf; 90558c2ecf20Sopenharmony_ci size_t size; 90568c2ecf20Sopenharmony_ci int ret, pret, nr_pages, i; 90578c2ecf20Sopenharmony_ci 90588c2ecf20Sopenharmony_ci if (!iov->iov_base) { 90598c2ecf20Sopenharmony_ci *pimu = ctx->dummy_ubuf; 
90608c2ecf20Sopenharmony_ci return 0; 90618c2ecf20Sopenharmony_ci } 90628c2ecf20Sopenharmony_ci 90638c2ecf20Sopenharmony_ci ubuf = (unsigned long) iov->iov_base; 90648c2ecf20Sopenharmony_ci end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT; 90658c2ecf20Sopenharmony_ci start = ubuf >> PAGE_SHIFT; 90668c2ecf20Sopenharmony_ci nr_pages = end - start; 90678c2ecf20Sopenharmony_ci 90688c2ecf20Sopenharmony_ci *pimu = NULL; 90698c2ecf20Sopenharmony_ci ret = -ENOMEM; 90708c2ecf20Sopenharmony_ci 90718c2ecf20Sopenharmony_ci pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); 90728c2ecf20Sopenharmony_ci if (!pages) 90738c2ecf20Sopenharmony_ci goto done; 90748c2ecf20Sopenharmony_ci 90758c2ecf20Sopenharmony_ci vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *), 90768c2ecf20Sopenharmony_ci GFP_KERNEL); 90778c2ecf20Sopenharmony_ci if (!vmas) 90788c2ecf20Sopenharmony_ci goto done; 90798c2ecf20Sopenharmony_ci 90808c2ecf20Sopenharmony_ci imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); 90818c2ecf20Sopenharmony_ci if (!imu) 90828c2ecf20Sopenharmony_ci goto done; 90838c2ecf20Sopenharmony_ci 90848c2ecf20Sopenharmony_ci ret = 0; 90858c2ecf20Sopenharmony_ci mmap_read_lock(current->mm); 90868c2ecf20Sopenharmony_ci pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM, 90878c2ecf20Sopenharmony_ci pages, vmas); 90888c2ecf20Sopenharmony_ci if (pret == nr_pages) { 90898c2ecf20Sopenharmony_ci struct file *file = vmas[0]->vm_file; 90908c2ecf20Sopenharmony_ci 90918c2ecf20Sopenharmony_ci /* don't support file backed memory */ 90928c2ecf20Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 90938c2ecf20Sopenharmony_ci if (vmas[i]->vm_file != file) { 90948c2ecf20Sopenharmony_ci ret = -EINVAL; 90958c2ecf20Sopenharmony_ci break; 90968c2ecf20Sopenharmony_ci } 90978c2ecf20Sopenharmony_ci if (!file) 90988c2ecf20Sopenharmony_ci continue; 90998c2ecf20Sopenharmony_ci if (!vma_is_shmem(vmas[i]) && !is_file_hugepages(file)) { 91008c2ecf20Sopenharmony_ci 
ret = -EOPNOTSUPP; 91018c2ecf20Sopenharmony_ci break; 91028c2ecf20Sopenharmony_ci } 91038c2ecf20Sopenharmony_ci } 91048c2ecf20Sopenharmony_ci } else { 91058c2ecf20Sopenharmony_ci ret = pret < 0 ? pret : -EFAULT; 91068c2ecf20Sopenharmony_ci } 91078c2ecf20Sopenharmony_ci mmap_read_unlock(current->mm); 91088c2ecf20Sopenharmony_ci if (ret) { 91098c2ecf20Sopenharmony_ci /* 91108c2ecf20Sopenharmony_ci * if we did partial map, or found file backed vmas, 91118c2ecf20Sopenharmony_ci * release any pages we did get 91128c2ecf20Sopenharmony_ci */ 91138c2ecf20Sopenharmony_ci if (pret > 0) 91148c2ecf20Sopenharmony_ci unpin_user_pages(pages, pret); 91158c2ecf20Sopenharmony_ci goto done; 91168c2ecf20Sopenharmony_ci } 91178c2ecf20Sopenharmony_ci 91188c2ecf20Sopenharmony_ci ret = io_buffer_account_pin(ctx, pages, pret, imu, last_hpage); 91198c2ecf20Sopenharmony_ci if (ret) { 91208c2ecf20Sopenharmony_ci unpin_user_pages(pages, pret); 91218c2ecf20Sopenharmony_ci goto done; 91228c2ecf20Sopenharmony_ci } 91238c2ecf20Sopenharmony_ci 91248c2ecf20Sopenharmony_ci off = ubuf & ~PAGE_MASK; 91258c2ecf20Sopenharmony_ci size = iov->iov_len; 91268c2ecf20Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 91278c2ecf20Sopenharmony_ci size_t vec_len; 91288c2ecf20Sopenharmony_ci 91298c2ecf20Sopenharmony_ci vec_len = min_t(size_t, size, PAGE_SIZE - off); 91308c2ecf20Sopenharmony_ci imu->bvec[i].bv_page = pages[i]; 91318c2ecf20Sopenharmony_ci imu->bvec[i].bv_len = vec_len; 91328c2ecf20Sopenharmony_ci imu->bvec[i].bv_offset = off; 91338c2ecf20Sopenharmony_ci off = 0; 91348c2ecf20Sopenharmony_ci size -= vec_len; 91358c2ecf20Sopenharmony_ci } 91368c2ecf20Sopenharmony_ci /* store original address for later verification */ 91378c2ecf20Sopenharmony_ci imu->ubuf = ubuf; 91388c2ecf20Sopenharmony_ci imu->ubuf_end = ubuf + iov->iov_len; 91398c2ecf20Sopenharmony_ci imu->nr_bvecs = nr_pages; 91408c2ecf20Sopenharmony_ci *pimu = imu; 91418c2ecf20Sopenharmony_ci ret = 0; 91428c2ecf20Sopenharmony_cidone: 
91438c2ecf20Sopenharmony_ci if (ret) 91448c2ecf20Sopenharmony_ci kvfree(imu); 91458c2ecf20Sopenharmony_ci kvfree(pages); 91468c2ecf20Sopenharmony_ci kvfree(vmas); 91478c2ecf20Sopenharmony_ci return ret; 91488c2ecf20Sopenharmony_ci} 91498c2ecf20Sopenharmony_ci 91508c2ecf20Sopenharmony_cistatic int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args) 91518c2ecf20Sopenharmony_ci{ 91528c2ecf20Sopenharmony_ci ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL); 91538c2ecf20Sopenharmony_ci return ctx->user_bufs ? 0 : -ENOMEM; 91548c2ecf20Sopenharmony_ci} 91558c2ecf20Sopenharmony_ci 91568c2ecf20Sopenharmony_cistatic int io_buffer_validate(struct iovec *iov) 91578c2ecf20Sopenharmony_ci{ 91588c2ecf20Sopenharmony_ci unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1); 91598c2ecf20Sopenharmony_ci 91608c2ecf20Sopenharmony_ci /* 91618c2ecf20Sopenharmony_ci * Don't impose further limits on the size and buffer 91628c2ecf20Sopenharmony_ci * constraints here, we'll -EINVAL later when IO is 91638c2ecf20Sopenharmony_ci * submitted if they are wrong. 91648c2ecf20Sopenharmony_ci */ 91658c2ecf20Sopenharmony_ci if (!iov->iov_base) 91668c2ecf20Sopenharmony_ci return iov->iov_len ? 
-EFAULT : 0; 91678c2ecf20Sopenharmony_ci if (!iov->iov_len) 91688c2ecf20Sopenharmony_ci return -EFAULT; 91698c2ecf20Sopenharmony_ci 91708c2ecf20Sopenharmony_ci /* arbitrary limit, but we need something */ 91718c2ecf20Sopenharmony_ci if (iov->iov_len > SZ_1G) 91728c2ecf20Sopenharmony_ci return -EFAULT; 91738c2ecf20Sopenharmony_ci 91748c2ecf20Sopenharmony_ci if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp)) 91758c2ecf20Sopenharmony_ci return -EOVERFLOW; 91768c2ecf20Sopenharmony_ci 91778c2ecf20Sopenharmony_ci return 0; 91788c2ecf20Sopenharmony_ci} 91798c2ecf20Sopenharmony_ci 91808c2ecf20Sopenharmony_cistatic int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, 91818c2ecf20Sopenharmony_ci unsigned int nr_args, u64 __user *tags) 91828c2ecf20Sopenharmony_ci{ 91838c2ecf20Sopenharmony_ci struct page *last_hpage = NULL; 91848c2ecf20Sopenharmony_ci struct io_rsrc_data *data; 91858c2ecf20Sopenharmony_ci int i, ret; 91868c2ecf20Sopenharmony_ci struct iovec iov; 91878c2ecf20Sopenharmony_ci 91888c2ecf20Sopenharmony_ci if (ctx->user_bufs) 91898c2ecf20Sopenharmony_ci return -EBUSY; 91908c2ecf20Sopenharmony_ci if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS) 91918c2ecf20Sopenharmony_ci return -EINVAL; 91928c2ecf20Sopenharmony_ci ret = io_rsrc_node_switch_start(ctx); 91938c2ecf20Sopenharmony_ci if (ret) 91948c2ecf20Sopenharmony_ci return ret; 91958c2ecf20Sopenharmony_ci ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data); 91968c2ecf20Sopenharmony_ci if (ret) 91978c2ecf20Sopenharmony_ci return ret; 91988c2ecf20Sopenharmony_ci ret = io_buffers_map_alloc(ctx, nr_args); 91998c2ecf20Sopenharmony_ci if (ret) { 92008c2ecf20Sopenharmony_ci io_rsrc_data_free(data); 92018c2ecf20Sopenharmony_ci return ret; 92028c2ecf20Sopenharmony_ci } 92038c2ecf20Sopenharmony_ci 92048c2ecf20Sopenharmony_ci for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) { 92058c2ecf20Sopenharmony_ci ret = io_copy_iov(ctx, &iov, arg, i); 92068c2ecf20Sopenharmony_ci if 
(ret) 92078c2ecf20Sopenharmony_ci break; 92088c2ecf20Sopenharmony_ci ret = io_buffer_validate(&iov); 92098c2ecf20Sopenharmony_ci if (ret) 92108c2ecf20Sopenharmony_ci break; 92118c2ecf20Sopenharmony_ci if (!iov.iov_base && *io_get_tag_slot(data, i)) { 92128c2ecf20Sopenharmony_ci ret = -EINVAL; 92138c2ecf20Sopenharmony_ci break; 92148c2ecf20Sopenharmony_ci } 92158c2ecf20Sopenharmony_ci 92168c2ecf20Sopenharmony_ci ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i], 92178c2ecf20Sopenharmony_ci &last_hpage); 92188c2ecf20Sopenharmony_ci if (ret) 92198c2ecf20Sopenharmony_ci break; 92208c2ecf20Sopenharmony_ci } 92218c2ecf20Sopenharmony_ci 92228c2ecf20Sopenharmony_ci WARN_ON_ONCE(ctx->buf_data); 92238c2ecf20Sopenharmony_ci 92248c2ecf20Sopenharmony_ci ctx->buf_data = data; 92258c2ecf20Sopenharmony_ci if (ret) 92268c2ecf20Sopenharmony_ci __io_sqe_buffers_unregister(ctx); 92278c2ecf20Sopenharmony_ci else 92288c2ecf20Sopenharmony_ci io_rsrc_node_switch(ctx, NULL); 92298c2ecf20Sopenharmony_ci return ret; 92308c2ecf20Sopenharmony_ci} 92318c2ecf20Sopenharmony_ci 92328c2ecf20Sopenharmony_cistatic int __io_sqe_buffers_update(struct io_ring_ctx *ctx, 92338c2ecf20Sopenharmony_ci struct io_uring_rsrc_update2 *up, 92348c2ecf20Sopenharmony_ci unsigned int nr_args) 92358c2ecf20Sopenharmony_ci{ 92368c2ecf20Sopenharmony_ci u64 __user *tags = u64_to_user_ptr(up->tags); 92378c2ecf20Sopenharmony_ci struct iovec iov, __user *iovs = u64_to_user_ptr(up->data); 92388c2ecf20Sopenharmony_ci struct page *last_hpage = NULL; 92398c2ecf20Sopenharmony_ci bool needs_switch = false; 92408c2ecf20Sopenharmony_ci __u32 done; 92418c2ecf20Sopenharmony_ci int i, err; 92428c2ecf20Sopenharmony_ci 92438c2ecf20Sopenharmony_ci if (!ctx->buf_data) 92448c2ecf20Sopenharmony_ci return -ENXIO; 92458c2ecf20Sopenharmony_ci if (up->offset + nr_args > ctx->nr_user_bufs) 92468c2ecf20Sopenharmony_ci return -EINVAL; 92478c2ecf20Sopenharmony_ci 92488c2ecf20Sopenharmony_ci for (done = 0; done < nr_args; done++) { 
92498c2ecf20Sopenharmony_ci struct io_mapped_ubuf *imu; 92508c2ecf20Sopenharmony_ci int offset = up->offset + done; 92518c2ecf20Sopenharmony_ci u64 tag = 0; 92528c2ecf20Sopenharmony_ci 92538c2ecf20Sopenharmony_ci err = io_copy_iov(ctx, &iov, iovs, done); 92548c2ecf20Sopenharmony_ci if (err) 92558c2ecf20Sopenharmony_ci break; 92568c2ecf20Sopenharmony_ci if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) { 92578c2ecf20Sopenharmony_ci err = -EFAULT; 92588c2ecf20Sopenharmony_ci break; 92598c2ecf20Sopenharmony_ci } 92608c2ecf20Sopenharmony_ci err = io_buffer_validate(&iov); 92618c2ecf20Sopenharmony_ci if (err) 92628c2ecf20Sopenharmony_ci break; 92638c2ecf20Sopenharmony_ci if (!iov.iov_base && tag) { 92648c2ecf20Sopenharmony_ci err = -EINVAL; 92658c2ecf20Sopenharmony_ci break; 92668c2ecf20Sopenharmony_ci } 92678c2ecf20Sopenharmony_ci err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage); 92688c2ecf20Sopenharmony_ci if (err) 92698c2ecf20Sopenharmony_ci break; 92708c2ecf20Sopenharmony_ci 92718c2ecf20Sopenharmony_ci i = array_index_nospec(offset, ctx->nr_user_bufs); 92728c2ecf20Sopenharmony_ci if (ctx->user_bufs[i] != ctx->dummy_ubuf) { 92738c2ecf20Sopenharmony_ci err = io_queue_rsrc_removal(ctx->buf_data, i, 92748c2ecf20Sopenharmony_ci ctx->rsrc_node, ctx->user_bufs[i]); 92758c2ecf20Sopenharmony_ci if (unlikely(err)) { 92768c2ecf20Sopenharmony_ci io_buffer_unmap(ctx, &imu); 92778c2ecf20Sopenharmony_ci break; 92788c2ecf20Sopenharmony_ci } 92798c2ecf20Sopenharmony_ci ctx->user_bufs[i] = NULL; 92808c2ecf20Sopenharmony_ci needs_switch = true; 92818c2ecf20Sopenharmony_ci } 92828c2ecf20Sopenharmony_ci 92838c2ecf20Sopenharmony_ci ctx->user_bufs[i] = imu; 92848c2ecf20Sopenharmony_ci *io_get_tag_slot(ctx->buf_data, offset) = tag; 92858c2ecf20Sopenharmony_ci } 92868c2ecf20Sopenharmony_ci 92878c2ecf20Sopenharmony_ci if (needs_switch) 92888c2ecf20Sopenharmony_ci io_rsrc_node_switch(ctx, ctx->buf_data); 92898c2ecf20Sopenharmony_ci return done ? 
done : err; 92908c2ecf20Sopenharmony_ci} 92918c2ecf20Sopenharmony_ci 92928c2ecf20Sopenharmony_cistatic int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg) 92938c2ecf20Sopenharmony_ci{ 92948c2ecf20Sopenharmony_ci __s32 __user *fds = arg; 92958c2ecf20Sopenharmony_ci int fd; 92968c2ecf20Sopenharmony_ci 92978c2ecf20Sopenharmony_ci if (ctx->cq_ev_fd) 92988c2ecf20Sopenharmony_ci return -EBUSY; 92998c2ecf20Sopenharmony_ci 93008c2ecf20Sopenharmony_ci if (copy_from_user(&fd, fds, sizeof(*fds))) 93018c2ecf20Sopenharmony_ci return -EFAULT; 93028c2ecf20Sopenharmony_ci 93038c2ecf20Sopenharmony_ci ctx->cq_ev_fd = eventfd_ctx_fdget(fd); 93048c2ecf20Sopenharmony_ci if (IS_ERR(ctx->cq_ev_fd)) { 93058c2ecf20Sopenharmony_ci int ret = PTR_ERR(ctx->cq_ev_fd); 93068c2ecf20Sopenharmony_ci 93078c2ecf20Sopenharmony_ci ctx->cq_ev_fd = NULL; 93088c2ecf20Sopenharmony_ci return ret; 93098c2ecf20Sopenharmony_ci } 93108c2ecf20Sopenharmony_ci 93118c2ecf20Sopenharmony_ci return 0; 93128c2ecf20Sopenharmony_ci} 93138c2ecf20Sopenharmony_ci 93148c2ecf20Sopenharmony_cistatic int io_eventfd_unregister(struct io_ring_ctx *ctx) 93158c2ecf20Sopenharmony_ci{ 93168c2ecf20Sopenharmony_ci if (ctx->cq_ev_fd) { 93178c2ecf20Sopenharmony_ci eventfd_ctx_put(ctx->cq_ev_fd); 93188c2ecf20Sopenharmony_ci ctx->cq_ev_fd = NULL; 93198c2ecf20Sopenharmony_ci return 0; 93208c2ecf20Sopenharmony_ci } 93218c2ecf20Sopenharmony_ci 93228c2ecf20Sopenharmony_ci return -ENXIO; 93238c2ecf20Sopenharmony_ci} 93248c2ecf20Sopenharmony_ci 93258c2ecf20Sopenharmony_cistatic void io_destroy_buffers(struct io_ring_ctx *ctx) 93268c2ecf20Sopenharmony_ci{ 93278c2ecf20Sopenharmony_ci struct io_buffer *buf; 93288c2ecf20Sopenharmony_ci unsigned long index; 93298c2ecf20Sopenharmony_ci 93308c2ecf20Sopenharmony_ci xa_for_each(&ctx->io_buffers, index, buf) 93318c2ecf20Sopenharmony_ci __io_remove_buffers(ctx, buf, index, -1U); 93328c2ecf20Sopenharmony_ci} 93338c2ecf20Sopenharmony_ci 93348c2ecf20Sopenharmony_cistatic void 
io_req_cache_free(struct list_head *list) 93358c2ecf20Sopenharmony_ci{ 93368c2ecf20Sopenharmony_ci struct io_kiocb *req, *nxt; 93378c2ecf20Sopenharmony_ci 93388c2ecf20Sopenharmony_ci list_for_each_entry_safe(req, nxt, list, inflight_entry) { 93398c2ecf20Sopenharmony_ci list_del(&req->inflight_entry); 93408c2ecf20Sopenharmony_ci kmem_cache_free(req_cachep, req); 93418c2ecf20Sopenharmony_ci } 93428c2ecf20Sopenharmony_ci} 93438c2ecf20Sopenharmony_ci 93448c2ecf20Sopenharmony_cistatic void io_req_caches_free(struct io_ring_ctx *ctx) 93458c2ecf20Sopenharmony_ci{ 93468c2ecf20Sopenharmony_ci struct io_submit_state *state = &ctx->submit_state; 93478c2ecf20Sopenharmony_ci 93488c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 93498c2ecf20Sopenharmony_ci 93508c2ecf20Sopenharmony_ci if (state->free_reqs) { 93518c2ecf20Sopenharmony_ci kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs); 93528c2ecf20Sopenharmony_ci state->free_reqs = 0; 93538c2ecf20Sopenharmony_ci } 93548c2ecf20Sopenharmony_ci 93558c2ecf20Sopenharmony_ci io_flush_cached_locked_reqs(ctx, state); 93568c2ecf20Sopenharmony_ci io_req_cache_free(&state->free_list); 93578c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 93588c2ecf20Sopenharmony_ci} 93598c2ecf20Sopenharmony_ci 93608c2ecf20Sopenharmony_cistatic void io_wait_rsrc_data(struct io_rsrc_data *data) 93618c2ecf20Sopenharmony_ci{ 93628c2ecf20Sopenharmony_ci if (data && !atomic_dec_and_test(&data->refs)) 93638c2ecf20Sopenharmony_ci wait_for_completion(&data->done); 93648c2ecf20Sopenharmony_ci} 93658c2ecf20Sopenharmony_ci 93668c2ecf20Sopenharmony_cistatic void io_ring_ctx_free(struct io_ring_ctx *ctx) 93678c2ecf20Sopenharmony_ci{ 93688c2ecf20Sopenharmony_ci io_sq_thread_finish(ctx); 93698c2ecf20Sopenharmony_ci 93708c2ecf20Sopenharmony_ci /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ 93718c2ecf20Sopenharmony_ci io_wait_rsrc_data(ctx->buf_data); 93728c2ecf20Sopenharmony_ci io_wait_rsrc_data(ctx->file_data); 
93738c2ecf20Sopenharmony_ci 93748c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 93758c2ecf20Sopenharmony_ci if (ctx->buf_data) 93768c2ecf20Sopenharmony_ci __io_sqe_buffers_unregister(ctx); 93778c2ecf20Sopenharmony_ci if (ctx->file_data) 93788c2ecf20Sopenharmony_ci __io_sqe_files_unregister(ctx); 93798c2ecf20Sopenharmony_ci if (ctx->rings) 93808c2ecf20Sopenharmony_ci __io_cqring_overflow_flush(ctx, true); 93818c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 93828c2ecf20Sopenharmony_ci io_eventfd_unregister(ctx); 93838c2ecf20Sopenharmony_ci io_destroy_buffers(ctx); 93848c2ecf20Sopenharmony_ci if (ctx->sq_creds) 93858c2ecf20Sopenharmony_ci put_cred(ctx->sq_creds); 93868c2ecf20Sopenharmony_ci 93878c2ecf20Sopenharmony_ci /* there are no registered resources left, nobody uses it */ 93888c2ecf20Sopenharmony_ci if (ctx->rsrc_node) 93898c2ecf20Sopenharmony_ci io_rsrc_node_destroy(ctx->rsrc_node); 93908c2ecf20Sopenharmony_ci if (ctx->rsrc_backup_node) 93918c2ecf20Sopenharmony_ci io_rsrc_node_destroy(ctx->rsrc_backup_node); 93928c2ecf20Sopenharmony_ci flush_delayed_work(&ctx->rsrc_put_work); 93938c2ecf20Sopenharmony_ci 93948c2ecf20Sopenharmony_ci WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); 93958c2ecf20Sopenharmony_ci WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); 93968c2ecf20Sopenharmony_ci 93978c2ecf20Sopenharmony_ci#if defined(CONFIG_UNIX) 93988c2ecf20Sopenharmony_ci if (ctx->ring_sock) { 93998c2ecf20Sopenharmony_ci ctx->ring_sock->file = NULL; /* so that iput() is called */ 94008c2ecf20Sopenharmony_ci sock_release(ctx->ring_sock); 94018c2ecf20Sopenharmony_ci } 94028c2ecf20Sopenharmony_ci#endif 94038c2ecf20Sopenharmony_ci WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); 94048c2ecf20Sopenharmony_ci 94058c2ecf20Sopenharmony_ci if (ctx->mm_account) { 94068c2ecf20Sopenharmony_ci mmdrop(ctx->mm_account); 94078c2ecf20Sopenharmony_ci ctx->mm_account = NULL; 94088c2ecf20Sopenharmony_ci } 94098c2ecf20Sopenharmony_ci 94108c2ecf20Sopenharmony_ci 
io_mem_free(ctx->rings); 94118c2ecf20Sopenharmony_ci io_mem_free(ctx->sq_sqes); 94128c2ecf20Sopenharmony_ci 94138c2ecf20Sopenharmony_ci percpu_ref_exit(&ctx->refs); 94148c2ecf20Sopenharmony_ci free_uid(ctx->user); 94158c2ecf20Sopenharmony_ci io_req_caches_free(ctx); 94168c2ecf20Sopenharmony_ci if (ctx->hash_map) 94178c2ecf20Sopenharmony_ci io_wq_put_hash(ctx->hash_map); 94188c2ecf20Sopenharmony_ci kfree(ctx->cancel_hash); 94198c2ecf20Sopenharmony_ci kfree(ctx->dummy_ubuf); 94208c2ecf20Sopenharmony_ci kfree(ctx); 94218c2ecf20Sopenharmony_ci} 94228c2ecf20Sopenharmony_ci 94238c2ecf20Sopenharmony_cistatic __poll_t io_uring_poll(struct file *file, poll_table *wait) 94248c2ecf20Sopenharmony_ci{ 94258c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = file->private_data; 94268c2ecf20Sopenharmony_ci __poll_t mask = 0; 94278c2ecf20Sopenharmony_ci 94288c2ecf20Sopenharmony_ci poll_wait(file, &ctx->poll_wait, wait); 94298c2ecf20Sopenharmony_ci /* 94308c2ecf20Sopenharmony_ci * synchronizes with barrier from wq_has_sleeper call in 94318c2ecf20Sopenharmony_ci * io_commit_cqring 94328c2ecf20Sopenharmony_ci */ 94338c2ecf20Sopenharmony_ci smp_rmb(); 94348c2ecf20Sopenharmony_ci if (!io_sqring_full(ctx)) 94358c2ecf20Sopenharmony_ci mask |= EPOLLOUT | EPOLLWRNORM; 94368c2ecf20Sopenharmony_ci 94378c2ecf20Sopenharmony_ci /* 94388c2ecf20Sopenharmony_ci * Don't flush cqring overflow list here, just do a simple check. 94398c2ecf20Sopenharmony_ci * Otherwise there could possible be ABBA deadlock: 94408c2ecf20Sopenharmony_ci * CPU0 CPU1 94418c2ecf20Sopenharmony_ci * ---- ---- 94428c2ecf20Sopenharmony_ci * lock(&ctx->uring_lock); 94438c2ecf20Sopenharmony_ci * lock(&ep->mtx); 94448c2ecf20Sopenharmony_ci * lock(&ctx->uring_lock); 94458c2ecf20Sopenharmony_ci * lock(&ep->mtx); 94468c2ecf20Sopenharmony_ci * 94478c2ecf20Sopenharmony_ci * Users may get EPOLLIN meanwhile seeing nothing in cqring, this 94488c2ecf20Sopenharmony_ci * pushs them to do the flush. 
94498c2ecf20Sopenharmony_ci */ 94508c2ecf20Sopenharmony_ci if (io_cqring_events(ctx) || test_bit(0, &ctx->check_cq_overflow)) 94518c2ecf20Sopenharmony_ci mask |= EPOLLIN | EPOLLRDNORM; 94528c2ecf20Sopenharmony_ci 94538c2ecf20Sopenharmony_ci return mask; 94548c2ecf20Sopenharmony_ci} 94558c2ecf20Sopenharmony_ci 94568c2ecf20Sopenharmony_cistatic int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) 94578c2ecf20Sopenharmony_ci{ 94588c2ecf20Sopenharmony_ci const struct cred *creds; 94598c2ecf20Sopenharmony_ci 94608c2ecf20Sopenharmony_ci creds = xa_erase(&ctx->personalities, id); 94618c2ecf20Sopenharmony_ci if (creds) { 94628c2ecf20Sopenharmony_ci put_cred(creds); 94638c2ecf20Sopenharmony_ci return 0; 94648c2ecf20Sopenharmony_ci } 94658c2ecf20Sopenharmony_ci 94668c2ecf20Sopenharmony_ci return -EINVAL; 94678c2ecf20Sopenharmony_ci} 94688c2ecf20Sopenharmony_ci 94698c2ecf20Sopenharmony_cistruct io_tctx_exit { 94708c2ecf20Sopenharmony_ci struct callback_head task_work; 94718c2ecf20Sopenharmony_ci struct completion completion; 94728c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 94738c2ecf20Sopenharmony_ci}; 94748c2ecf20Sopenharmony_ci 94758c2ecf20Sopenharmony_cistatic void io_tctx_exit_cb(struct callback_head *cb) 94768c2ecf20Sopenharmony_ci{ 94778c2ecf20Sopenharmony_ci struct io_uring_task *tctx = current->io_uring; 94788c2ecf20Sopenharmony_ci struct io_tctx_exit *work; 94798c2ecf20Sopenharmony_ci 94808c2ecf20Sopenharmony_ci work = container_of(cb, struct io_tctx_exit, task_work); 94818c2ecf20Sopenharmony_ci /* 94828c2ecf20Sopenharmony_ci * When @in_idle, we're in cancellation and it's racy to remove the 94838c2ecf20Sopenharmony_ci * node. It'll be removed by the end of cancellation, just ignore it. 94848c2ecf20Sopenharmony_ci * tctx can be NULL if the queueing of this task_work raced with 94858c2ecf20Sopenharmony_ci * work cancelation off the exec path. 
94868c2ecf20Sopenharmony_ci */ 94878c2ecf20Sopenharmony_ci if (tctx && !atomic_read(&tctx->in_idle)) 94888c2ecf20Sopenharmony_ci io_uring_del_tctx_node((unsigned long)work->ctx); 94898c2ecf20Sopenharmony_ci complete(&work->completion); 94908c2ecf20Sopenharmony_ci} 94918c2ecf20Sopenharmony_ci 94928c2ecf20Sopenharmony_cistatic bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) 94938c2ecf20Sopenharmony_ci{ 94948c2ecf20Sopenharmony_ci struct io_kiocb *req = container_of(work, struct io_kiocb, work); 94958c2ecf20Sopenharmony_ci 94968c2ecf20Sopenharmony_ci return req->ctx == data; 94978c2ecf20Sopenharmony_ci} 94988c2ecf20Sopenharmony_ci 94998c2ecf20Sopenharmony_cistatic void io_ring_exit_work(struct work_struct *work) 95008c2ecf20Sopenharmony_ci{ 95018c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work); 95028c2ecf20Sopenharmony_ci unsigned long timeout = jiffies + HZ * 60 * 5; 95038c2ecf20Sopenharmony_ci unsigned long interval = HZ / 20; 95048c2ecf20Sopenharmony_ci struct io_tctx_exit exit; 95058c2ecf20Sopenharmony_ci struct io_tctx_node *node; 95068c2ecf20Sopenharmony_ci int ret; 95078c2ecf20Sopenharmony_ci 95088c2ecf20Sopenharmony_ci /* 95098c2ecf20Sopenharmony_ci * If we're doing polled IO and end up having requests being 95108c2ecf20Sopenharmony_ci * submitted async (out-of-line), then completions can come in while 95118c2ecf20Sopenharmony_ci * we're waiting for refs to drop. We need to reap these manually, 95128c2ecf20Sopenharmony_ci * as nobody else will be looking for them. 
95138c2ecf20Sopenharmony_ci */ 95148c2ecf20Sopenharmony_ci do { 95158c2ecf20Sopenharmony_ci io_uring_try_cancel_requests(ctx, NULL, true); 95168c2ecf20Sopenharmony_ci if (ctx->sq_data) { 95178c2ecf20Sopenharmony_ci struct io_sq_data *sqd = ctx->sq_data; 95188c2ecf20Sopenharmony_ci struct task_struct *tsk; 95198c2ecf20Sopenharmony_ci 95208c2ecf20Sopenharmony_ci io_sq_thread_park(sqd); 95218c2ecf20Sopenharmony_ci tsk = sqd->thread; 95228c2ecf20Sopenharmony_ci if (tsk && tsk->io_uring && tsk->io_uring->io_wq) 95238c2ecf20Sopenharmony_ci io_wq_cancel_cb(tsk->io_uring->io_wq, 95248c2ecf20Sopenharmony_ci io_cancel_ctx_cb, ctx, true); 95258c2ecf20Sopenharmony_ci io_sq_thread_unpark(sqd); 95268c2ecf20Sopenharmony_ci } 95278c2ecf20Sopenharmony_ci 95288c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(time_after(jiffies, timeout))) { 95298c2ecf20Sopenharmony_ci /* there is little hope left, don't run it too often */ 95308c2ecf20Sopenharmony_ci interval = HZ * 60; 95318c2ecf20Sopenharmony_ci } 95328c2ecf20Sopenharmony_ci /* 95338c2ecf20Sopenharmony_ci * This is really an uninterruptible wait, as it has to be 95348c2ecf20Sopenharmony_ci * complete. But it's also run from a kworker, which doesn't 95358c2ecf20Sopenharmony_ci * take signals, so it's fine to make it interruptible. This 95368c2ecf20Sopenharmony_ci * avoids scenarios where we knowingly can wait much longer 95378c2ecf20Sopenharmony_ci * on completions, for example if someone does a SIGSTOP on 95388c2ecf20Sopenharmony_ci * a task that needs to finish task_work to make this loop 95398c2ecf20Sopenharmony_ci * complete. That's a synthetic situation that should not 95408c2ecf20Sopenharmony_ci * cause a stuck task backtrace, and hence a potential panic 95418c2ecf20Sopenharmony_ci * on stuck tasks if that is enabled. 
95428c2ecf20Sopenharmony_ci */ 95438c2ecf20Sopenharmony_ci } while (!wait_for_completion_interruptible_timeout(&ctx->ref_comp, interval)); 95448c2ecf20Sopenharmony_ci 95458c2ecf20Sopenharmony_ci init_completion(&exit.completion); 95468c2ecf20Sopenharmony_ci init_task_work(&exit.task_work, io_tctx_exit_cb); 95478c2ecf20Sopenharmony_ci exit.ctx = ctx; 95488c2ecf20Sopenharmony_ci /* 95498c2ecf20Sopenharmony_ci * Some may use context even when all refs and requests have been put, 95508c2ecf20Sopenharmony_ci * and they are free to do so while still holding uring_lock or 95518c2ecf20Sopenharmony_ci * completion_lock, see io_req_task_submit(). Apart from other work, 95528c2ecf20Sopenharmony_ci * this lock/unlock section also waits them to finish. 95538c2ecf20Sopenharmony_ci */ 95548c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 95558c2ecf20Sopenharmony_ci while (!list_empty(&ctx->tctx_list)) { 95568c2ecf20Sopenharmony_ci WARN_ON_ONCE(time_after(jiffies, timeout)); 95578c2ecf20Sopenharmony_ci 95588c2ecf20Sopenharmony_ci node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, 95598c2ecf20Sopenharmony_ci ctx_node); 95608c2ecf20Sopenharmony_ci /* don't spin on a single task if cancellation failed */ 95618c2ecf20Sopenharmony_ci list_rotate_left(&ctx->tctx_list); 95628c2ecf20Sopenharmony_ci ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL); 95638c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(ret)) 95648c2ecf20Sopenharmony_ci continue; 95658c2ecf20Sopenharmony_ci wake_up_process(node->task); 95668c2ecf20Sopenharmony_ci 95678c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 95688c2ecf20Sopenharmony_ci /* 95698c2ecf20Sopenharmony_ci * See comment above for 95708c2ecf20Sopenharmony_ci * wait_for_completion_interruptible_timeout() on why this 95718c2ecf20Sopenharmony_ci * wait is marked as interruptible. 
95728c2ecf20Sopenharmony_ci */ 95738c2ecf20Sopenharmony_ci wait_for_completion_interruptible(&exit.completion); 95748c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 95758c2ecf20Sopenharmony_ci } 95768c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 95778c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 95788c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 95798c2ecf20Sopenharmony_ci 95808c2ecf20Sopenharmony_ci io_ring_ctx_free(ctx); 95818c2ecf20Sopenharmony_ci} 95828c2ecf20Sopenharmony_ci 95838c2ecf20Sopenharmony_ci/* Returns true if we found and killed one or more timeouts */ 95848c2ecf20Sopenharmony_cistatic bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, 95858c2ecf20Sopenharmony_ci bool cancel_all) 95868c2ecf20Sopenharmony_ci{ 95878c2ecf20Sopenharmony_ci struct io_kiocb *req, *tmp; 95888c2ecf20Sopenharmony_ci int canceled = 0; 95898c2ecf20Sopenharmony_ci 95908c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 95918c2ecf20Sopenharmony_ci spin_lock_irq(&ctx->timeout_lock); 95928c2ecf20Sopenharmony_ci list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { 95938c2ecf20Sopenharmony_ci if (io_match_task(req, tsk, cancel_all)) { 95948c2ecf20Sopenharmony_ci io_kill_timeout(req, -ECANCELED); 95958c2ecf20Sopenharmony_ci canceled++; 95968c2ecf20Sopenharmony_ci } 95978c2ecf20Sopenharmony_ci } 95988c2ecf20Sopenharmony_ci spin_unlock_irq(&ctx->timeout_lock); 95998c2ecf20Sopenharmony_ci if (canceled != 0) 96008c2ecf20Sopenharmony_ci io_commit_cqring(ctx); 96018c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 96028c2ecf20Sopenharmony_ci if (canceled != 0) 96038c2ecf20Sopenharmony_ci io_cqring_ev_posted(ctx); 96048c2ecf20Sopenharmony_ci return canceled != 0; 96058c2ecf20Sopenharmony_ci} 96068c2ecf20Sopenharmony_ci 96078c2ecf20Sopenharmony_cistatic void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) 96088c2ecf20Sopenharmony_ci{ 96098c2ecf20Sopenharmony_ci unsigned long index; 
96108c2ecf20Sopenharmony_ci struct creds *creds; 96118c2ecf20Sopenharmony_ci 96128c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 96138c2ecf20Sopenharmony_ci percpu_ref_kill(&ctx->refs); 96148c2ecf20Sopenharmony_ci if (ctx->rings) 96158c2ecf20Sopenharmony_ci __io_cqring_overflow_flush(ctx, true); 96168c2ecf20Sopenharmony_ci xa_for_each(&ctx->personalities, index, creds) 96178c2ecf20Sopenharmony_ci io_unregister_personality(ctx, index); 96188c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 96198c2ecf20Sopenharmony_ci 96208c2ecf20Sopenharmony_ci io_kill_timeouts(ctx, NULL, true); 96218c2ecf20Sopenharmony_ci io_poll_remove_all(ctx, NULL, true); 96228c2ecf20Sopenharmony_ci 96238c2ecf20Sopenharmony_ci /* if we failed setting up the ctx, we might not have any rings */ 96248c2ecf20Sopenharmony_ci io_iopoll_try_reap_events(ctx); 96258c2ecf20Sopenharmony_ci 96268c2ecf20Sopenharmony_ci /* drop cached put refs after potentially doing completions */ 96278c2ecf20Sopenharmony_ci if (current->io_uring) 96288c2ecf20Sopenharmony_ci io_uring_drop_tctx_refs(current); 96298c2ecf20Sopenharmony_ci 96308c2ecf20Sopenharmony_ci INIT_WORK(&ctx->exit_work, io_ring_exit_work); 96318c2ecf20Sopenharmony_ci /* 96328c2ecf20Sopenharmony_ci * Use system_unbound_wq to avoid spawning tons of event kworkers 96338c2ecf20Sopenharmony_ci * if we're exiting a ton of rings at the same time. It just adds 96348c2ecf20Sopenharmony_ci * noise and overhead, there's no discernable change in runtime 96358c2ecf20Sopenharmony_ci * over using system_wq. 
96368c2ecf20Sopenharmony_ci */ 96378c2ecf20Sopenharmony_ci queue_work(system_unbound_wq, &ctx->exit_work); 96388c2ecf20Sopenharmony_ci} 96398c2ecf20Sopenharmony_ci 96408c2ecf20Sopenharmony_cistatic int io_uring_release(struct inode *inode, struct file *file) 96418c2ecf20Sopenharmony_ci{ 96428c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = file->private_data; 96438c2ecf20Sopenharmony_ci 96448c2ecf20Sopenharmony_ci file->private_data = NULL; 96458c2ecf20Sopenharmony_ci io_ring_ctx_wait_and_kill(ctx); 96468c2ecf20Sopenharmony_ci return 0; 96478c2ecf20Sopenharmony_ci} 96488c2ecf20Sopenharmony_ci 96498c2ecf20Sopenharmony_cistruct io_task_cancel { 96508c2ecf20Sopenharmony_ci struct task_struct *task; 96518c2ecf20Sopenharmony_ci bool all; 96528c2ecf20Sopenharmony_ci}; 96538c2ecf20Sopenharmony_ci 96548c2ecf20Sopenharmony_cistatic bool io_cancel_task_cb(struct io_wq_work *work, void *data) 96558c2ecf20Sopenharmony_ci{ 96568c2ecf20Sopenharmony_ci struct io_kiocb *req = container_of(work, struct io_kiocb, work); 96578c2ecf20Sopenharmony_ci struct io_task_cancel *cancel = data; 96588c2ecf20Sopenharmony_ci 96598c2ecf20Sopenharmony_ci return io_match_task_safe(req, cancel->task, cancel->all); 96608c2ecf20Sopenharmony_ci} 96618c2ecf20Sopenharmony_ci 96628c2ecf20Sopenharmony_cistatic bool io_cancel_defer_files(struct io_ring_ctx *ctx, 96638c2ecf20Sopenharmony_ci struct task_struct *task, bool cancel_all) 96648c2ecf20Sopenharmony_ci{ 96658c2ecf20Sopenharmony_ci struct io_defer_entry *de; 96668c2ecf20Sopenharmony_ci LIST_HEAD(list); 96678c2ecf20Sopenharmony_ci 96688c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 96698c2ecf20Sopenharmony_ci list_for_each_entry_reverse(de, &ctx->defer_list, list) { 96708c2ecf20Sopenharmony_ci if (io_match_task_safe(de->req, task, cancel_all)) { 96718c2ecf20Sopenharmony_ci list_cut_position(&list, &ctx->defer_list, &de->list); 96728c2ecf20Sopenharmony_ci break; 96738c2ecf20Sopenharmony_ci } 96748c2ecf20Sopenharmony_ci } 
96758c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 96768c2ecf20Sopenharmony_ci if (list_empty(&list)) 96778c2ecf20Sopenharmony_ci return false; 96788c2ecf20Sopenharmony_ci 96798c2ecf20Sopenharmony_ci while (!list_empty(&list)) { 96808c2ecf20Sopenharmony_ci de = list_first_entry(&list, struct io_defer_entry, list); 96818c2ecf20Sopenharmony_ci list_del_init(&de->list); 96828c2ecf20Sopenharmony_ci io_req_complete_failed(de->req, -ECANCELED); 96838c2ecf20Sopenharmony_ci kfree(de); 96848c2ecf20Sopenharmony_ci } 96858c2ecf20Sopenharmony_ci return true; 96868c2ecf20Sopenharmony_ci} 96878c2ecf20Sopenharmony_ci 96888c2ecf20Sopenharmony_cistatic bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) 96898c2ecf20Sopenharmony_ci{ 96908c2ecf20Sopenharmony_ci struct io_tctx_node *node; 96918c2ecf20Sopenharmony_ci enum io_wq_cancel cret; 96928c2ecf20Sopenharmony_ci bool ret = false; 96938c2ecf20Sopenharmony_ci 96948c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 96958c2ecf20Sopenharmony_ci list_for_each_entry(node, &ctx->tctx_list, ctx_node) { 96968c2ecf20Sopenharmony_ci struct io_uring_task *tctx = node->task->io_uring; 96978c2ecf20Sopenharmony_ci 96988c2ecf20Sopenharmony_ci /* 96998c2ecf20Sopenharmony_ci * io_wq will stay alive while we hold uring_lock, because it's 97008c2ecf20Sopenharmony_ci * killed after ctx nodes, which requires to take the lock. 
97018c2ecf20Sopenharmony_ci */ 97028c2ecf20Sopenharmony_ci if (!tctx || !tctx->io_wq) 97038c2ecf20Sopenharmony_ci continue; 97048c2ecf20Sopenharmony_ci cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true); 97058c2ecf20Sopenharmony_ci ret |= (cret != IO_WQ_CANCEL_NOTFOUND); 97068c2ecf20Sopenharmony_ci } 97078c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 97088c2ecf20Sopenharmony_ci 97098c2ecf20Sopenharmony_ci return ret; 97108c2ecf20Sopenharmony_ci} 97118c2ecf20Sopenharmony_ci 97128c2ecf20Sopenharmony_cistatic void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, 97138c2ecf20Sopenharmony_ci struct task_struct *task, 97148c2ecf20Sopenharmony_ci bool cancel_all) 97158c2ecf20Sopenharmony_ci{ 97168c2ecf20Sopenharmony_ci struct io_task_cancel cancel = { .task = task, .all = cancel_all, }; 97178c2ecf20Sopenharmony_ci struct io_uring_task *tctx = task ? task->io_uring : NULL; 97188c2ecf20Sopenharmony_ci 97198c2ecf20Sopenharmony_ci while (1) { 97208c2ecf20Sopenharmony_ci enum io_wq_cancel cret; 97218c2ecf20Sopenharmony_ci bool ret = false; 97228c2ecf20Sopenharmony_ci 97238c2ecf20Sopenharmony_ci if (!task) { 97248c2ecf20Sopenharmony_ci ret |= io_uring_try_cancel_iowq(ctx); 97258c2ecf20Sopenharmony_ci } else if (tctx && tctx->io_wq) { 97268c2ecf20Sopenharmony_ci /* 97278c2ecf20Sopenharmony_ci * Cancels requests of all rings, not only @ctx, but 97288c2ecf20Sopenharmony_ci * it's fine as the task is in exit/exec. 
97298c2ecf20Sopenharmony_ci */ 97308c2ecf20Sopenharmony_ci cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb, 97318c2ecf20Sopenharmony_ci &cancel, true); 97328c2ecf20Sopenharmony_ci ret |= (cret != IO_WQ_CANCEL_NOTFOUND); 97338c2ecf20Sopenharmony_ci } 97348c2ecf20Sopenharmony_ci 97358c2ecf20Sopenharmony_ci /* SQPOLL thread does its own polling */ 97368c2ecf20Sopenharmony_ci if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) || 97378c2ecf20Sopenharmony_ci (ctx->sq_data && ctx->sq_data->thread == current)) { 97388c2ecf20Sopenharmony_ci while (!list_empty_careful(&ctx->iopoll_list)) { 97398c2ecf20Sopenharmony_ci io_iopoll_try_reap_events(ctx); 97408c2ecf20Sopenharmony_ci ret = true; 97418c2ecf20Sopenharmony_ci cond_resched(); 97428c2ecf20Sopenharmony_ci } 97438c2ecf20Sopenharmony_ci } 97448c2ecf20Sopenharmony_ci 97458c2ecf20Sopenharmony_ci ret |= io_cancel_defer_files(ctx, task, cancel_all); 97468c2ecf20Sopenharmony_ci ret |= io_poll_remove_all(ctx, task, cancel_all); 97478c2ecf20Sopenharmony_ci ret |= io_kill_timeouts(ctx, task, cancel_all); 97488c2ecf20Sopenharmony_ci if (task) 97498c2ecf20Sopenharmony_ci ret |= io_run_task_work(); 97508c2ecf20Sopenharmony_ci if (!ret) 97518c2ecf20Sopenharmony_ci break; 97528c2ecf20Sopenharmony_ci cond_resched(); 97538c2ecf20Sopenharmony_ci } 97548c2ecf20Sopenharmony_ci} 97558c2ecf20Sopenharmony_ci 97568c2ecf20Sopenharmony_cistatic int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) 97578c2ecf20Sopenharmony_ci{ 97588c2ecf20Sopenharmony_ci struct io_uring_task *tctx = current->io_uring; 97598c2ecf20Sopenharmony_ci struct io_tctx_node *node; 97608c2ecf20Sopenharmony_ci int ret; 97618c2ecf20Sopenharmony_ci 97628c2ecf20Sopenharmony_ci if (unlikely(!tctx)) { 97638c2ecf20Sopenharmony_ci ret = io_uring_alloc_task_context(current, ctx); 97648c2ecf20Sopenharmony_ci if (unlikely(ret)) 97658c2ecf20Sopenharmony_ci return ret; 97668c2ecf20Sopenharmony_ci 97678c2ecf20Sopenharmony_ci tctx = current->io_uring; 97688c2ecf20Sopenharmony_ci 
if (ctx->iowq_limits_set) { 97698c2ecf20Sopenharmony_ci unsigned int limits[2] = { ctx->iowq_limits[0], 97708c2ecf20Sopenharmony_ci ctx->iowq_limits[1], }; 97718c2ecf20Sopenharmony_ci 97728c2ecf20Sopenharmony_ci ret = io_wq_max_workers(tctx->io_wq, limits); 97738c2ecf20Sopenharmony_ci if (ret) 97748c2ecf20Sopenharmony_ci return ret; 97758c2ecf20Sopenharmony_ci } 97768c2ecf20Sopenharmony_ci } 97778c2ecf20Sopenharmony_ci if (!xa_load(&tctx->xa, (unsigned long)ctx)) { 97788c2ecf20Sopenharmony_ci node = kmalloc(sizeof(*node), GFP_KERNEL); 97798c2ecf20Sopenharmony_ci if (!node) 97808c2ecf20Sopenharmony_ci return -ENOMEM; 97818c2ecf20Sopenharmony_ci node->ctx = ctx; 97828c2ecf20Sopenharmony_ci node->task = current; 97838c2ecf20Sopenharmony_ci 97848c2ecf20Sopenharmony_ci ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx, 97858c2ecf20Sopenharmony_ci node, GFP_KERNEL)); 97868c2ecf20Sopenharmony_ci if (ret) { 97878c2ecf20Sopenharmony_ci kfree(node); 97888c2ecf20Sopenharmony_ci return ret; 97898c2ecf20Sopenharmony_ci } 97908c2ecf20Sopenharmony_ci 97918c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 97928c2ecf20Sopenharmony_ci list_add(&node->ctx_node, &ctx->tctx_list); 97938c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 97948c2ecf20Sopenharmony_ci } 97958c2ecf20Sopenharmony_ci tctx->last = ctx; 97968c2ecf20Sopenharmony_ci return 0; 97978c2ecf20Sopenharmony_ci} 97988c2ecf20Sopenharmony_ci 97998c2ecf20Sopenharmony_ci/* 98008c2ecf20Sopenharmony_ci * Note that this task has used io_uring. We use it for cancelation purposes. 
98018c2ecf20Sopenharmony_ci */ 98028c2ecf20Sopenharmony_cistatic inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx) 98038c2ecf20Sopenharmony_ci{ 98048c2ecf20Sopenharmony_ci struct io_uring_task *tctx = current->io_uring; 98058c2ecf20Sopenharmony_ci 98068c2ecf20Sopenharmony_ci if (likely(tctx && tctx->last == ctx)) 98078c2ecf20Sopenharmony_ci return 0; 98088c2ecf20Sopenharmony_ci return __io_uring_add_tctx_node(ctx); 98098c2ecf20Sopenharmony_ci} 98108c2ecf20Sopenharmony_ci 98118c2ecf20Sopenharmony_ci/* 98128c2ecf20Sopenharmony_ci * Remove this io_uring_file -> task mapping. 98138c2ecf20Sopenharmony_ci */ 98148c2ecf20Sopenharmony_cistatic void io_uring_del_tctx_node(unsigned long index) 98158c2ecf20Sopenharmony_ci{ 98168c2ecf20Sopenharmony_ci struct io_uring_task *tctx = current->io_uring; 98178c2ecf20Sopenharmony_ci struct io_tctx_node *node; 98188c2ecf20Sopenharmony_ci 98198c2ecf20Sopenharmony_ci if (!tctx) 98208c2ecf20Sopenharmony_ci return; 98218c2ecf20Sopenharmony_ci node = xa_erase(&tctx->xa, index); 98228c2ecf20Sopenharmony_ci if (!node) 98238c2ecf20Sopenharmony_ci return; 98248c2ecf20Sopenharmony_ci 98258c2ecf20Sopenharmony_ci WARN_ON_ONCE(current != node->task); 98268c2ecf20Sopenharmony_ci WARN_ON_ONCE(list_empty(&node->ctx_node)); 98278c2ecf20Sopenharmony_ci 98288c2ecf20Sopenharmony_ci mutex_lock(&node->ctx->uring_lock); 98298c2ecf20Sopenharmony_ci list_del(&node->ctx_node); 98308c2ecf20Sopenharmony_ci mutex_unlock(&node->ctx->uring_lock); 98318c2ecf20Sopenharmony_ci 98328c2ecf20Sopenharmony_ci if (tctx->last == node->ctx) 98338c2ecf20Sopenharmony_ci tctx->last = NULL; 98348c2ecf20Sopenharmony_ci kfree(node); 98358c2ecf20Sopenharmony_ci} 98368c2ecf20Sopenharmony_ci 98378c2ecf20Sopenharmony_cistatic void io_uring_clean_tctx(struct io_uring_task *tctx) 98388c2ecf20Sopenharmony_ci{ 98398c2ecf20Sopenharmony_ci struct io_wq *wq = tctx->io_wq; 98408c2ecf20Sopenharmony_ci struct io_tctx_node *node; 98418c2ecf20Sopenharmony_ci unsigned long index; 
98428c2ecf20Sopenharmony_ci 98438c2ecf20Sopenharmony_ci xa_for_each(&tctx->xa, index, node) { 98448c2ecf20Sopenharmony_ci io_uring_del_tctx_node(index); 98458c2ecf20Sopenharmony_ci cond_resched(); 98468c2ecf20Sopenharmony_ci } 98478c2ecf20Sopenharmony_ci if (wq) { 98488c2ecf20Sopenharmony_ci /* 98498c2ecf20Sopenharmony_ci * Must be after io_uring_del_task_file() (removes nodes under 98508c2ecf20Sopenharmony_ci * uring_lock) to avoid race with io_uring_try_cancel_iowq(). 98518c2ecf20Sopenharmony_ci */ 98528c2ecf20Sopenharmony_ci io_wq_put_and_exit(wq); 98538c2ecf20Sopenharmony_ci tctx->io_wq = NULL; 98548c2ecf20Sopenharmony_ci } 98558c2ecf20Sopenharmony_ci} 98568c2ecf20Sopenharmony_ci 98578c2ecf20Sopenharmony_cistatic s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) 98588c2ecf20Sopenharmony_ci{ 98598c2ecf20Sopenharmony_ci if (tracked) 98608c2ecf20Sopenharmony_ci return atomic_read(&tctx->inflight_tracked); 98618c2ecf20Sopenharmony_ci return percpu_counter_sum(&tctx->inflight); 98628c2ecf20Sopenharmony_ci} 98638c2ecf20Sopenharmony_ci 98648c2ecf20Sopenharmony_ci/* 98658c2ecf20Sopenharmony_ci * Find any io_uring ctx that this task has registered or done IO on, and cancel 98668c2ecf20Sopenharmony_ci * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation. 
98678c2ecf20Sopenharmony_ci */ 98688c2ecf20Sopenharmony_cistatic void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd) 98698c2ecf20Sopenharmony_ci{ 98708c2ecf20Sopenharmony_ci struct io_uring_task *tctx = current->io_uring; 98718c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 98728c2ecf20Sopenharmony_ci s64 inflight; 98738c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 98748c2ecf20Sopenharmony_ci 98758c2ecf20Sopenharmony_ci WARN_ON_ONCE(sqd && sqd->thread != current); 98768c2ecf20Sopenharmony_ci 98778c2ecf20Sopenharmony_ci if (!current->io_uring) 98788c2ecf20Sopenharmony_ci return; 98798c2ecf20Sopenharmony_ci if (tctx->io_wq) 98808c2ecf20Sopenharmony_ci io_wq_exit_start(tctx->io_wq); 98818c2ecf20Sopenharmony_ci 98828c2ecf20Sopenharmony_ci atomic_inc(&tctx->in_idle); 98838c2ecf20Sopenharmony_ci do { 98848c2ecf20Sopenharmony_ci io_uring_drop_tctx_refs(current); 98858c2ecf20Sopenharmony_ci /* read completions before cancelations */ 98868c2ecf20Sopenharmony_ci inflight = tctx_inflight(tctx, !cancel_all); 98878c2ecf20Sopenharmony_ci if (!inflight) 98888c2ecf20Sopenharmony_ci break; 98898c2ecf20Sopenharmony_ci 98908c2ecf20Sopenharmony_ci if (!sqd) { 98918c2ecf20Sopenharmony_ci struct io_tctx_node *node; 98928c2ecf20Sopenharmony_ci unsigned long index; 98938c2ecf20Sopenharmony_ci 98948c2ecf20Sopenharmony_ci xa_for_each(&tctx->xa, index, node) { 98958c2ecf20Sopenharmony_ci /* sqpoll task will cancel all its requests */ 98968c2ecf20Sopenharmony_ci if (node->ctx->sq_data) 98978c2ecf20Sopenharmony_ci continue; 98988c2ecf20Sopenharmony_ci io_uring_try_cancel_requests(node->ctx, current, 98998c2ecf20Sopenharmony_ci cancel_all); 99008c2ecf20Sopenharmony_ci } 99018c2ecf20Sopenharmony_ci } else { 99028c2ecf20Sopenharmony_ci list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) 99038c2ecf20Sopenharmony_ci io_uring_try_cancel_requests(ctx, current, 99048c2ecf20Sopenharmony_ci cancel_all); 99058c2ecf20Sopenharmony_ci } 99068c2ecf20Sopenharmony_ci 99078c2ecf20Sopenharmony_ci 
prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE); 99088c2ecf20Sopenharmony_ci io_run_task_work(); 99098c2ecf20Sopenharmony_ci io_uring_drop_tctx_refs(current); 99108c2ecf20Sopenharmony_ci 99118c2ecf20Sopenharmony_ci /* 99128c2ecf20Sopenharmony_ci * If we've seen completions, retry without waiting. This 99138c2ecf20Sopenharmony_ci * avoids a race where a completion comes in before we did 99148c2ecf20Sopenharmony_ci * prepare_to_wait(). 99158c2ecf20Sopenharmony_ci */ 99168c2ecf20Sopenharmony_ci if (inflight == tctx_inflight(tctx, !cancel_all)) 99178c2ecf20Sopenharmony_ci schedule(); 99188c2ecf20Sopenharmony_ci finish_wait(&tctx->wait, &wait); 99198c2ecf20Sopenharmony_ci } while (1); 99208c2ecf20Sopenharmony_ci 99218c2ecf20Sopenharmony_ci io_uring_clean_tctx(tctx); 99228c2ecf20Sopenharmony_ci if (cancel_all) { 99238c2ecf20Sopenharmony_ci /* 99248c2ecf20Sopenharmony_ci * We shouldn't run task_works after cancel, so just leave 99258c2ecf20Sopenharmony_ci * ->in_idle set for normal exit. 
99268c2ecf20Sopenharmony_ci */ 99278c2ecf20Sopenharmony_ci atomic_dec(&tctx->in_idle); 99288c2ecf20Sopenharmony_ci /* for exec all current's requests should be gone, kill tctx */ 99298c2ecf20Sopenharmony_ci __io_uring_free(current); 99308c2ecf20Sopenharmony_ci } 99318c2ecf20Sopenharmony_ci} 99328c2ecf20Sopenharmony_ci 99338c2ecf20Sopenharmony_civoid __io_uring_cancel(bool cancel_all) 99348c2ecf20Sopenharmony_ci{ 99358c2ecf20Sopenharmony_ci io_uring_cancel_generic(cancel_all, NULL); 99368c2ecf20Sopenharmony_ci} 99378c2ecf20Sopenharmony_ci 99388c2ecf20Sopenharmony_cistatic void *io_uring_validate_mmap_request(struct file *file, 99398c2ecf20Sopenharmony_ci loff_t pgoff, size_t sz) 99408c2ecf20Sopenharmony_ci{ 99418c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = file->private_data; 99428c2ecf20Sopenharmony_ci loff_t offset = pgoff << PAGE_SHIFT; 99438c2ecf20Sopenharmony_ci struct page *page; 99448c2ecf20Sopenharmony_ci void *ptr; 99458c2ecf20Sopenharmony_ci 99468c2ecf20Sopenharmony_ci switch (offset) { 99478c2ecf20Sopenharmony_ci case IORING_OFF_SQ_RING: 99488c2ecf20Sopenharmony_ci case IORING_OFF_CQ_RING: 99498c2ecf20Sopenharmony_ci ptr = ctx->rings; 99508c2ecf20Sopenharmony_ci break; 99518c2ecf20Sopenharmony_ci case IORING_OFF_SQES: 99528c2ecf20Sopenharmony_ci ptr = ctx->sq_sqes; 99538c2ecf20Sopenharmony_ci break; 99548c2ecf20Sopenharmony_ci default: 99558c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 99568c2ecf20Sopenharmony_ci } 99578c2ecf20Sopenharmony_ci 99588c2ecf20Sopenharmony_ci page = virt_to_head_page(ptr); 99598c2ecf20Sopenharmony_ci if (sz > page_size(page)) 99608c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 99618c2ecf20Sopenharmony_ci 99628c2ecf20Sopenharmony_ci return ptr; 99638c2ecf20Sopenharmony_ci} 99648c2ecf20Sopenharmony_ci 99658c2ecf20Sopenharmony_ci#ifdef CONFIG_MMU 99668c2ecf20Sopenharmony_ci 99678c2ecf20Sopenharmony_cistatic int io_uring_mmap(struct file *file, struct vm_area_struct *vma) 99688c2ecf20Sopenharmony_ci{ 
99698c2ecf20Sopenharmony_ci size_t sz = vma->vm_end - vma->vm_start; 99708c2ecf20Sopenharmony_ci unsigned long pfn; 99718c2ecf20Sopenharmony_ci void *ptr; 99728c2ecf20Sopenharmony_ci 99738c2ecf20Sopenharmony_ci ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz); 99748c2ecf20Sopenharmony_ci if (IS_ERR(ptr)) 99758c2ecf20Sopenharmony_ci return PTR_ERR(ptr); 99768c2ecf20Sopenharmony_ci 99778c2ecf20Sopenharmony_ci pfn = virt_to_phys(ptr) >> PAGE_SHIFT; 99788c2ecf20Sopenharmony_ci return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); 99798c2ecf20Sopenharmony_ci} 99808c2ecf20Sopenharmony_ci 99818c2ecf20Sopenharmony_ci#else /* !CONFIG_MMU */ 99828c2ecf20Sopenharmony_ci 99838c2ecf20Sopenharmony_cistatic int io_uring_mmap(struct file *file, struct vm_area_struct *vma) 99848c2ecf20Sopenharmony_ci{ 99858c2ecf20Sopenharmony_ci return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL; 99868c2ecf20Sopenharmony_ci} 99878c2ecf20Sopenharmony_ci 99888c2ecf20Sopenharmony_cistatic unsigned int io_uring_nommu_mmap_capabilities(struct file *file) 99898c2ecf20Sopenharmony_ci{ 99908c2ecf20Sopenharmony_ci return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE; 99918c2ecf20Sopenharmony_ci} 99928c2ecf20Sopenharmony_ci 99938c2ecf20Sopenharmony_cistatic unsigned long io_uring_nommu_get_unmapped_area(struct file *file, 99948c2ecf20Sopenharmony_ci unsigned long addr, unsigned long len, 99958c2ecf20Sopenharmony_ci unsigned long pgoff, unsigned long flags) 99968c2ecf20Sopenharmony_ci{ 99978c2ecf20Sopenharmony_ci void *ptr; 99988c2ecf20Sopenharmony_ci 99998c2ecf20Sopenharmony_ci ptr = io_uring_validate_mmap_request(file, pgoff, len); 100008c2ecf20Sopenharmony_ci if (IS_ERR(ptr)) 100018c2ecf20Sopenharmony_ci return PTR_ERR(ptr); 100028c2ecf20Sopenharmony_ci 100038c2ecf20Sopenharmony_ci return (unsigned long) ptr; 100048c2ecf20Sopenharmony_ci} 100058c2ecf20Sopenharmony_ci 100068c2ecf20Sopenharmony_ci#endif /* !CONFIG_MMU */ 100078c2ecf20Sopenharmony_ci 
100088c2ecf20Sopenharmony_cistatic int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) 100098c2ecf20Sopenharmony_ci{ 100108c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 100118c2ecf20Sopenharmony_ci 100128c2ecf20Sopenharmony_ci do { 100138c2ecf20Sopenharmony_ci if (!io_sqring_full(ctx)) 100148c2ecf20Sopenharmony_ci break; 100158c2ecf20Sopenharmony_ci prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); 100168c2ecf20Sopenharmony_ci 100178c2ecf20Sopenharmony_ci if (!io_sqring_full(ctx)) 100188c2ecf20Sopenharmony_ci break; 100198c2ecf20Sopenharmony_ci schedule(); 100208c2ecf20Sopenharmony_ci } while (!signal_pending(current)); 100218c2ecf20Sopenharmony_ci 100228c2ecf20Sopenharmony_ci finish_wait(&ctx->sqo_sq_wait, &wait); 100238c2ecf20Sopenharmony_ci return 0; 100248c2ecf20Sopenharmony_ci} 100258c2ecf20Sopenharmony_ci 100268c2ecf20Sopenharmony_cistatic int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, 100278c2ecf20Sopenharmony_ci struct __kernel_timespec __user **ts, 100288c2ecf20Sopenharmony_ci const sigset_t __user **sig) 100298c2ecf20Sopenharmony_ci{ 100308c2ecf20Sopenharmony_ci struct io_uring_getevents_arg arg; 100318c2ecf20Sopenharmony_ci 100328c2ecf20Sopenharmony_ci /* 100338c2ecf20Sopenharmony_ci * If EXT_ARG isn't set, then we have no timespec and the argp pointer 100348c2ecf20Sopenharmony_ci * is just a pointer to the sigset_t. 100358c2ecf20Sopenharmony_ci */ 100368c2ecf20Sopenharmony_ci if (!(flags & IORING_ENTER_EXT_ARG)) { 100378c2ecf20Sopenharmony_ci *sig = (const sigset_t __user *) argp; 100388c2ecf20Sopenharmony_ci *ts = NULL; 100398c2ecf20Sopenharmony_ci return 0; 100408c2ecf20Sopenharmony_ci } 100418c2ecf20Sopenharmony_ci 100428c2ecf20Sopenharmony_ci /* 100438c2ecf20Sopenharmony_ci * EXT_ARG is set - ensure we agree on the size of it and copy in our 100448c2ecf20Sopenharmony_ci * timespec and sigset_t pointers if good. 
100458c2ecf20Sopenharmony_ci */ 100468c2ecf20Sopenharmony_ci if (*argsz != sizeof(arg)) 100478c2ecf20Sopenharmony_ci return -EINVAL; 100488c2ecf20Sopenharmony_ci if (copy_from_user(&arg, argp, sizeof(arg))) 100498c2ecf20Sopenharmony_ci return -EFAULT; 100508c2ecf20Sopenharmony_ci if (arg.pad) 100518c2ecf20Sopenharmony_ci return -EINVAL; 100528c2ecf20Sopenharmony_ci *sig = u64_to_user_ptr(arg.sigmask); 100538c2ecf20Sopenharmony_ci *argsz = arg.sigmask_sz; 100548c2ecf20Sopenharmony_ci *ts = u64_to_user_ptr(arg.ts); 100558c2ecf20Sopenharmony_ci return 0; 100568c2ecf20Sopenharmony_ci} 100578c2ecf20Sopenharmony_ci 100588c2ecf20Sopenharmony_ciSYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, 100598c2ecf20Sopenharmony_ci u32, min_complete, u32, flags, const void __user *, argp, 100608c2ecf20Sopenharmony_ci size_t, argsz) 100618c2ecf20Sopenharmony_ci{ 100628c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 100638c2ecf20Sopenharmony_ci int submitted = 0; 100648c2ecf20Sopenharmony_ci struct fd f; 100658c2ecf20Sopenharmony_ci long ret; 100668c2ecf20Sopenharmony_ci 100678c2ecf20Sopenharmony_ci io_run_task_work(); 100688c2ecf20Sopenharmony_ci 100698c2ecf20Sopenharmony_ci if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP | 100708c2ecf20Sopenharmony_ci IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG))) 100718c2ecf20Sopenharmony_ci return -EINVAL; 100728c2ecf20Sopenharmony_ci 100738c2ecf20Sopenharmony_ci f = fdget(fd); 100748c2ecf20Sopenharmony_ci if (unlikely(!f.file)) 100758c2ecf20Sopenharmony_ci return -EBADF; 100768c2ecf20Sopenharmony_ci 100778c2ecf20Sopenharmony_ci ret = -EOPNOTSUPP; 100788c2ecf20Sopenharmony_ci if (unlikely(f.file->f_op != &io_uring_fops)) 100798c2ecf20Sopenharmony_ci goto out_fput; 100808c2ecf20Sopenharmony_ci 100818c2ecf20Sopenharmony_ci ret = -ENXIO; 100828c2ecf20Sopenharmony_ci ctx = f.file->private_data; 100838c2ecf20Sopenharmony_ci if (unlikely(!percpu_ref_tryget(&ctx->refs))) 100848c2ecf20Sopenharmony_ci goto out_fput; 
100858c2ecf20Sopenharmony_ci 100868c2ecf20Sopenharmony_ci ret = -EBADFD; 100878c2ecf20Sopenharmony_ci if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED)) 100888c2ecf20Sopenharmony_ci goto out; 100898c2ecf20Sopenharmony_ci 100908c2ecf20Sopenharmony_ci /* 100918c2ecf20Sopenharmony_ci * For SQ polling, the thread will do all submissions and completions. 100928c2ecf20Sopenharmony_ci * Just return the requested submit count, and wake the thread if 100938c2ecf20Sopenharmony_ci * we were asked to. 100948c2ecf20Sopenharmony_ci */ 100958c2ecf20Sopenharmony_ci ret = 0; 100968c2ecf20Sopenharmony_ci if (ctx->flags & IORING_SETUP_SQPOLL) { 100978c2ecf20Sopenharmony_ci io_cqring_overflow_flush(ctx); 100988c2ecf20Sopenharmony_ci 100998c2ecf20Sopenharmony_ci if (unlikely(ctx->sq_data->thread == NULL)) { 101008c2ecf20Sopenharmony_ci ret = -EOWNERDEAD; 101018c2ecf20Sopenharmony_ci goto out; 101028c2ecf20Sopenharmony_ci } 101038c2ecf20Sopenharmony_ci if (flags & IORING_ENTER_SQ_WAKEUP) 101048c2ecf20Sopenharmony_ci wake_up(&ctx->sq_data->wait); 101058c2ecf20Sopenharmony_ci if (flags & IORING_ENTER_SQ_WAIT) { 101068c2ecf20Sopenharmony_ci ret = io_sqpoll_wait_sq(ctx); 101078c2ecf20Sopenharmony_ci if (ret) 101088c2ecf20Sopenharmony_ci goto out; 101098c2ecf20Sopenharmony_ci } 101108c2ecf20Sopenharmony_ci submitted = to_submit; 101118c2ecf20Sopenharmony_ci } else if (to_submit) { 101128c2ecf20Sopenharmony_ci ret = io_uring_add_tctx_node(ctx); 101138c2ecf20Sopenharmony_ci if (unlikely(ret)) 101148c2ecf20Sopenharmony_ci goto out; 101158c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 101168c2ecf20Sopenharmony_ci submitted = io_submit_sqes(ctx, to_submit); 101178c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 101188c2ecf20Sopenharmony_ci 101198c2ecf20Sopenharmony_ci if (submitted != to_submit) 101208c2ecf20Sopenharmony_ci goto out; 101218c2ecf20Sopenharmony_ci } 101228c2ecf20Sopenharmony_ci if (flags & IORING_ENTER_GETEVENTS) { 101238c2ecf20Sopenharmony_ci const sigset_t __user 
*sig; 101248c2ecf20Sopenharmony_ci struct __kernel_timespec __user *ts; 101258c2ecf20Sopenharmony_ci 101268c2ecf20Sopenharmony_ci ret = io_get_ext_arg(flags, argp, &argsz, &ts, &sig); 101278c2ecf20Sopenharmony_ci if (unlikely(ret)) 101288c2ecf20Sopenharmony_ci goto out; 101298c2ecf20Sopenharmony_ci 101308c2ecf20Sopenharmony_ci min_complete = min(min_complete, ctx->cq_entries); 101318c2ecf20Sopenharmony_ci 101328c2ecf20Sopenharmony_ci /* 101338c2ecf20Sopenharmony_ci * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user 101348c2ecf20Sopenharmony_ci * space applications don't need to do io completion events 101358c2ecf20Sopenharmony_ci * polling again, they can rely on io_sq_thread to do polling 101368c2ecf20Sopenharmony_ci * work, which can reduce cpu usage and uring_lock contention. 101378c2ecf20Sopenharmony_ci */ 101388c2ecf20Sopenharmony_ci if (ctx->flags & IORING_SETUP_IOPOLL && 101398c2ecf20Sopenharmony_ci !(ctx->flags & IORING_SETUP_SQPOLL)) { 101408c2ecf20Sopenharmony_ci ret = io_iopoll_check(ctx, min_complete); 101418c2ecf20Sopenharmony_ci } else { 101428c2ecf20Sopenharmony_ci ret = io_cqring_wait(ctx, min_complete, sig, argsz, ts); 101438c2ecf20Sopenharmony_ci } 101448c2ecf20Sopenharmony_ci } 101458c2ecf20Sopenharmony_ci 101468c2ecf20Sopenharmony_ciout: 101478c2ecf20Sopenharmony_ci percpu_ref_put(&ctx->refs); 101488c2ecf20Sopenharmony_ciout_fput: 101498c2ecf20Sopenharmony_ci fdput(f); 101508c2ecf20Sopenharmony_ci return submitted ? 
submitted : ret; 101518c2ecf20Sopenharmony_ci} 101528c2ecf20Sopenharmony_ci 101538c2ecf20Sopenharmony_ci#ifdef CONFIG_PROC_FS 101548c2ecf20Sopenharmony_cistatic int io_uring_show_cred(struct seq_file *m, unsigned int id, 101558c2ecf20Sopenharmony_ci const struct cred *cred) 101568c2ecf20Sopenharmony_ci{ 101578c2ecf20Sopenharmony_ci struct user_namespace *uns = seq_user_ns(m); 101588c2ecf20Sopenharmony_ci struct group_info *gi; 101598c2ecf20Sopenharmony_ci kernel_cap_t cap; 101608c2ecf20Sopenharmony_ci unsigned __capi; 101618c2ecf20Sopenharmony_ci int g; 101628c2ecf20Sopenharmony_ci 101638c2ecf20Sopenharmony_ci seq_printf(m, "%5d\n", id); 101648c2ecf20Sopenharmony_ci seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); 101658c2ecf20Sopenharmony_ci seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); 101668c2ecf20Sopenharmony_ci seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); 101678c2ecf20Sopenharmony_ci seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); 101688c2ecf20Sopenharmony_ci seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); 101698c2ecf20Sopenharmony_ci seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); 101708c2ecf20Sopenharmony_ci seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); 101718c2ecf20Sopenharmony_ci seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); 101728c2ecf20Sopenharmony_ci seq_puts(m, "\n\tGroups:\t"); 101738c2ecf20Sopenharmony_ci gi = cred->group_info; 101748c2ecf20Sopenharmony_ci for (g = 0; g < gi->ngroups; g++) { 101758c2ecf20Sopenharmony_ci seq_put_decimal_ull(m, g ? 
" " : "", 101768c2ecf20Sopenharmony_ci from_kgid_munged(uns, gi->gid[g])); 101778c2ecf20Sopenharmony_ci } 101788c2ecf20Sopenharmony_ci seq_puts(m, "\n\tCapEff:\t"); 101798c2ecf20Sopenharmony_ci cap = cred->cap_effective; 101808c2ecf20Sopenharmony_ci CAP_FOR_EACH_U32(__capi) 101818c2ecf20Sopenharmony_ci seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); 101828c2ecf20Sopenharmony_ci seq_putc(m, '\n'); 101838c2ecf20Sopenharmony_ci return 0; 101848c2ecf20Sopenharmony_ci} 101858c2ecf20Sopenharmony_ci 101868c2ecf20Sopenharmony_cistatic void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m) 101878c2ecf20Sopenharmony_ci{ 101888c2ecf20Sopenharmony_ci int sq_pid = -1, sq_cpu = -1; 101898c2ecf20Sopenharmony_ci bool has_lock; 101908c2ecf20Sopenharmony_ci int i; 101918c2ecf20Sopenharmony_ci 101928c2ecf20Sopenharmony_ci /* 101938c2ecf20Sopenharmony_ci * Avoid ABBA deadlock between the seq lock and the io_uring mutex, 101948c2ecf20Sopenharmony_ci * since fdinfo case grabs it in the opposite direction of normal use 101958c2ecf20Sopenharmony_ci * cases. If we fail to get the lock, we just don't iterate any 101968c2ecf20Sopenharmony_ci * structures that could be going away outside the io_uring mutex. 
101978c2ecf20Sopenharmony_ci */ 101988c2ecf20Sopenharmony_ci has_lock = mutex_trylock(&ctx->uring_lock); 101998c2ecf20Sopenharmony_ci 102008c2ecf20Sopenharmony_ci if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { 102018c2ecf20Sopenharmony_ci struct io_sq_data *sq = ctx->sq_data; 102028c2ecf20Sopenharmony_ci 102038c2ecf20Sopenharmony_ci if (mutex_trylock(&sq->lock)) { 102048c2ecf20Sopenharmony_ci if (sq->thread) { 102058c2ecf20Sopenharmony_ci sq_pid = task_pid_nr(sq->thread); 102068c2ecf20Sopenharmony_ci sq_cpu = task_cpu(sq->thread); 102078c2ecf20Sopenharmony_ci } 102088c2ecf20Sopenharmony_ci mutex_unlock(&sq->lock); 102098c2ecf20Sopenharmony_ci } 102108c2ecf20Sopenharmony_ci } 102118c2ecf20Sopenharmony_ci 102128c2ecf20Sopenharmony_ci seq_printf(m, "SqThread:\t%d\n", sq_pid); 102138c2ecf20Sopenharmony_ci seq_printf(m, "SqThreadCpu:\t%d\n", sq_cpu); 102148c2ecf20Sopenharmony_ci seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); 102158c2ecf20Sopenharmony_ci for (i = 0; has_lock && i < ctx->nr_user_files; i++) { 102168c2ecf20Sopenharmony_ci struct file *f = io_file_from_index(ctx, i); 102178c2ecf20Sopenharmony_ci 102188c2ecf20Sopenharmony_ci if (f) 102198c2ecf20Sopenharmony_ci seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); 102208c2ecf20Sopenharmony_ci else 102218c2ecf20Sopenharmony_ci seq_printf(m, "%5u: <none>\n", i); 102228c2ecf20Sopenharmony_ci } 102238c2ecf20Sopenharmony_ci seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); 102248c2ecf20Sopenharmony_ci for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { 102258c2ecf20Sopenharmony_ci struct io_mapped_ubuf *buf = ctx->user_bufs[i]; 102268c2ecf20Sopenharmony_ci unsigned int len = buf->ubuf_end - buf->ubuf; 102278c2ecf20Sopenharmony_ci 102288c2ecf20Sopenharmony_ci seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len); 102298c2ecf20Sopenharmony_ci } 102308c2ecf20Sopenharmony_ci if (has_lock && !xa_empty(&ctx->personalities)) { 102318c2ecf20Sopenharmony_ci unsigned long index; 
102328c2ecf20Sopenharmony_ci const struct cred *cred; 102338c2ecf20Sopenharmony_ci 102348c2ecf20Sopenharmony_ci seq_printf(m, "Personalities:\n"); 102358c2ecf20Sopenharmony_ci xa_for_each(&ctx->personalities, index, cred) 102368c2ecf20Sopenharmony_ci io_uring_show_cred(m, index, cred); 102378c2ecf20Sopenharmony_ci } 102388c2ecf20Sopenharmony_ci seq_printf(m, "PollList:\n"); 102398c2ecf20Sopenharmony_ci spin_lock(&ctx->completion_lock); 102408c2ecf20Sopenharmony_ci for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { 102418c2ecf20Sopenharmony_ci struct hlist_head *list = &ctx->cancel_hash[i]; 102428c2ecf20Sopenharmony_ci struct io_kiocb *req; 102438c2ecf20Sopenharmony_ci 102448c2ecf20Sopenharmony_ci hlist_for_each_entry(req, list, hash_node) 102458c2ecf20Sopenharmony_ci seq_printf(m, " op=%d, task_works=%d\n", req->opcode, 102468c2ecf20Sopenharmony_ci req->task->task_works != NULL); 102478c2ecf20Sopenharmony_ci } 102488c2ecf20Sopenharmony_ci spin_unlock(&ctx->completion_lock); 102498c2ecf20Sopenharmony_ci if (has_lock) 102508c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 102518c2ecf20Sopenharmony_ci} 102528c2ecf20Sopenharmony_ci 102538c2ecf20Sopenharmony_cistatic void io_uring_show_fdinfo(struct seq_file *m, struct file *f) 102548c2ecf20Sopenharmony_ci{ 102558c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx = f->private_data; 102568c2ecf20Sopenharmony_ci 102578c2ecf20Sopenharmony_ci if (percpu_ref_tryget(&ctx->refs)) { 102588c2ecf20Sopenharmony_ci __io_uring_show_fdinfo(ctx, m); 102598c2ecf20Sopenharmony_ci percpu_ref_put(&ctx->refs); 102608c2ecf20Sopenharmony_ci } 102618c2ecf20Sopenharmony_ci} 102628c2ecf20Sopenharmony_ci#endif 102638c2ecf20Sopenharmony_ci 102648c2ecf20Sopenharmony_cistatic const struct file_operations io_uring_fops = { 102658c2ecf20Sopenharmony_ci .release = io_uring_release, 102668c2ecf20Sopenharmony_ci .mmap = io_uring_mmap, 102678c2ecf20Sopenharmony_ci#ifndef CONFIG_MMU 102688c2ecf20Sopenharmony_ci .get_unmapped_area = 
io_uring_nommu_get_unmapped_area, 102698c2ecf20Sopenharmony_ci .mmap_capabilities = io_uring_nommu_mmap_capabilities, 102708c2ecf20Sopenharmony_ci#endif 102718c2ecf20Sopenharmony_ci .poll = io_uring_poll, 102728c2ecf20Sopenharmony_ci#ifdef CONFIG_PROC_FS 102738c2ecf20Sopenharmony_ci .show_fdinfo = io_uring_show_fdinfo, 102748c2ecf20Sopenharmony_ci#endif 102758c2ecf20Sopenharmony_ci}; 102768c2ecf20Sopenharmony_ci 102778c2ecf20Sopenharmony_cistatic int io_allocate_scq_urings(struct io_ring_ctx *ctx, 102788c2ecf20Sopenharmony_ci struct io_uring_params *p) 102798c2ecf20Sopenharmony_ci{ 102808c2ecf20Sopenharmony_ci struct io_rings *rings; 102818c2ecf20Sopenharmony_ci size_t size, sq_array_offset; 102828c2ecf20Sopenharmony_ci 102838c2ecf20Sopenharmony_ci /* make sure these are sane, as we already accounted them */ 102848c2ecf20Sopenharmony_ci ctx->sq_entries = p->sq_entries; 102858c2ecf20Sopenharmony_ci ctx->cq_entries = p->cq_entries; 102868c2ecf20Sopenharmony_ci 102878c2ecf20Sopenharmony_ci size = rings_size(p->sq_entries, p->cq_entries, &sq_array_offset); 102888c2ecf20Sopenharmony_ci if (size == SIZE_MAX) 102898c2ecf20Sopenharmony_ci return -EOVERFLOW; 102908c2ecf20Sopenharmony_ci 102918c2ecf20Sopenharmony_ci rings = io_mem_alloc(size); 102928c2ecf20Sopenharmony_ci if (!rings) 102938c2ecf20Sopenharmony_ci return -ENOMEM; 102948c2ecf20Sopenharmony_ci 102958c2ecf20Sopenharmony_ci ctx->rings = rings; 102968c2ecf20Sopenharmony_ci ctx->sq_array = (u32 *)((char *)rings + sq_array_offset); 102978c2ecf20Sopenharmony_ci rings->sq_ring_mask = p->sq_entries - 1; 102988c2ecf20Sopenharmony_ci rings->cq_ring_mask = p->cq_entries - 1; 102998c2ecf20Sopenharmony_ci rings->sq_ring_entries = p->sq_entries; 103008c2ecf20Sopenharmony_ci rings->cq_ring_entries = p->cq_entries; 103018c2ecf20Sopenharmony_ci 103028c2ecf20Sopenharmony_ci size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); 103038c2ecf20Sopenharmony_ci if (size == SIZE_MAX) { 103048c2ecf20Sopenharmony_ci 
io_mem_free(ctx->rings); 103058c2ecf20Sopenharmony_ci ctx->rings = NULL; 103068c2ecf20Sopenharmony_ci return -EOVERFLOW; 103078c2ecf20Sopenharmony_ci } 103088c2ecf20Sopenharmony_ci 103098c2ecf20Sopenharmony_ci ctx->sq_sqes = io_mem_alloc(size); 103108c2ecf20Sopenharmony_ci if (!ctx->sq_sqes) { 103118c2ecf20Sopenharmony_ci io_mem_free(ctx->rings); 103128c2ecf20Sopenharmony_ci ctx->rings = NULL; 103138c2ecf20Sopenharmony_ci return -ENOMEM; 103148c2ecf20Sopenharmony_ci } 103158c2ecf20Sopenharmony_ci 103168c2ecf20Sopenharmony_ci return 0; 103178c2ecf20Sopenharmony_ci} 103188c2ecf20Sopenharmony_ci 103198c2ecf20Sopenharmony_cistatic int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) 103208c2ecf20Sopenharmony_ci{ 103218c2ecf20Sopenharmony_ci int ret, fd; 103228c2ecf20Sopenharmony_ci 103238c2ecf20Sopenharmony_ci fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); 103248c2ecf20Sopenharmony_ci if (fd < 0) 103258c2ecf20Sopenharmony_ci return fd; 103268c2ecf20Sopenharmony_ci 103278c2ecf20Sopenharmony_ci ret = io_uring_add_tctx_node(ctx); 103288c2ecf20Sopenharmony_ci if (ret) { 103298c2ecf20Sopenharmony_ci put_unused_fd(fd); 103308c2ecf20Sopenharmony_ci return ret; 103318c2ecf20Sopenharmony_ci } 103328c2ecf20Sopenharmony_ci fd_install(fd, file); 103338c2ecf20Sopenharmony_ci return fd; 103348c2ecf20Sopenharmony_ci} 103358c2ecf20Sopenharmony_ci 103368c2ecf20Sopenharmony_ci/* 103378c2ecf20Sopenharmony_ci * Allocate an anonymous fd, this is what constitutes the application 103388c2ecf20Sopenharmony_ci * visible backing of an io_uring instance. The application mmaps this 103398c2ecf20Sopenharmony_ci * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, 103408c2ecf20Sopenharmony_ci * we have to tie this fd to a socket for file garbage collection purposes. 
103418c2ecf20Sopenharmony_ci */ 103428c2ecf20Sopenharmony_cistatic struct file *io_uring_get_file(struct io_ring_ctx *ctx) 103438c2ecf20Sopenharmony_ci{ 103448c2ecf20Sopenharmony_ci struct file *file; 103458c2ecf20Sopenharmony_ci#if defined(CONFIG_UNIX) 103468c2ecf20Sopenharmony_ci int ret; 103478c2ecf20Sopenharmony_ci 103488c2ecf20Sopenharmony_ci ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, 103498c2ecf20Sopenharmony_ci &ctx->ring_sock); 103508c2ecf20Sopenharmony_ci if (ret) 103518c2ecf20Sopenharmony_ci return ERR_PTR(ret); 103528c2ecf20Sopenharmony_ci#endif 103538c2ecf20Sopenharmony_ci 103548c2ecf20Sopenharmony_ci file = anon_inode_getfile("[io_uring]", &io_uring_fops, ctx, 103558c2ecf20Sopenharmony_ci O_RDWR | O_CLOEXEC); 103568c2ecf20Sopenharmony_ci#if defined(CONFIG_UNIX) 103578c2ecf20Sopenharmony_ci if (IS_ERR(file)) { 103588c2ecf20Sopenharmony_ci sock_release(ctx->ring_sock); 103598c2ecf20Sopenharmony_ci ctx->ring_sock = NULL; 103608c2ecf20Sopenharmony_ci } else { 103618c2ecf20Sopenharmony_ci ctx->ring_sock->file = file; 103628c2ecf20Sopenharmony_ci } 103638c2ecf20Sopenharmony_ci#endif 103648c2ecf20Sopenharmony_ci return file; 103658c2ecf20Sopenharmony_ci} 103668c2ecf20Sopenharmony_ci 103678c2ecf20Sopenharmony_cistatic int io_uring_create(unsigned entries, struct io_uring_params *p, 103688c2ecf20Sopenharmony_ci struct io_uring_params __user *params) 103698c2ecf20Sopenharmony_ci{ 103708c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 103718c2ecf20Sopenharmony_ci struct file *file; 103728c2ecf20Sopenharmony_ci int ret; 103738c2ecf20Sopenharmony_ci 103748c2ecf20Sopenharmony_ci if (!entries) 103758c2ecf20Sopenharmony_ci return -EINVAL; 103768c2ecf20Sopenharmony_ci if (entries > IORING_MAX_ENTRIES) { 103778c2ecf20Sopenharmony_ci if (!(p->flags & IORING_SETUP_CLAMP)) 103788c2ecf20Sopenharmony_ci return -EINVAL; 103798c2ecf20Sopenharmony_ci entries = IORING_MAX_ENTRIES; 103808c2ecf20Sopenharmony_ci } 103818c2ecf20Sopenharmony_ci 
103828c2ecf20Sopenharmony_ci /* 103838c2ecf20Sopenharmony_ci * Use twice as many entries for the CQ ring. It's possible for the 103848c2ecf20Sopenharmony_ci * application to drive a higher depth than the size of the SQ ring, 103858c2ecf20Sopenharmony_ci * since the sqes are only used at submission time. This allows for 103868c2ecf20Sopenharmony_ci * some flexibility in overcommitting a bit. If the application has 103878c2ecf20Sopenharmony_ci * set IORING_SETUP_CQSIZE, it will have passed in the desired number 103888c2ecf20Sopenharmony_ci * of CQ ring entries manually. 103898c2ecf20Sopenharmony_ci */ 103908c2ecf20Sopenharmony_ci p->sq_entries = roundup_pow_of_two(entries); 103918c2ecf20Sopenharmony_ci if (p->flags & IORING_SETUP_CQSIZE) { 103928c2ecf20Sopenharmony_ci /* 103938c2ecf20Sopenharmony_ci * If IORING_SETUP_CQSIZE is set, we do the same roundup 103948c2ecf20Sopenharmony_ci * to a power-of-two, if it isn't already. We do NOT impose 103958c2ecf20Sopenharmony_ci * any cq vs sq ring sizing. 
103968c2ecf20Sopenharmony_ci */ 103978c2ecf20Sopenharmony_ci if (!p->cq_entries) 103988c2ecf20Sopenharmony_ci return -EINVAL; 103998c2ecf20Sopenharmony_ci if (p->cq_entries > IORING_MAX_CQ_ENTRIES) { 104008c2ecf20Sopenharmony_ci if (!(p->flags & IORING_SETUP_CLAMP)) 104018c2ecf20Sopenharmony_ci return -EINVAL; 104028c2ecf20Sopenharmony_ci p->cq_entries = IORING_MAX_CQ_ENTRIES; 104038c2ecf20Sopenharmony_ci } 104048c2ecf20Sopenharmony_ci p->cq_entries = roundup_pow_of_two(p->cq_entries); 104058c2ecf20Sopenharmony_ci if (p->cq_entries < p->sq_entries) 104068c2ecf20Sopenharmony_ci return -EINVAL; 104078c2ecf20Sopenharmony_ci } else { 104088c2ecf20Sopenharmony_ci p->cq_entries = 2 * p->sq_entries; 104098c2ecf20Sopenharmony_ci } 104108c2ecf20Sopenharmony_ci 104118c2ecf20Sopenharmony_ci ctx = io_ring_ctx_alloc(p); 104128c2ecf20Sopenharmony_ci if (!ctx) 104138c2ecf20Sopenharmony_ci return -ENOMEM; 104148c2ecf20Sopenharmony_ci ctx->compat = in_compat_syscall(); 104158c2ecf20Sopenharmony_ci if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK)) 104168c2ecf20Sopenharmony_ci ctx->user = get_uid(current_user()); 104178c2ecf20Sopenharmony_ci 104188c2ecf20Sopenharmony_ci /* 104198c2ecf20Sopenharmony_ci * This is just grabbed for accounting purposes. When a process exits, 104208c2ecf20Sopenharmony_ci * the mm is exited and dropped before the files, hence we need to hang 104218c2ecf20Sopenharmony_ci * on to this mm purely for the purposes of being able to unaccount 104228c2ecf20Sopenharmony_ci * memory (locked/pinned vm). It's not used for anything else. 
104238c2ecf20Sopenharmony_ci */ 104248c2ecf20Sopenharmony_ci mmgrab(current->mm); 104258c2ecf20Sopenharmony_ci ctx->mm_account = current->mm; 104268c2ecf20Sopenharmony_ci 104278c2ecf20Sopenharmony_ci ret = io_allocate_scq_urings(ctx, p); 104288c2ecf20Sopenharmony_ci if (ret) 104298c2ecf20Sopenharmony_ci goto err; 104308c2ecf20Sopenharmony_ci 104318c2ecf20Sopenharmony_ci ret = io_sq_offload_create(ctx, p); 104328c2ecf20Sopenharmony_ci if (ret) 104338c2ecf20Sopenharmony_ci goto err; 104348c2ecf20Sopenharmony_ci /* always set a rsrc node */ 104358c2ecf20Sopenharmony_ci ret = io_rsrc_node_switch_start(ctx); 104368c2ecf20Sopenharmony_ci if (ret) 104378c2ecf20Sopenharmony_ci goto err; 104388c2ecf20Sopenharmony_ci io_rsrc_node_switch(ctx, NULL); 104398c2ecf20Sopenharmony_ci 104408c2ecf20Sopenharmony_ci memset(&p->sq_off, 0, sizeof(p->sq_off)); 104418c2ecf20Sopenharmony_ci p->sq_off.head = offsetof(struct io_rings, sq.head); 104428c2ecf20Sopenharmony_ci p->sq_off.tail = offsetof(struct io_rings, sq.tail); 104438c2ecf20Sopenharmony_ci p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask); 104448c2ecf20Sopenharmony_ci p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries); 104458c2ecf20Sopenharmony_ci p->sq_off.flags = offsetof(struct io_rings, sq_flags); 104468c2ecf20Sopenharmony_ci p->sq_off.dropped = offsetof(struct io_rings, sq_dropped); 104478c2ecf20Sopenharmony_ci p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings; 104488c2ecf20Sopenharmony_ci 104498c2ecf20Sopenharmony_ci memset(&p->cq_off, 0, sizeof(p->cq_off)); 104508c2ecf20Sopenharmony_ci p->cq_off.head = offsetof(struct io_rings, cq.head); 104518c2ecf20Sopenharmony_ci p->cq_off.tail = offsetof(struct io_rings, cq.tail); 104528c2ecf20Sopenharmony_ci p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask); 104538c2ecf20Sopenharmony_ci p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries); 104548c2ecf20Sopenharmony_ci p->cq_off.overflow = offsetof(struct io_rings, 
cq_overflow); 104558c2ecf20Sopenharmony_ci p->cq_off.cqes = offsetof(struct io_rings, cqes); 104568c2ecf20Sopenharmony_ci p->cq_off.flags = offsetof(struct io_rings, cq_flags); 104578c2ecf20Sopenharmony_ci 104588c2ecf20Sopenharmony_ci p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | 104598c2ecf20Sopenharmony_ci IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | 104608c2ecf20Sopenharmony_ci IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | 104618c2ecf20Sopenharmony_ci IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | 104628c2ecf20Sopenharmony_ci IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | 104638c2ecf20Sopenharmony_ci IORING_FEAT_RSRC_TAGS; 104648c2ecf20Sopenharmony_ci 104658c2ecf20Sopenharmony_ci if (copy_to_user(params, p, sizeof(*p))) { 104668c2ecf20Sopenharmony_ci ret = -EFAULT; 104678c2ecf20Sopenharmony_ci goto err; 104688c2ecf20Sopenharmony_ci } 104698c2ecf20Sopenharmony_ci 104708c2ecf20Sopenharmony_ci file = io_uring_get_file(ctx); 104718c2ecf20Sopenharmony_ci if (IS_ERR(file)) { 104728c2ecf20Sopenharmony_ci ret = PTR_ERR(file); 104738c2ecf20Sopenharmony_ci goto err; 104748c2ecf20Sopenharmony_ci } 104758c2ecf20Sopenharmony_ci 104768c2ecf20Sopenharmony_ci /* 104778c2ecf20Sopenharmony_ci * Install ring fd as the very last thing, so we don't risk someone 104788c2ecf20Sopenharmony_ci * having closed it before we finish setup 104798c2ecf20Sopenharmony_ci */ 104808c2ecf20Sopenharmony_ci ret = io_uring_install_fd(ctx, file); 104818c2ecf20Sopenharmony_ci if (ret < 0) { 104828c2ecf20Sopenharmony_ci /* fput will clean it up */ 104838c2ecf20Sopenharmony_ci fput(file); 104848c2ecf20Sopenharmony_ci return ret; 104858c2ecf20Sopenharmony_ci } 104868c2ecf20Sopenharmony_ci 104878c2ecf20Sopenharmony_ci trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); 104888c2ecf20Sopenharmony_ci return ret; 104898c2ecf20Sopenharmony_cierr: 104908c2ecf20Sopenharmony_ci io_ring_ctx_wait_and_kill(ctx); 104918c2ecf20Sopenharmony_ci return ret; 
104928c2ecf20Sopenharmony_ci} 104938c2ecf20Sopenharmony_ci 104948c2ecf20Sopenharmony_ci/* 104958c2ecf20Sopenharmony_ci * Sets up an aio uring context, and returns the fd. Applications asks for a 104968c2ecf20Sopenharmony_ci * ring size, we return the actual sq/cq ring sizes (among other things) in the 104978c2ecf20Sopenharmony_ci * params structure passed in. 104988c2ecf20Sopenharmony_ci */ 104998c2ecf20Sopenharmony_cistatic long io_uring_setup(u32 entries, struct io_uring_params __user *params) 105008c2ecf20Sopenharmony_ci{ 105018c2ecf20Sopenharmony_ci struct io_uring_params p; 105028c2ecf20Sopenharmony_ci int i; 105038c2ecf20Sopenharmony_ci 105048c2ecf20Sopenharmony_ci if (copy_from_user(&p, params, sizeof(p))) 105058c2ecf20Sopenharmony_ci return -EFAULT; 105068c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(p.resv); i++) { 105078c2ecf20Sopenharmony_ci if (p.resv[i]) 105088c2ecf20Sopenharmony_ci return -EINVAL; 105098c2ecf20Sopenharmony_ci } 105108c2ecf20Sopenharmony_ci 105118c2ecf20Sopenharmony_ci if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | 105128c2ecf20Sopenharmony_ci IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | 105138c2ecf20Sopenharmony_ci IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | 105148c2ecf20Sopenharmony_ci IORING_SETUP_R_DISABLED)) 105158c2ecf20Sopenharmony_ci return -EINVAL; 105168c2ecf20Sopenharmony_ci 105178c2ecf20Sopenharmony_ci return io_uring_create(entries, &p, params); 105188c2ecf20Sopenharmony_ci} 105198c2ecf20Sopenharmony_ci 105208c2ecf20Sopenharmony_ciSYSCALL_DEFINE2(io_uring_setup, u32, entries, 105218c2ecf20Sopenharmony_ci struct io_uring_params __user *, params) 105228c2ecf20Sopenharmony_ci{ 105238c2ecf20Sopenharmony_ci return io_uring_setup(entries, params); 105248c2ecf20Sopenharmony_ci} 105258c2ecf20Sopenharmony_ci 105268c2ecf20Sopenharmony_cistatic int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) 105278c2ecf20Sopenharmony_ci{ 105288c2ecf20Sopenharmony_ci struct io_uring_probe *p; 
105298c2ecf20Sopenharmony_ci size_t size; 105308c2ecf20Sopenharmony_ci int i, ret; 105318c2ecf20Sopenharmony_ci 105328c2ecf20Sopenharmony_ci size = struct_size(p, ops, nr_args); 105338c2ecf20Sopenharmony_ci if (size == SIZE_MAX) 105348c2ecf20Sopenharmony_ci return -EOVERFLOW; 105358c2ecf20Sopenharmony_ci p = kzalloc(size, GFP_KERNEL); 105368c2ecf20Sopenharmony_ci if (!p) 105378c2ecf20Sopenharmony_ci return -ENOMEM; 105388c2ecf20Sopenharmony_ci 105398c2ecf20Sopenharmony_ci ret = -EFAULT; 105408c2ecf20Sopenharmony_ci if (copy_from_user(p, arg, size)) 105418c2ecf20Sopenharmony_ci goto out; 105428c2ecf20Sopenharmony_ci ret = -EINVAL; 105438c2ecf20Sopenharmony_ci if (memchr_inv(p, 0, size)) 105448c2ecf20Sopenharmony_ci goto out; 105458c2ecf20Sopenharmony_ci 105468c2ecf20Sopenharmony_ci p->last_op = IORING_OP_LAST - 1; 105478c2ecf20Sopenharmony_ci if (nr_args > IORING_OP_LAST) 105488c2ecf20Sopenharmony_ci nr_args = IORING_OP_LAST; 105498c2ecf20Sopenharmony_ci 105508c2ecf20Sopenharmony_ci for (i = 0; i < nr_args; i++) { 105518c2ecf20Sopenharmony_ci p->ops[i].op = i; 105528c2ecf20Sopenharmony_ci if (!io_op_defs[i].not_supported) 105538c2ecf20Sopenharmony_ci p->ops[i].flags = IO_URING_OP_SUPPORTED; 105548c2ecf20Sopenharmony_ci } 105558c2ecf20Sopenharmony_ci p->ops_len = i; 105568c2ecf20Sopenharmony_ci 105578c2ecf20Sopenharmony_ci ret = 0; 105588c2ecf20Sopenharmony_ci if (copy_to_user(arg, p, size)) 105598c2ecf20Sopenharmony_ci ret = -EFAULT; 105608c2ecf20Sopenharmony_ciout: 105618c2ecf20Sopenharmony_ci kfree(p); 105628c2ecf20Sopenharmony_ci return ret; 105638c2ecf20Sopenharmony_ci} 105648c2ecf20Sopenharmony_ci 105658c2ecf20Sopenharmony_cistatic int io_register_personality(struct io_ring_ctx *ctx) 105668c2ecf20Sopenharmony_ci{ 105678c2ecf20Sopenharmony_ci const struct cred *creds; 105688c2ecf20Sopenharmony_ci u32 id; 105698c2ecf20Sopenharmony_ci int ret; 105708c2ecf20Sopenharmony_ci 105718c2ecf20Sopenharmony_ci creds = get_current_cred(); 105728c2ecf20Sopenharmony_ci 
105738c2ecf20Sopenharmony_ci ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, 105748c2ecf20Sopenharmony_ci XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); 105758c2ecf20Sopenharmony_ci if (ret < 0) { 105768c2ecf20Sopenharmony_ci put_cred(creds); 105778c2ecf20Sopenharmony_ci return ret; 105788c2ecf20Sopenharmony_ci } 105798c2ecf20Sopenharmony_ci return id; 105808c2ecf20Sopenharmony_ci} 105818c2ecf20Sopenharmony_ci 105828c2ecf20Sopenharmony_cistatic int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg, 105838c2ecf20Sopenharmony_ci unsigned int nr_args) 105848c2ecf20Sopenharmony_ci{ 105858c2ecf20Sopenharmony_ci struct io_uring_restriction *res; 105868c2ecf20Sopenharmony_ci size_t size; 105878c2ecf20Sopenharmony_ci int i, ret; 105888c2ecf20Sopenharmony_ci 105898c2ecf20Sopenharmony_ci /* Restrictions allowed only if rings started disabled */ 105908c2ecf20Sopenharmony_ci if (!(ctx->flags & IORING_SETUP_R_DISABLED)) 105918c2ecf20Sopenharmony_ci return -EBADFD; 105928c2ecf20Sopenharmony_ci 105938c2ecf20Sopenharmony_ci /* We allow only a single restrictions registration */ 105948c2ecf20Sopenharmony_ci if (ctx->restrictions.registered) 105958c2ecf20Sopenharmony_ci return -EBUSY; 105968c2ecf20Sopenharmony_ci 105978c2ecf20Sopenharmony_ci if (!arg || nr_args > IORING_MAX_RESTRICTIONS) 105988c2ecf20Sopenharmony_ci return -EINVAL; 105998c2ecf20Sopenharmony_ci 106008c2ecf20Sopenharmony_ci size = array_size(nr_args, sizeof(*res)); 106018c2ecf20Sopenharmony_ci if (size == SIZE_MAX) 106028c2ecf20Sopenharmony_ci return -EOVERFLOW; 106038c2ecf20Sopenharmony_ci 106048c2ecf20Sopenharmony_ci res = memdup_user(arg, size); 106058c2ecf20Sopenharmony_ci if (IS_ERR(res)) 106068c2ecf20Sopenharmony_ci return PTR_ERR(res); 106078c2ecf20Sopenharmony_ci 106088c2ecf20Sopenharmony_ci ret = 0; 106098c2ecf20Sopenharmony_ci 106108c2ecf20Sopenharmony_ci for (i = 0; i < nr_args; i++) { 106118c2ecf20Sopenharmony_ci switch (res[i].opcode) { 106128c2ecf20Sopenharmony_ci 
case IORING_RESTRICTION_REGISTER_OP: 106138c2ecf20Sopenharmony_ci if (res[i].register_op >= IORING_REGISTER_LAST) { 106148c2ecf20Sopenharmony_ci ret = -EINVAL; 106158c2ecf20Sopenharmony_ci goto out; 106168c2ecf20Sopenharmony_ci } 106178c2ecf20Sopenharmony_ci 106188c2ecf20Sopenharmony_ci __set_bit(res[i].register_op, 106198c2ecf20Sopenharmony_ci ctx->restrictions.register_op); 106208c2ecf20Sopenharmony_ci break; 106218c2ecf20Sopenharmony_ci case IORING_RESTRICTION_SQE_OP: 106228c2ecf20Sopenharmony_ci if (res[i].sqe_op >= IORING_OP_LAST) { 106238c2ecf20Sopenharmony_ci ret = -EINVAL; 106248c2ecf20Sopenharmony_ci goto out; 106258c2ecf20Sopenharmony_ci } 106268c2ecf20Sopenharmony_ci 106278c2ecf20Sopenharmony_ci __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op); 106288c2ecf20Sopenharmony_ci break; 106298c2ecf20Sopenharmony_ci case IORING_RESTRICTION_SQE_FLAGS_ALLOWED: 106308c2ecf20Sopenharmony_ci ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags; 106318c2ecf20Sopenharmony_ci break; 106328c2ecf20Sopenharmony_ci case IORING_RESTRICTION_SQE_FLAGS_REQUIRED: 106338c2ecf20Sopenharmony_ci ctx->restrictions.sqe_flags_required = res[i].sqe_flags; 106348c2ecf20Sopenharmony_ci break; 106358c2ecf20Sopenharmony_ci default: 106368c2ecf20Sopenharmony_ci ret = -EINVAL; 106378c2ecf20Sopenharmony_ci goto out; 106388c2ecf20Sopenharmony_ci } 106398c2ecf20Sopenharmony_ci } 106408c2ecf20Sopenharmony_ci 106418c2ecf20Sopenharmony_ciout: 106428c2ecf20Sopenharmony_ci /* Reset all restrictions if an error happened */ 106438c2ecf20Sopenharmony_ci if (ret != 0) 106448c2ecf20Sopenharmony_ci memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); 106458c2ecf20Sopenharmony_ci else 106468c2ecf20Sopenharmony_ci ctx->restrictions.registered = true; 106478c2ecf20Sopenharmony_ci 106488c2ecf20Sopenharmony_ci kfree(res); 106498c2ecf20Sopenharmony_ci return ret; 106508c2ecf20Sopenharmony_ci} 106518c2ecf20Sopenharmony_ci 106528c2ecf20Sopenharmony_cistatic int io_register_enable_rings(struct io_ring_ctx 
*ctx) 106538c2ecf20Sopenharmony_ci{ 106548c2ecf20Sopenharmony_ci if (!(ctx->flags & IORING_SETUP_R_DISABLED)) 106558c2ecf20Sopenharmony_ci return -EBADFD; 106568c2ecf20Sopenharmony_ci 106578c2ecf20Sopenharmony_ci if (ctx->restrictions.registered) 106588c2ecf20Sopenharmony_ci ctx->restricted = 1; 106598c2ecf20Sopenharmony_ci 106608c2ecf20Sopenharmony_ci ctx->flags &= ~IORING_SETUP_R_DISABLED; 106618c2ecf20Sopenharmony_ci if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait)) 106628c2ecf20Sopenharmony_ci wake_up(&ctx->sq_data->wait); 106638c2ecf20Sopenharmony_ci return 0; 106648c2ecf20Sopenharmony_ci} 106658c2ecf20Sopenharmony_ci 106668c2ecf20Sopenharmony_cistatic int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, 106678c2ecf20Sopenharmony_ci struct io_uring_rsrc_update2 *up, 106688c2ecf20Sopenharmony_ci unsigned nr_args) 106698c2ecf20Sopenharmony_ci{ 106708c2ecf20Sopenharmony_ci __u32 tmp; 106718c2ecf20Sopenharmony_ci int err; 106728c2ecf20Sopenharmony_ci 106738c2ecf20Sopenharmony_ci if (check_add_overflow(up->offset, nr_args, &tmp)) 106748c2ecf20Sopenharmony_ci return -EOVERFLOW; 106758c2ecf20Sopenharmony_ci err = io_rsrc_node_switch_start(ctx); 106768c2ecf20Sopenharmony_ci if (err) 106778c2ecf20Sopenharmony_ci return err; 106788c2ecf20Sopenharmony_ci 106798c2ecf20Sopenharmony_ci switch (type) { 106808c2ecf20Sopenharmony_ci case IORING_RSRC_FILE: 106818c2ecf20Sopenharmony_ci return __io_sqe_files_update(ctx, up, nr_args); 106828c2ecf20Sopenharmony_ci case IORING_RSRC_BUFFER: 106838c2ecf20Sopenharmony_ci return __io_sqe_buffers_update(ctx, up, nr_args); 106848c2ecf20Sopenharmony_ci } 106858c2ecf20Sopenharmony_ci return -EINVAL; 106868c2ecf20Sopenharmony_ci} 106878c2ecf20Sopenharmony_ci 106888c2ecf20Sopenharmony_cistatic int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, 106898c2ecf20Sopenharmony_ci unsigned nr_args) 106908c2ecf20Sopenharmony_ci{ 106918c2ecf20Sopenharmony_ci struct io_uring_rsrc_update2 up; 
106928c2ecf20Sopenharmony_ci 106938c2ecf20Sopenharmony_ci if (!nr_args) 106948c2ecf20Sopenharmony_ci return -EINVAL; 106958c2ecf20Sopenharmony_ci memset(&up, 0, sizeof(up)); 106968c2ecf20Sopenharmony_ci if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) 106978c2ecf20Sopenharmony_ci return -EFAULT; 106988c2ecf20Sopenharmony_ci if (up.resv || up.resv2) 106998c2ecf20Sopenharmony_ci return -EINVAL; 107008c2ecf20Sopenharmony_ci return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); 107018c2ecf20Sopenharmony_ci} 107028c2ecf20Sopenharmony_ci 107038c2ecf20Sopenharmony_cistatic int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, 107048c2ecf20Sopenharmony_ci unsigned size, unsigned type) 107058c2ecf20Sopenharmony_ci{ 107068c2ecf20Sopenharmony_ci struct io_uring_rsrc_update2 up; 107078c2ecf20Sopenharmony_ci 107088c2ecf20Sopenharmony_ci if (size != sizeof(up)) 107098c2ecf20Sopenharmony_ci return -EINVAL; 107108c2ecf20Sopenharmony_ci if (copy_from_user(&up, arg, sizeof(up))) 107118c2ecf20Sopenharmony_ci return -EFAULT; 107128c2ecf20Sopenharmony_ci if (!up.nr || up.resv || up.resv2) 107138c2ecf20Sopenharmony_ci return -EINVAL; 107148c2ecf20Sopenharmony_ci return __io_register_rsrc_update(ctx, type, &up, up.nr); 107158c2ecf20Sopenharmony_ci} 107168c2ecf20Sopenharmony_ci 107178c2ecf20Sopenharmony_cistatic int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, 107188c2ecf20Sopenharmony_ci unsigned int size, unsigned int type) 107198c2ecf20Sopenharmony_ci{ 107208c2ecf20Sopenharmony_ci struct io_uring_rsrc_register rr; 107218c2ecf20Sopenharmony_ci 107228c2ecf20Sopenharmony_ci /* keep it extendible */ 107238c2ecf20Sopenharmony_ci if (size != sizeof(rr)) 107248c2ecf20Sopenharmony_ci return -EINVAL; 107258c2ecf20Sopenharmony_ci 107268c2ecf20Sopenharmony_ci memset(&rr, 0, sizeof(rr)); 107278c2ecf20Sopenharmony_ci if (copy_from_user(&rr, arg, size)) 107288c2ecf20Sopenharmony_ci return -EFAULT; 107298c2ecf20Sopenharmony_ci if 
(!rr.nr || rr.resv || rr.resv2) 107308c2ecf20Sopenharmony_ci return -EINVAL; 107318c2ecf20Sopenharmony_ci 107328c2ecf20Sopenharmony_ci switch (type) { 107338c2ecf20Sopenharmony_ci case IORING_RSRC_FILE: 107348c2ecf20Sopenharmony_ci return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data), 107358c2ecf20Sopenharmony_ci rr.nr, u64_to_user_ptr(rr.tags)); 107368c2ecf20Sopenharmony_ci case IORING_RSRC_BUFFER: 107378c2ecf20Sopenharmony_ci return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data), 107388c2ecf20Sopenharmony_ci rr.nr, u64_to_user_ptr(rr.tags)); 107398c2ecf20Sopenharmony_ci } 107408c2ecf20Sopenharmony_ci return -EINVAL; 107418c2ecf20Sopenharmony_ci} 107428c2ecf20Sopenharmony_ci 107438c2ecf20Sopenharmony_cistatic int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg, 107448c2ecf20Sopenharmony_ci unsigned len) 107458c2ecf20Sopenharmony_ci{ 107468c2ecf20Sopenharmony_ci struct io_uring_task *tctx = current->io_uring; 107478c2ecf20Sopenharmony_ci cpumask_var_t new_mask; 107488c2ecf20Sopenharmony_ci int ret; 107498c2ecf20Sopenharmony_ci 107508c2ecf20Sopenharmony_ci if (!tctx || !tctx->io_wq) 107518c2ecf20Sopenharmony_ci return -EINVAL; 107528c2ecf20Sopenharmony_ci 107538c2ecf20Sopenharmony_ci if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) 107548c2ecf20Sopenharmony_ci return -ENOMEM; 107558c2ecf20Sopenharmony_ci 107568c2ecf20Sopenharmony_ci cpumask_clear(new_mask); 107578c2ecf20Sopenharmony_ci if (len > cpumask_size()) 107588c2ecf20Sopenharmony_ci len = cpumask_size(); 107598c2ecf20Sopenharmony_ci 107608c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 107618c2ecf20Sopenharmony_ci if (in_compat_syscall()) { 107628c2ecf20Sopenharmony_ci ret = compat_get_bitmap(cpumask_bits(new_mask), 107638c2ecf20Sopenharmony_ci (const compat_ulong_t __user *)arg, 107648c2ecf20Sopenharmony_ci len * 8 /* CHAR_BIT */); 107658c2ecf20Sopenharmony_ci } else { 107668c2ecf20Sopenharmony_ci ret = copy_from_user(new_mask, arg, len); 107678c2ecf20Sopenharmony_ci } 
107688c2ecf20Sopenharmony_ci#else 107698c2ecf20Sopenharmony_ci ret = copy_from_user(new_mask, arg, len); 107708c2ecf20Sopenharmony_ci#endif 107718c2ecf20Sopenharmony_ci 107728c2ecf20Sopenharmony_ci if (ret) { 107738c2ecf20Sopenharmony_ci free_cpumask_var(new_mask); 107748c2ecf20Sopenharmony_ci return -EFAULT; 107758c2ecf20Sopenharmony_ci } 107768c2ecf20Sopenharmony_ci 107778c2ecf20Sopenharmony_ci ret = io_wq_cpu_affinity(tctx->io_wq, new_mask); 107788c2ecf20Sopenharmony_ci free_cpumask_var(new_mask); 107798c2ecf20Sopenharmony_ci return ret; 107808c2ecf20Sopenharmony_ci} 107818c2ecf20Sopenharmony_ci 107828c2ecf20Sopenharmony_cistatic int io_unregister_iowq_aff(struct io_ring_ctx *ctx) 107838c2ecf20Sopenharmony_ci{ 107848c2ecf20Sopenharmony_ci struct io_uring_task *tctx = current->io_uring; 107858c2ecf20Sopenharmony_ci 107868c2ecf20Sopenharmony_ci if (!tctx || !tctx->io_wq) 107878c2ecf20Sopenharmony_ci return -EINVAL; 107888c2ecf20Sopenharmony_ci 107898c2ecf20Sopenharmony_ci return io_wq_cpu_affinity(tctx->io_wq, NULL); 107908c2ecf20Sopenharmony_ci} 107918c2ecf20Sopenharmony_ci 107928c2ecf20Sopenharmony_cistatic int io_register_iowq_max_workers(struct io_ring_ctx *ctx, 107938c2ecf20Sopenharmony_ci void __user *arg) 107948c2ecf20Sopenharmony_ci __must_hold(&ctx->uring_lock) 107958c2ecf20Sopenharmony_ci{ 107968c2ecf20Sopenharmony_ci struct io_tctx_node *node; 107978c2ecf20Sopenharmony_ci struct io_uring_task *tctx = NULL; 107988c2ecf20Sopenharmony_ci struct io_sq_data *sqd = NULL; 107998c2ecf20Sopenharmony_ci __u32 new_count[2]; 108008c2ecf20Sopenharmony_ci int i, ret; 108018c2ecf20Sopenharmony_ci 108028c2ecf20Sopenharmony_ci if (copy_from_user(new_count, arg, sizeof(new_count))) 108038c2ecf20Sopenharmony_ci return -EFAULT; 108048c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(new_count); i++) 108058c2ecf20Sopenharmony_ci if (new_count[i] > INT_MAX) 108068c2ecf20Sopenharmony_ci return -EINVAL; 108078c2ecf20Sopenharmony_ci 108088c2ecf20Sopenharmony_ci if (ctx->flags & 
IORING_SETUP_SQPOLL) { 108098c2ecf20Sopenharmony_ci sqd = ctx->sq_data; 108108c2ecf20Sopenharmony_ci if (sqd) { 108118c2ecf20Sopenharmony_ci /* 108128c2ecf20Sopenharmony_ci * Observe the correct sqd->lock -> ctx->uring_lock 108138c2ecf20Sopenharmony_ci * ordering. Fine to drop uring_lock here, we hold 108148c2ecf20Sopenharmony_ci * a ref to the ctx. 108158c2ecf20Sopenharmony_ci */ 108168c2ecf20Sopenharmony_ci refcount_inc(&sqd->refs); 108178c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 108188c2ecf20Sopenharmony_ci mutex_lock(&sqd->lock); 108198c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 108208c2ecf20Sopenharmony_ci if (sqd->thread) 108218c2ecf20Sopenharmony_ci tctx = sqd->thread->io_uring; 108228c2ecf20Sopenharmony_ci } 108238c2ecf20Sopenharmony_ci } else { 108248c2ecf20Sopenharmony_ci tctx = current->io_uring; 108258c2ecf20Sopenharmony_ci } 108268c2ecf20Sopenharmony_ci 108278c2ecf20Sopenharmony_ci BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); 108288c2ecf20Sopenharmony_ci 108298c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(new_count); i++) 108308c2ecf20Sopenharmony_ci if (new_count[i]) 108318c2ecf20Sopenharmony_ci ctx->iowq_limits[i] = new_count[i]; 108328c2ecf20Sopenharmony_ci ctx->iowq_limits_set = true; 108338c2ecf20Sopenharmony_ci 108348c2ecf20Sopenharmony_ci ret = -EINVAL; 108358c2ecf20Sopenharmony_ci if (tctx && tctx->io_wq) { 108368c2ecf20Sopenharmony_ci ret = io_wq_max_workers(tctx->io_wq, new_count); 108378c2ecf20Sopenharmony_ci if (ret) 108388c2ecf20Sopenharmony_ci goto err; 108398c2ecf20Sopenharmony_ci } else { 108408c2ecf20Sopenharmony_ci memset(new_count, 0, sizeof(new_count)); 108418c2ecf20Sopenharmony_ci } 108428c2ecf20Sopenharmony_ci 108438c2ecf20Sopenharmony_ci if (sqd) { 108448c2ecf20Sopenharmony_ci mutex_unlock(&sqd->lock); 108458c2ecf20Sopenharmony_ci io_put_sq_data(sqd); 108468c2ecf20Sopenharmony_ci } 108478c2ecf20Sopenharmony_ci 108488c2ecf20Sopenharmony_ci if (copy_to_user(arg, new_count, 
sizeof(new_count))) 108498c2ecf20Sopenharmony_ci return -EFAULT; 108508c2ecf20Sopenharmony_ci 108518c2ecf20Sopenharmony_ci /* that's it for SQPOLL, only the SQPOLL task creates requests */ 108528c2ecf20Sopenharmony_ci if (sqd) 108538c2ecf20Sopenharmony_ci return 0; 108548c2ecf20Sopenharmony_ci 108558c2ecf20Sopenharmony_ci /* now propagate the restriction to all registered users */ 108568c2ecf20Sopenharmony_ci list_for_each_entry(node, &ctx->tctx_list, ctx_node) { 108578c2ecf20Sopenharmony_ci struct io_uring_task *tctx = node->task->io_uring; 108588c2ecf20Sopenharmony_ci 108598c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(!tctx->io_wq)) 108608c2ecf20Sopenharmony_ci continue; 108618c2ecf20Sopenharmony_ci 108628c2ecf20Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(new_count); i++) 108638c2ecf20Sopenharmony_ci new_count[i] = ctx->iowq_limits[i]; 108648c2ecf20Sopenharmony_ci /* ignore errors, it always returns zero anyway */ 108658c2ecf20Sopenharmony_ci (void)io_wq_max_workers(tctx->io_wq, new_count); 108668c2ecf20Sopenharmony_ci } 108678c2ecf20Sopenharmony_ci return 0; 108688c2ecf20Sopenharmony_cierr: 108698c2ecf20Sopenharmony_ci if (sqd) { 108708c2ecf20Sopenharmony_ci mutex_unlock(&sqd->lock); 108718c2ecf20Sopenharmony_ci io_put_sq_data(sqd); 108728c2ecf20Sopenharmony_ci } 108738c2ecf20Sopenharmony_ci return ret; 108748c2ecf20Sopenharmony_ci} 108758c2ecf20Sopenharmony_ci 108768c2ecf20Sopenharmony_cistatic bool io_register_op_must_quiesce(int op) 108778c2ecf20Sopenharmony_ci{ 108788c2ecf20Sopenharmony_ci switch (op) { 108798c2ecf20Sopenharmony_ci case IORING_REGISTER_BUFFERS: 108808c2ecf20Sopenharmony_ci case IORING_UNREGISTER_BUFFERS: 108818c2ecf20Sopenharmony_ci case IORING_REGISTER_FILES: 108828c2ecf20Sopenharmony_ci case IORING_UNREGISTER_FILES: 108838c2ecf20Sopenharmony_ci case IORING_REGISTER_FILES_UPDATE: 108848c2ecf20Sopenharmony_ci case IORING_REGISTER_PROBE: 108858c2ecf20Sopenharmony_ci case IORING_REGISTER_PERSONALITY: 108868c2ecf20Sopenharmony_ci case 
IORING_UNREGISTER_PERSONALITY: 108878c2ecf20Sopenharmony_ci case IORING_REGISTER_FILES2: 108888c2ecf20Sopenharmony_ci case IORING_REGISTER_FILES_UPDATE2: 108898c2ecf20Sopenharmony_ci case IORING_REGISTER_BUFFERS2: 108908c2ecf20Sopenharmony_ci case IORING_REGISTER_BUFFERS_UPDATE: 108918c2ecf20Sopenharmony_ci case IORING_REGISTER_IOWQ_AFF: 108928c2ecf20Sopenharmony_ci case IORING_UNREGISTER_IOWQ_AFF: 108938c2ecf20Sopenharmony_ci case IORING_REGISTER_IOWQ_MAX_WORKERS: 108948c2ecf20Sopenharmony_ci return false; 108958c2ecf20Sopenharmony_ci default: 108968c2ecf20Sopenharmony_ci return true; 108978c2ecf20Sopenharmony_ci } 108988c2ecf20Sopenharmony_ci} 108998c2ecf20Sopenharmony_ci 109008c2ecf20Sopenharmony_cistatic int io_ctx_quiesce(struct io_ring_ctx *ctx) 109018c2ecf20Sopenharmony_ci{ 109028c2ecf20Sopenharmony_ci long ret; 109038c2ecf20Sopenharmony_ci 109048c2ecf20Sopenharmony_ci percpu_ref_kill(&ctx->refs); 109058c2ecf20Sopenharmony_ci 109068c2ecf20Sopenharmony_ci /* 109078c2ecf20Sopenharmony_ci * Drop uring mutex before waiting for references to exit. If another 109088c2ecf20Sopenharmony_ci * thread is currently inside io_uring_enter() it might need to grab the 109098c2ecf20Sopenharmony_ci * uring_lock to make progress. If we hold it here across the drain 109108c2ecf20Sopenharmony_ci * wait, then we can deadlock. It's safe to drop the mutex here, since 109118c2ecf20Sopenharmony_ci * no new references will come in after we've killed the percpu ref. 
109128c2ecf20Sopenharmony_ci */ 109138c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 109148c2ecf20Sopenharmony_ci do { 109158c2ecf20Sopenharmony_ci ret = wait_for_completion_interruptible(&ctx->ref_comp); 109168c2ecf20Sopenharmony_ci if (!ret) 109178c2ecf20Sopenharmony_ci break; 109188c2ecf20Sopenharmony_ci ret = io_run_task_work_sig(); 109198c2ecf20Sopenharmony_ci } while (ret >= 0); 109208c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 109218c2ecf20Sopenharmony_ci 109228c2ecf20Sopenharmony_ci if (ret) 109238c2ecf20Sopenharmony_ci io_refs_resurrect(&ctx->refs, &ctx->ref_comp); 109248c2ecf20Sopenharmony_ci return ret; 109258c2ecf20Sopenharmony_ci} 109268c2ecf20Sopenharmony_ci 109278c2ecf20Sopenharmony_cistatic int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, 109288c2ecf20Sopenharmony_ci void __user *arg, unsigned nr_args) 109298c2ecf20Sopenharmony_ci __releases(ctx->uring_lock) 109308c2ecf20Sopenharmony_ci __acquires(ctx->uring_lock) 109318c2ecf20Sopenharmony_ci{ 109328c2ecf20Sopenharmony_ci int ret; 109338c2ecf20Sopenharmony_ci 109348c2ecf20Sopenharmony_ci /* 109358c2ecf20Sopenharmony_ci * We're inside the ring mutex, if the ref is already dying, then 109368c2ecf20Sopenharmony_ci * someone else killed the ctx or is already going through 109378c2ecf20Sopenharmony_ci * io_uring_register(). 
109388c2ecf20Sopenharmony_ci */ 109398c2ecf20Sopenharmony_ci if (percpu_ref_is_dying(&ctx->refs)) 109408c2ecf20Sopenharmony_ci return -ENXIO; 109418c2ecf20Sopenharmony_ci 109428c2ecf20Sopenharmony_ci if (ctx->restricted) { 109438c2ecf20Sopenharmony_ci opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); 109448c2ecf20Sopenharmony_ci if (!test_bit(opcode, ctx->restrictions.register_op)) 109458c2ecf20Sopenharmony_ci return -EACCES; 109468c2ecf20Sopenharmony_ci } 109478c2ecf20Sopenharmony_ci 109488c2ecf20Sopenharmony_ci if (io_register_op_must_quiesce(opcode)) { 109498c2ecf20Sopenharmony_ci ret = io_ctx_quiesce(ctx); 109508c2ecf20Sopenharmony_ci if (ret) 109518c2ecf20Sopenharmony_ci return ret; 109528c2ecf20Sopenharmony_ci } 109538c2ecf20Sopenharmony_ci 109548c2ecf20Sopenharmony_ci switch (opcode) { 109558c2ecf20Sopenharmony_ci case IORING_REGISTER_BUFFERS: 109568c2ecf20Sopenharmony_ci ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL); 109578c2ecf20Sopenharmony_ci break; 109588c2ecf20Sopenharmony_ci case IORING_UNREGISTER_BUFFERS: 109598c2ecf20Sopenharmony_ci ret = -EINVAL; 109608c2ecf20Sopenharmony_ci if (arg || nr_args) 109618c2ecf20Sopenharmony_ci break; 109628c2ecf20Sopenharmony_ci ret = io_sqe_buffers_unregister(ctx); 109638c2ecf20Sopenharmony_ci break; 109648c2ecf20Sopenharmony_ci case IORING_REGISTER_FILES: 109658c2ecf20Sopenharmony_ci ret = io_sqe_files_register(ctx, arg, nr_args, NULL); 109668c2ecf20Sopenharmony_ci break; 109678c2ecf20Sopenharmony_ci case IORING_UNREGISTER_FILES: 109688c2ecf20Sopenharmony_ci ret = -EINVAL; 109698c2ecf20Sopenharmony_ci if (arg || nr_args) 109708c2ecf20Sopenharmony_ci break; 109718c2ecf20Sopenharmony_ci ret = io_sqe_files_unregister(ctx); 109728c2ecf20Sopenharmony_ci break; 109738c2ecf20Sopenharmony_ci case IORING_REGISTER_FILES_UPDATE: 109748c2ecf20Sopenharmony_ci ret = io_register_files_update(ctx, arg, nr_args); 109758c2ecf20Sopenharmony_ci break; 109768c2ecf20Sopenharmony_ci case IORING_REGISTER_EVENTFD: 
109778c2ecf20Sopenharmony_ci case IORING_REGISTER_EVENTFD_ASYNC: 109788c2ecf20Sopenharmony_ci ret = -EINVAL; 109798c2ecf20Sopenharmony_ci if (nr_args != 1) 109808c2ecf20Sopenharmony_ci break; 109818c2ecf20Sopenharmony_ci ret = io_eventfd_register(ctx, arg); 109828c2ecf20Sopenharmony_ci if (ret) 109838c2ecf20Sopenharmony_ci break; 109848c2ecf20Sopenharmony_ci if (opcode == IORING_REGISTER_EVENTFD_ASYNC) 109858c2ecf20Sopenharmony_ci ctx->eventfd_async = 1; 109868c2ecf20Sopenharmony_ci else 109878c2ecf20Sopenharmony_ci ctx->eventfd_async = 0; 109888c2ecf20Sopenharmony_ci break; 109898c2ecf20Sopenharmony_ci case IORING_UNREGISTER_EVENTFD: 109908c2ecf20Sopenharmony_ci ret = -EINVAL; 109918c2ecf20Sopenharmony_ci if (arg || nr_args) 109928c2ecf20Sopenharmony_ci break; 109938c2ecf20Sopenharmony_ci ret = io_eventfd_unregister(ctx); 109948c2ecf20Sopenharmony_ci break; 109958c2ecf20Sopenharmony_ci case IORING_REGISTER_PROBE: 109968c2ecf20Sopenharmony_ci ret = -EINVAL; 109978c2ecf20Sopenharmony_ci if (!arg || nr_args > 256) 109988c2ecf20Sopenharmony_ci break; 109998c2ecf20Sopenharmony_ci ret = io_probe(ctx, arg, nr_args); 110008c2ecf20Sopenharmony_ci break; 110018c2ecf20Sopenharmony_ci case IORING_REGISTER_PERSONALITY: 110028c2ecf20Sopenharmony_ci ret = -EINVAL; 110038c2ecf20Sopenharmony_ci if (arg || nr_args) 110048c2ecf20Sopenharmony_ci break; 110058c2ecf20Sopenharmony_ci ret = io_register_personality(ctx); 110068c2ecf20Sopenharmony_ci break; 110078c2ecf20Sopenharmony_ci case IORING_UNREGISTER_PERSONALITY: 110088c2ecf20Sopenharmony_ci ret = -EINVAL; 110098c2ecf20Sopenharmony_ci if (arg) 110108c2ecf20Sopenharmony_ci break; 110118c2ecf20Sopenharmony_ci ret = io_unregister_personality(ctx, nr_args); 110128c2ecf20Sopenharmony_ci break; 110138c2ecf20Sopenharmony_ci case IORING_REGISTER_ENABLE_RINGS: 110148c2ecf20Sopenharmony_ci ret = -EINVAL; 110158c2ecf20Sopenharmony_ci if (arg || nr_args) 110168c2ecf20Sopenharmony_ci break; 110178c2ecf20Sopenharmony_ci ret = 
io_register_enable_rings(ctx); 110188c2ecf20Sopenharmony_ci break; 110198c2ecf20Sopenharmony_ci case IORING_REGISTER_RESTRICTIONS: 110208c2ecf20Sopenharmony_ci ret = io_register_restrictions(ctx, arg, nr_args); 110218c2ecf20Sopenharmony_ci break; 110228c2ecf20Sopenharmony_ci case IORING_REGISTER_FILES2: 110238c2ecf20Sopenharmony_ci ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE); 110248c2ecf20Sopenharmony_ci break; 110258c2ecf20Sopenharmony_ci case IORING_REGISTER_FILES_UPDATE2: 110268c2ecf20Sopenharmony_ci ret = io_register_rsrc_update(ctx, arg, nr_args, 110278c2ecf20Sopenharmony_ci IORING_RSRC_FILE); 110288c2ecf20Sopenharmony_ci break; 110298c2ecf20Sopenharmony_ci case IORING_REGISTER_BUFFERS2: 110308c2ecf20Sopenharmony_ci ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER); 110318c2ecf20Sopenharmony_ci break; 110328c2ecf20Sopenharmony_ci case IORING_REGISTER_BUFFERS_UPDATE: 110338c2ecf20Sopenharmony_ci ret = io_register_rsrc_update(ctx, arg, nr_args, 110348c2ecf20Sopenharmony_ci IORING_RSRC_BUFFER); 110358c2ecf20Sopenharmony_ci break; 110368c2ecf20Sopenharmony_ci case IORING_REGISTER_IOWQ_AFF: 110378c2ecf20Sopenharmony_ci ret = -EINVAL; 110388c2ecf20Sopenharmony_ci if (!arg || !nr_args) 110398c2ecf20Sopenharmony_ci break; 110408c2ecf20Sopenharmony_ci ret = io_register_iowq_aff(ctx, arg, nr_args); 110418c2ecf20Sopenharmony_ci break; 110428c2ecf20Sopenharmony_ci case IORING_UNREGISTER_IOWQ_AFF: 110438c2ecf20Sopenharmony_ci ret = -EINVAL; 110448c2ecf20Sopenharmony_ci if (arg || nr_args) 110458c2ecf20Sopenharmony_ci break; 110468c2ecf20Sopenharmony_ci ret = io_unregister_iowq_aff(ctx); 110478c2ecf20Sopenharmony_ci break; 110488c2ecf20Sopenharmony_ci case IORING_REGISTER_IOWQ_MAX_WORKERS: 110498c2ecf20Sopenharmony_ci ret = -EINVAL; 110508c2ecf20Sopenharmony_ci if (!arg || nr_args != 2) 110518c2ecf20Sopenharmony_ci break; 110528c2ecf20Sopenharmony_ci ret = io_register_iowq_max_workers(ctx, arg); 110538c2ecf20Sopenharmony_ci break; 
110548c2ecf20Sopenharmony_ci default: 110558c2ecf20Sopenharmony_ci ret = -EINVAL; 110568c2ecf20Sopenharmony_ci break; 110578c2ecf20Sopenharmony_ci } 110588c2ecf20Sopenharmony_ci 110598c2ecf20Sopenharmony_ci if (io_register_op_must_quiesce(opcode)) { 110608c2ecf20Sopenharmony_ci /* bring the ctx back to life */ 110618c2ecf20Sopenharmony_ci percpu_ref_reinit(&ctx->refs); 110628c2ecf20Sopenharmony_ci reinit_completion(&ctx->ref_comp); 110638c2ecf20Sopenharmony_ci } 110648c2ecf20Sopenharmony_ci return ret; 110658c2ecf20Sopenharmony_ci} 110668c2ecf20Sopenharmony_ci 110678c2ecf20Sopenharmony_ciSYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, 110688c2ecf20Sopenharmony_ci void __user *, arg, unsigned int, nr_args) 110698c2ecf20Sopenharmony_ci{ 110708c2ecf20Sopenharmony_ci struct io_ring_ctx *ctx; 110718c2ecf20Sopenharmony_ci long ret = -EBADF; 110728c2ecf20Sopenharmony_ci struct fd f; 110738c2ecf20Sopenharmony_ci 110748c2ecf20Sopenharmony_ci if (opcode >= IORING_REGISTER_LAST) 110758c2ecf20Sopenharmony_ci return -EINVAL; 110768c2ecf20Sopenharmony_ci 110778c2ecf20Sopenharmony_ci f = fdget(fd); 110788c2ecf20Sopenharmony_ci if (!f.file) 110798c2ecf20Sopenharmony_ci return -EBADF; 110808c2ecf20Sopenharmony_ci 110818c2ecf20Sopenharmony_ci ret = -EOPNOTSUPP; 110828c2ecf20Sopenharmony_ci if (f.file->f_op != &io_uring_fops) 110838c2ecf20Sopenharmony_ci goto out_fput; 110848c2ecf20Sopenharmony_ci 110858c2ecf20Sopenharmony_ci ctx = f.file->private_data; 110868c2ecf20Sopenharmony_ci 110878c2ecf20Sopenharmony_ci io_run_task_work(); 110888c2ecf20Sopenharmony_ci 110898c2ecf20Sopenharmony_ci mutex_lock(&ctx->uring_lock); 110908c2ecf20Sopenharmony_ci ret = __io_uring_register(ctx, opcode, arg, nr_args); 110918c2ecf20Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 110928c2ecf20Sopenharmony_ci trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, 110938c2ecf20Sopenharmony_ci ctx->cq_ev_fd != NULL, ret); 110948c2ecf20Sopenharmony_ciout_fput: 
110958c2ecf20Sopenharmony_ci fdput(f); 110968c2ecf20Sopenharmony_ci return ret; 110978c2ecf20Sopenharmony_ci} 110988c2ecf20Sopenharmony_ci 110998c2ecf20Sopenharmony_cistatic int __init io_uring_init(void) 111008c2ecf20Sopenharmony_ci{ 111018c2ecf20Sopenharmony_ci#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \ 111028c2ecf20Sopenharmony_ci BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \ 111038c2ecf20Sopenharmony_ci BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \ 111048c2ecf20Sopenharmony_ci} while (0) 111058c2ecf20Sopenharmony_ci 111068c2ecf20Sopenharmony_ci#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \ 111078c2ecf20Sopenharmony_ci __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename) 111088c2ecf20Sopenharmony_ci BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); 111098c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(0, __u8, opcode); 111108c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(1, __u8, flags); 111118c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(2, __u16, ioprio); 111128c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(4, __s32, fd); 111138c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(8, __u64, off); 111148c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(8, __u64, addr2); 111158c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(16, __u64, addr); 111168c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in); 111178c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(24, __u32, len); 111188c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); 111198c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); 111208c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); 111218c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); 111228c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events); 111238c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, poll32_events); 111248c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); 
111258c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); 111268c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); 111278c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); 111288c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); 111298c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, open_flags); 111308c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); 111318c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); 111328c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(28, __u32, splice_flags); 111338c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(32, __u64, user_data); 111348c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(40, __u16, buf_index); 111358c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(40, __u16, buf_group); 111368c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(42, __u16, personality); 111378c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); 111388c2ecf20Sopenharmony_ci BUILD_BUG_SQE_ELEM(44, __u32, file_index); 111398c2ecf20Sopenharmony_ci 111408c2ecf20Sopenharmony_ci BUILD_BUG_ON(sizeof(struct io_uring_files_update) != 111418c2ecf20Sopenharmony_ci sizeof(struct io_uring_rsrc_update)); 111428c2ecf20Sopenharmony_ci BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) > 111438c2ecf20Sopenharmony_ci sizeof(struct io_uring_rsrc_update2)); 111448c2ecf20Sopenharmony_ci 111458c2ecf20Sopenharmony_ci /* ->buf_index is u16 */ 111468c2ecf20Sopenharmony_ci BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16)); 111478c2ecf20Sopenharmony_ci 111488c2ecf20Sopenharmony_ci /* should fit into one byte */ 111498c2ecf20Sopenharmony_ci BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); 111508c2ecf20Sopenharmony_ci 111518c2ecf20Sopenharmony_ci BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); 111528c2ecf20Sopenharmony_ci BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int)); 111538c2ecf20Sopenharmony_ci 111548c2ecf20Sopenharmony_ci req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | 
111558c2ecf20Sopenharmony_ci SLAB_ACCOUNT); 111568c2ecf20Sopenharmony_ci return 0; 111578c2ecf20Sopenharmony_ci}; 111588c2ecf20Sopenharmony_ci__initcall(io_uring_init); 11159