162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci#include <linux/kernel.h> 362306a36Sopenharmony_ci#include <linux/errno.h> 462306a36Sopenharmony_ci#include <linux/fs.h> 562306a36Sopenharmony_ci#include <linux/file.h> 662306a36Sopenharmony_ci#include <linux/mm.h> 762306a36Sopenharmony_ci#include <linux/slab.h> 862306a36Sopenharmony_ci#include <linux/nospec.h> 962306a36Sopenharmony_ci#include <linux/hugetlb.h> 1062306a36Sopenharmony_ci#include <linux/compat.h> 1162306a36Sopenharmony_ci#include <linux/io_uring.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include <uapi/linux/io_uring.h> 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#include "io_uring.h" 1662306a36Sopenharmony_ci#include "openclose.h" 1762306a36Sopenharmony_ci#include "rsrc.h" 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_cistruct io_rsrc_update { 2062306a36Sopenharmony_ci struct file *file; 2162306a36Sopenharmony_ci u64 arg; 2262306a36Sopenharmony_ci u32 nr_args; 2362306a36Sopenharmony_ci u32 offset; 2462306a36Sopenharmony_ci}; 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_cistatic void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc); 2762306a36Sopenharmony_cistatic int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, 2862306a36Sopenharmony_ci struct io_mapped_ubuf **pimu, 2962306a36Sopenharmony_ci struct page **last_hpage); 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci/* only define max */ 3262306a36Sopenharmony_ci#define IORING_MAX_FIXED_FILES (1U << 20) 3362306a36Sopenharmony_ci#define IORING_MAX_REG_BUFFERS (1U << 14) 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_cistatic const struct io_mapped_ubuf dummy_ubuf = { 3662306a36Sopenharmony_ci /* set invalid range, so io_import_fixed() fails meeting it */ 3762306a36Sopenharmony_ci .ubuf = -1UL, 3862306a36Sopenharmony_ci .ubuf_end = 0, 3962306a36Sopenharmony_ci}; 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ciint __io_account_mem(struct user_struct *user, unsigned long nr_pages) 4262306a36Sopenharmony_ci{ 4362306a36Sopenharmony_ci unsigned long page_limit, cur_pages, new_pages; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci if (!nr_pages) 4662306a36Sopenharmony_ci return 0; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci /* Don't allow more pages than we can safely lock */ 4962306a36Sopenharmony_ci page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci cur_pages = atomic_long_read(&user->locked_vm); 5262306a36Sopenharmony_ci do { 5362306a36Sopenharmony_ci new_pages = cur_pages + nr_pages; 5462306a36Sopenharmony_ci if (new_pages > page_limit) 5562306a36Sopenharmony_ci return -ENOMEM; 5662306a36Sopenharmony_ci } while (!atomic_long_try_cmpxchg(&user->locked_vm, 5762306a36Sopenharmony_ci &cur_pages, new_pages)); 5862306a36Sopenharmony_ci return 0; 5962306a36Sopenharmony_ci} 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_cistatic void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci if (ctx->user) 6462306a36Sopenharmony_ci __io_unaccount_mem(ctx->user, nr_pages); 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci if (ctx->mm_account) 6762306a36Sopenharmony_ci atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); 6862306a36Sopenharmony_ci} 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_cistatic int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) 7162306a36Sopenharmony_ci{ 7262306a36Sopenharmony_ci int ret; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci if (ctx->user) { 7562306a36Sopenharmony_ci ret = __io_account_mem(ctx->user, nr_pages); 7662306a36Sopenharmony_ci if (ret) 7762306a36Sopenharmony_ci return ret; 7862306a36Sopenharmony_ci } 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci if (ctx->mm_account) 8162306a36Sopenharmony_ci atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci return 0; 8462306a36Sopenharmony_ci} 8562306a36Sopenharmony_ci 8662306a36Sopenharmony_cistatic int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst, 8762306a36Sopenharmony_ci void __user *arg, unsigned index) 8862306a36Sopenharmony_ci{ 8962306a36Sopenharmony_ci struct iovec __user *src; 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci#ifdef CONFIG_COMPAT 9262306a36Sopenharmony_ci if (ctx->compat) { 9362306a36Sopenharmony_ci struct compat_iovec __user *ciovs; 9462306a36Sopenharmony_ci struct compat_iovec ciov; 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci ciovs = (struct compat_iovec __user *) arg; 9762306a36Sopenharmony_ci if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov))) 9862306a36Sopenharmony_ci return -EFAULT; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base); 10162306a36Sopenharmony_ci dst->iov_len = ciov.iov_len; 10262306a36Sopenharmony_ci return 0; 10362306a36Sopenharmony_ci } 10462306a36Sopenharmony_ci#endif 10562306a36Sopenharmony_ci src = (struct iovec __user *) arg; 10662306a36Sopenharmony_ci if (copy_from_user(dst, &src[index], sizeof(*dst))) 10762306a36Sopenharmony_ci return -EFAULT; 10862306a36Sopenharmony_ci return 0; 10962306a36Sopenharmony_ci} 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_cistatic int io_buffer_validate(struct iovec *iov) 11262306a36Sopenharmony_ci{ 11362306a36Sopenharmony_ci unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1); 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci /* 11662306a36Sopenharmony_ci * Don't impose further limits on the size and buffer 11762306a36Sopenharmony_ci * constraints here, we'll -EINVAL later when IO is 11862306a36Sopenharmony_ci * submitted if they are wrong. 11962306a36Sopenharmony_ci */ 12062306a36Sopenharmony_ci if (!iov->iov_base) 12162306a36Sopenharmony_ci return iov->iov_len ? -EFAULT : 0; 12262306a36Sopenharmony_ci if (!iov->iov_len) 12362306a36Sopenharmony_ci return -EFAULT; 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci /* arbitrary limit, but we need something */ 12662306a36Sopenharmony_ci if (iov->iov_len > SZ_1G) 12762306a36Sopenharmony_ci return -EFAULT; 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp)) 13062306a36Sopenharmony_ci return -EOVERFLOW; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci return 0; 13362306a36Sopenharmony_ci} 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_cistatic void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot) 13662306a36Sopenharmony_ci{ 13762306a36Sopenharmony_ci struct io_mapped_ubuf *imu = *slot; 13862306a36Sopenharmony_ci unsigned int i; 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci if (imu != &dummy_ubuf) { 14162306a36Sopenharmony_ci for (i = 0; i < imu->nr_bvecs; i++) 14262306a36Sopenharmony_ci unpin_user_page(imu->bvec[i].bv_page); 14362306a36Sopenharmony_ci if (imu->acct_pages) 14462306a36Sopenharmony_ci io_unaccount_mem(ctx, imu->acct_pages); 14562306a36Sopenharmony_ci kvfree(imu); 14662306a36Sopenharmony_ci } 14762306a36Sopenharmony_ci *slot = NULL; 14862306a36Sopenharmony_ci} 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_cistatic void io_rsrc_put_work(struct io_rsrc_node *node) 15162306a36Sopenharmony_ci{ 15262306a36Sopenharmony_ci struct io_rsrc_put *prsrc = &node->item; 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci if (prsrc->tag) 15562306a36Sopenharmony_ci io_post_aux_cqe(node->ctx, prsrc->tag, 0, 0); 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci switch (node->type) { 15862306a36Sopenharmony_ci case IORING_RSRC_FILE: 15962306a36Sopenharmony_ci fput(prsrc->file); 16062306a36Sopenharmony_ci break; 16162306a36Sopenharmony_ci case IORING_RSRC_BUFFER: 16262306a36Sopenharmony_ci io_rsrc_buf_put(node->ctx, prsrc); 16362306a36Sopenharmony_ci break; 16462306a36Sopenharmony_ci default: 16562306a36Sopenharmony_ci WARN_ON_ONCE(1); 16662306a36Sopenharmony_ci break; 16762306a36Sopenharmony_ci } 16862306a36Sopenharmony_ci} 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_civoid io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *node) 17162306a36Sopenharmony_ci{ 17262306a36Sopenharmony_ci if (!io_alloc_cache_put(&ctx->rsrc_node_cache, &node->cache)) 17362306a36Sopenharmony_ci kfree(node); 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_civoid io_rsrc_node_ref_zero(struct io_rsrc_node *node) 17762306a36Sopenharmony_ci __must_hold(&node->ctx->uring_lock) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci struct io_ring_ctx *ctx = node->ctx; 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci while (!list_empty(&ctx->rsrc_ref_list)) { 18262306a36Sopenharmony_ci node = list_first_entry(&ctx->rsrc_ref_list, 18362306a36Sopenharmony_ci struct io_rsrc_node, node); 18462306a36Sopenharmony_ci /* recycle ref nodes in order */ 18562306a36Sopenharmony_ci if (node->refs) 18662306a36Sopenharmony_ci break; 18762306a36Sopenharmony_ci list_del(&node->node); 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_ci if (likely(!node->empty)) 19062306a36Sopenharmony_ci io_rsrc_put_work(node); 19162306a36Sopenharmony_ci io_rsrc_node_destroy(ctx, node); 19262306a36Sopenharmony_ci } 19362306a36Sopenharmony_ci if (list_empty(&ctx->rsrc_ref_list) && unlikely(ctx->rsrc_quiesce)) 19462306a36Sopenharmony_ci wake_up_all(&ctx->rsrc_quiesce_wq); 19562306a36Sopenharmony_ci} 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_cistruct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx) 19862306a36Sopenharmony_ci{ 19962306a36Sopenharmony_ci struct io_rsrc_node *ref_node; 20062306a36Sopenharmony_ci struct io_cache_entry *entry; 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci entry = io_alloc_cache_get(&ctx->rsrc_node_cache); 20362306a36Sopenharmony_ci if (entry) { 20462306a36Sopenharmony_ci ref_node = container_of(entry, struct io_rsrc_node, cache); 20562306a36Sopenharmony_ci } else { 20662306a36Sopenharmony_ci ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); 20762306a36Sopenharmony_ci if (!ref_node) 20862306a36Sopenharmony_ci return NULL; 20962306a36Sopenharmony_ci } 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci ref_node->ctx = ctx; 21262306a36Sopenharmony_ci ref_node->empty = 0; 21362306a36Sopenharmony_ci ref_node->refs = 1; 21462306a36Sopenharmony_ci return ref_node; 21562306a36Sopenharmony_ci} 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci__cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, 21862306a36Sopenharmony_ci struct io_ring_ctx *ctx) 21962306a36Sopenharmony_ci{ 22062306a36Sopenharmony_ci struct io_rsrc_node *backup; 22162306a36Sopenharmony_ci DEFINE_WAIT(we); 22262306a36Sopenharmony_ci int ret; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci /* As We may drop ->uring_lock, other task may have started quiesce */ 22562306a36Sopenharmony_ci if (data->quiesce) 22662306a36Sopenharmony_ci return -ENXIO; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci backup = io_rsrc_node_alloc(ctx); 22962306a36Sopenharmony_ci if (!backup) 23062306a36Sopenharmony_ci return -ENOMEM; 23162306a36Sopenharmony_ci ctx->rsrc_node->empty = true; 23262306a36Sopenharmony_ci ctx->rsrc_node->type = -1; 23362306a36Sopenharmony_ci list_add_tail(&ctx->rsrc_node->node, &ctx->rsrc_ref_list); 23462306a36Sopenharmony_ci io_put_rsrc_node(ctx, ctx->rsrc_node); 23562306a36Sopenharmony_ci ctx->rsrc_node = backup; 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci if (list_empty(&ctx->rsrc_ref_list)) 23862306a36Sopenharmony_ci return 0; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) { 24162306a36Sopenharmony_ci atomic_set(&ctx->cq_wait_nr, 1); 24262306a36Sopenharmony_ci smp_mb(); 24362306a36Sopenharmony_ci } 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci ctx->rsrc_quiesce++; 24662306a36Sopenharmony_ci data->quiesce = true; 24762306a36Sopenharmony_ci do { 24862306a36Sopenharmony_ci prepare_to_wait(&ctx->rsrc_quiesce_wq, &we, TASK_INTERRUPTIBLE); 24962306a36Sopenharmony_ci mutex_unlock(&ctx->uring_lock); 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci ret = io_run_task_work_sig(ctx); 25262306a36Sopenharmony_ci if (ret < 0) { 25362306a36Sopenharmony_ci mutex_lock(&ctx->uring_lock); 25462306a36Sopenharmony_ci if (list_empty(&ctx->rsrc_ref_list)) 25562306a36Sopenharmony_ci ret = 0; 25662306a36Sopenharmony_ci break; 25762306a36Sopenharmony_ci } 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci schedule(); 26062306a36Sopenharmony_ci __set_current_state(TASK_RUNNING); 26162306a36Sopenharmony_ci mutex_lock(&ctx->uring_lock); 26262306a36Sopenharmony_ci ret = 0; 26362306a36Sopenharmony_ci } while (!list_empty(&ctx->rsrc_ref_list)); 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci finish_wait(&ctx->rsrc_quiesce_wq, &we); 26662306a36Sopenharmony_ci data->quiesce = false; 26762306a36Sopenharmony_ci ctx->rsrc_quiesce--; 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) { 27062306a36Sopenharmony_ci atomic_set(&ctx->cq_wait_nr, 0); 27162306a36Sopenharmony_ci smp_mb(); 27262306a36Sopenharmony_ci } 27362306a36Sopenharmony_ci return ret; 27462306a36Sopenharmony_ci} 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_cistatic void io_free_page_table(void **table, size_t size) 27762306a36Sopenharmony_ci{ 27862306a36Sopenharmony_ci unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci for (i = 0; i < nr_tables; i++) 28162306a36Sopenharmony_ci kfree(table[i]); 28262306a36Sopenharmony_ci kfree(table); 28362306a36Sopenharmony_ci} 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_cistatic void io_rsrc_data_free(struct io_rsrc_data *data) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci size_t size = data->nr * sizeof(data->tags[0][0]); 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci if (data->tags) 29062306a36Sopenharmony_ci io_free_page_table((void **)data->tags, size); 29162306a36Sopenharmony_ci kfree(data); 29262306a36Sopenharmony_ci} 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_cistatic __cold void **io_alloc_page_table(size_t size) 29562306a36Sopenharmony_ci{ 29662306a36Sopenharmony_ci unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); 29762306a36Sopenharmony_ci size_t init_size = size; 29862306a36Sopenharmony_ci void **table; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT); 30162306a36Sopenharmony_ci if (!table) 30262306a36Sopenharmony_ci return NULL; 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci for (i = 0; i < nr_tables; i++) { 30562306a36Sopenharmony_ci unsigned int this_size = min_t(size_t, size, PAGE_SIZE); 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_ci table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT); 30862306a36Sopenharmony_ci if (!table[i]) { 30962306a36Sopenharmony_ci io_free_page_table(table, init_size); 31062306a36Sopenharmony_ci return NULL; 31162306a36Sopenharmony_ci } 31262306a36Sopenharmony_ci size -= this_size; 31362306a36Sopenharmony_ci } 31462306a36Sopenharmony_ci return table; 31562306a36Sopenharmony_ci} 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci__cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, int type, 31862306a36Sopenharmony_ci u64 __user *utags, 31962306a36Sopenharmony_ci unsigned nr, struct io_rsrc_data **pdata) 32062306a36Sopenharmony_ci{ 32162306a36Sopenharmony_ci struct io_rsrc_data *data; 32262306a36Sopenharmony_ci int ret = 0; 32362306a36Sopenharmony_ci unsigned i; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci data = kzalloc(sizeof(*data), GFP_KERNEL); 32662306a36Sopenharmony_ci if (!data) 32762306a36Sopenharmony_ci return -ENOMEM; 32862306a36Sopenharmony_ci data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0])); 32962306a36Sopenharmony_ci if (!data->tags) { 33062306a36Sopenharmony_ci kfree(data); 33162306a36Sopenharmony_ci return -ENOMEM; 33262306a36Sopenharmony_ci } 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci data->nr = nr; 33562306a36Sopenharmony_ci data->ctx = ctx; 33662306a36Sopenharmony_ci data->rsrc_type = type; 33762306a36Sopenharmony_ci if (utags) { 33862306a36Sopenharmony_ci ret = -EFAULT; 33962306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 34062306a36Sopenharmony_ci u64 *tag_slot = io_get_tag_slot(data, i); 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci if (copy_from_user(tag_slot, &utags[i], 34362306a36Sopenharmony_ci sizeof(*tag_slot))) 34462306a36Sopenharmony_ci goto fail; 34562306a36Sopenharmony_ci } 34662306a36Sopenharmony_ci } 34762306a36Sopenharmony_ci *pdata = data; 34862306a36Sopenharmony_ci return 0; 34962306a36Sopenharmony_cifail: 35062306a36Sopenharmony_ci io_rsrc_data_free(data); 35162306a36Sopenharmony_ci return ret; 35262306a36Sopenharmony_ci} 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_cistatic int __io_sqe_files_update(struct io_ring_ctx *ctx, 35562306a36Sopenharmony_ci struct io_uring_rsrc_update2 *up, 35662306a36Sopenharmony_ci unsigned nr_args) 35762306a36Sopenharmony_ci{ 35862306a36Sopenharmony_ci u64 __user *tags = u64_to_user_ptr(up->tags); 35962306a36Sopenharmony_ci __s32 __user *fds = u64_to_user_ptr(up->data); 36062306a36Sopenharmony_ci struct io_rsrc_data *data = ctx->file_data; 36162306a36Sopenharmony_ci struct io_fixed_file *file_slot; 36262306a36Sopenharmony_ci int fd, i, err = 0; 36362306a36Sopenharmony_ci unsigned int done; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci if (!ctx->file_data) 36662306a36Sopenharmony_ci return -ENXIO; 36762306a36Sopenharmony_ci if (up->offset + nr_args > ctx->nr_user_files) 36862306a36Sopenharmony_ci return -EINVAL; 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci for (done = 0; done < nr_args; done++) { 37162306a36Sopenharmony_ci u64 tag = 0; 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) || 37462306a36Sopenharmony_ci copy_from_user(&fd, &fds[done], sizeof(fd))) { 37562306a36Sopenharmony_ci err = -EFAULT; 37662306a36Sopenharmony_ci break; 37762306a36Sopenharmony_ci } 37862306a36Sopenharmony_ci if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) { 37962306a36Sopenharmony_ci err = -EINVAL; 38062306a36Sopenharmony_ci break; 38162306a36Sopenharmony_ci } 38262306a36Sopenharmony_ci if (fd == IORING_REGISTER_FILES_SKIP) 38362306a36Sopenharmony_ci continue; 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci i = array_index_nospec(up->offset + done, ctx->nr_user_files); 38662306a36Sopenharmony_ci file_slot = io_fixed_file_slot(&ctx->file_table, i); 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci if (file_slot->file_ptr) { 38962306a36Sopenharmony_ci err = io_queue_rsrc_removal(data, i, 39062306a36Sopenharmony_ci io_slot_file(file_slot)); 39162306a36Sopenharmony_ci if (err) 39262306a36Sopenharmony_ci break; 39362306a36Sopenharmony_ci file_slot->file_ptr = 0; 39462306a36Sopenharmony_ci io_file_bitmap_clear(&ctx->file_table, i); 39562306a36Sopenharmony_ci } 39662306a36Sopenharmony_ci if (fd != -1) { 39762306a36Sopenharmony_ci struct file *file = fget(fd); 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci if (!file) { 40062306a36Sopenharmony_ci err = -EBADF; 40162306a36Sopenharmony_ci break; 40262306a36Sopenharmony_ci } 40362306a36Sopenharmony_ci /* 40462306a36Sopenharmony_ci * Don't allow io_uring instances to be registered. 40562306a36Sopenharmony_ci */ 40662306a36Sopenharmony_ci if (io_is_uring_fops(file)) { 40762306a36Sopenharmony_ci fput(file); 40862306a36Sopenharmony_ci err = -EBADF; 40962306a36Sopenharmony_ci break; 41062306a36Sopenharmony_ci } 41162306a36Sopenharmony_ci *io_get_tag_slot(data, i) = tag; 41262306a36Sopenharmony_ci io_fixed_file_set(file_slot, file); 41362306a36Sopenharmony_ci io_file_bitmap_set(&ctx->file_table, i); 41462306a36Sopenharmony_ci } 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci return done ? done : err; 41762306a36Sopenharmony_ci} 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_cistatic int __io_sqe_buffers_update(struct io_ring_ctx *ctx, 42062306a36Sopenharmony_ci struct io_uring_rsrc_update2 *up, 42162306a36Sopenharmony_ci unsigned int nr_args) 42262306a36Sopenharmony_ci{ 42362306a36Sopenharmony_ci u64 __user *tags = u64_to_user_ptr(up->tags); 42462306a36Sopenharmony_ci struct iovec iov, __user *iovs = u64_to_user_ptr(up->data); 42562306a36Sopenharmony_ci struct page *last_hpage = NULL; 42662306a36Sopenharmony_ci __u32 done; 42762306a36Sopenharmony_ci int i, err; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci if (!ctx->buf_data) 43062306a36Sopenharmony_ci return -ENXIO; 43162306a36Sopenharmony_ci if (up->offset + nr_args > ctx->nr_user_bufs) 43262306a36Sopenharmony_ci return -EINVAL; 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci for (done = 0; done < nr_args; done++) { 43562306a36Sopenharmony_ci struct io_mapped_ubuf *imu; 43662306a36Sopenharmony_ci u64 tag = 0; 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci err = io_copy_iov(ctx, &iov, iovs, done); 43962306a36Sopenharmony_ci if (err) 44062306a36Sopenharmony_ci break; 44162306a36Sopenharmony_ci if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) { 44262306a36Sopenharmony_ci err = -EFAULT; 44362306a36Sopenharmony_ci break; 44462306a36Sopenharmony_ci } 44562306a36Sopenharmony_ci err = io_buffer_validate(&iov); 44662306a36Sopenharmony_ci if (err) 44762306a36Sopenharmony_ci break; 44862306a36Sopenharmony_ci if (!iov.iov_base && tag) { 44962306a36Sopenharmony_ci err = -EINVAL; 45062306a36Sopenharmony_ci break; 45162306a36Sopenharmony_ci } 45262306a36Sopenharmony_ci err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage); 45362306a36Sopenharmony_ci if (err) 45462306a36Sopenharmony_ci break; 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci i = array_index_nospec(up->offset + done, ctx->nr_user_bufs); 45762306a36Sopenharmony_ci if (ctx->user_bufs[i] != &dummy_ubuf) { 45862306a36Sopenharmony_ci err = io_queue_rsrc_removal(ctx->buf_data, i, 45962306a36Sopenharmony_ci ctx->user_bufs[i]); 46062306a36Sopenharmony_ci if (unlikely(err)) { 46162306a36Sopenharmony_ci io_buffer_unmap(ctx, &imu); 46262306a36Sopenharmony_ci break; 46362306a36Sopenharmony_ci } 46462306a36Sopenharmony_ci ctx->user_bufs[i] = (struct io_mapped_ubuf *)&dummy_ubuf; 46562306a36Sopenharmony_ci } 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci ctx->user_bufs[i] = imu; 46862306a36Sopenharmony_ci *io_get_tag_slot(ctx->buf_data, i) = tag; 46962306a36Sopenharmony_ci } 47062306a36Sopenharmony_ci return done ? done : err; 47162306a36Sopenharmony_ci} 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_cistatic int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, 47462306a36Sopenharmony_ci struct io_uring_rsrc_update2 *up, 47562306a36Sopenharmony_ci unsigned nr_args) 47662306a36Sopenharmony_ci{ 47762306a36Sopenharmony_ci __u32 tmp; 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci lockdep_assert_held(&ctx->uring_lock); 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci if (check_add_overflow(up->offset, nr_args, &tmp)) 48262306a36Sopenharmony_ci return -EOVERFLOW; 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci switch (type) { 48562306a36Sopenharmony_ci case IORING_RSRC_FILE: 48662306a36Sopenharmony_ci return __io_sqe_files_update(ctx, up, nr_args); 48762306a36Sopenharmony_ci case IORING_RSRC_BUFFER: 48862306a36Sopenharmony_ci return __io_sqe_buffers_update(ctx, up, nr_args); 48962306a36Sopenharmony_ci } 49062306a36Sopenharmony_ci return -EINVAL; 49162306a36Sopenharmony_ci} 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ciint io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, 49462306a36Sopenharmony_ci unsigned nr_args) 49562306a36Sopenharmony_ci{ 49662306a36Sopenharmony_ci struct io_uring_rsrc_update2 up; 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci if (!nr_args) 49962306a36Sopenharmony_ci return -EINVAL; 50062306a36Sopenharmony_ci memset(&up, 0, sizeof(up)); 50162306a36Sopenharmony_ci if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) 50262306a36Sopenharmony_ci return -EFAULT; 50362306a36Sopenharmony_ci if (up.resv || up.resv2) 50462306a36Sopenharmony_ci return -EINVAL; 50562306a36Sopenharmony_ci return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); 50662306a36Sopenharmony_ci} 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ciint io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, 50962306a36Sopenharmony_ci unsigned size, unsigned type) 51062306a36Sopenharmony_ci{ 51162306a36Sopenharmony_ci struct io_uring_rsrc_update2 up; 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci if (size != sizeof(up)) 51462306a36Sopenharmony_ci return -EINVAL; 51562306a36Sopenharmony_ci if (copy_from_user(&up, arg, sizeof(up))) 51662306a36Sopenharmony_ci return -EFAULT; 51762306a36Sopenharmony_ci if (!up.nr || up.resv || up.resv2) 51862306a36Sopenharmony_ci return -EINVAL; 51962306a36Sopenharmony_ci return __io_register_rsrc_update(ctx, type, &up, up.nr); 52062306a36Sopenharmony_ci} 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci__cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, 52362306a36Sopenharmony_ci unsigned int size, unsigned int type) 52462306a36Sopenharmony_ci{ 52562306a36Sopenharmony_ci struct io_uring_rsrc_register rr; 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci /* keep it extendible */ 52862306a36Sopenharmony_ci if (size != sizeof(rr)) 52962306a36Sopenharmony_ci return -EINVAL; 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci memset(&rr, 0, sizeof(rr)); 53262306a36Sopenharmony_ci if (copy_from_user(&rr, arg, size)) 53362306a36Sopenharmony_ci return -EFAULT; 53462306a36Sopenharmony_ci if (!rr.nr || rr.resv2) 53562306a36Sopenharmony_ci return -EINVAL; 53662306a36Sopenharmony_ci if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE) 53762306a36Sopenharmony_ci return -EINVAL; 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci switch (type) { 54062306a36Sopenharmony_ci case IORING_RSRC_FILE: 54162306a36Sopenharmony_ci if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data) 54262306a36Sopenharmony_ci break; 54362306a36Sopenharmony_ci return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data), 54462306a36Sopenharmony_ci rr.nr, u64_to_user_ptr(rr.tags)); 54562306a36Sopenharmony_ci case IORING_RSRC_BUFFER: 54662306a36Sopenharmony_ci if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data) 54762306a36Sopenharmony_ci break; 54862306a36Sopenharmony_ci return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data), 54962306a36Sopenharmony_ci rr.nr, u64_to_user_ptr(rr.tags)); 55062306a36Sopenharmony_ci } 55162306a36Sopenharmony_ci return -EINVAL; 55262306a36Sopenharmony_ci} 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ciint io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) 55562306a36Sopenharmony_ci{ 55662306a36Sopenharmony_ci struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) 55962306a36Sopenharmony_ci return -EINVAL; 56062306a36Sopenharmony_ci if (sqe->rw_flags || sqe->splice_fd_in) 56162306a36Sopenharmony_ci return -EINVAL; 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci up->offset = READ_ONCE(sqe->off); 56462306a36Sopenharmony_ci up->nr_args = READ_ONCE(sqe->len); 56562306a36Sopenharmony_ci if (!up->nr_args) 56662306a36Sopenharmony_ci return -EINVAL; 56762306a36Sopenharmony_ci up->arg = READ_ONCE(sqe->addr); 56862306a36Sopenharmony_ci return 0; 56962306a36Sopenharmony_ci} 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_cistatic int io_files_update_with_index_alloc(struct io_kiocb *req, 57262306a36Sopenharmony_ci unsigned int issue_flags) 57362306a36Sopenharmony_ci{ 57462306a36Sopenharmony_ci struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); 57562306a36Sopenharmony_ci __s32 __user *fds = u64_to_user_ptr(up->arg); 57662306a36Sopenharmony_ci unsigned int done; 57762306a36Sopenharmony_ci struct file *file; 57862306a36Sopenharmony_ci int ret, fd; 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci if (!req->ctx->file_data) 58162306a36Sopenharmony_ci return -ENXIO; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci for (done = 0; done < up->nr_args; done++) { 58462306a36Sopenharmony_ci if (copy_from_user(&fd, &fds[done], sizeof(fd))) { 58562306a36Sopenharmony_ci ret = -EFAULT; 58662306a36Sopenharmony_ci break; 58762306a36Sopenharmony_ci } 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci file = fget(fd); 59062306a36Sopenharmony_ci if (!file) { 59162306a36Sopenharmony_ci ret = -EBADF; 59262306a36Sopenharmony_ci break; 59362306a36Sopenharmony_ci } 59462306a36Sopenharmony_ci ret = io_fixed_fd_install(req, issue_flags, file, 59562306a36Sopenharmony_ci IORING_FILE_INDEX_ALLOC); 59662306a36Sopenharmony_ci if (ret < 0) 59762306a36Sopenharmony_ci break; 59862306a36Sopenharmony_ci if (copy_to_user(&fds[done], &ret, sizeof(ret))) { 59962306a36Sopenharmony_ci __io_close_fixed(req->ctx, issue_flags, ret); 60062306a36Sopenharmony_ci ret = -EFAULT; 60162306a36Sopenharmony_ci break; 60262306a36Sopenharmony_ci } 60362306a36Sopenharmony_ci } 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci if (done) 60662306a36Sopenharmony_ci return done; 60762306a36Sopenharmony_ci return ret; 60862306a36Sopenharmony_ci} 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_ciint io_files_update(struct io_kiocb *req, unsigned int issue_flags) 61162306a36Sopenharmony_ci{ 61262306a36Sopenharmony_ci struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update); 61362306a36Sopenharmony_ci struct io_ring_ctx *ctx = req->ctx; 61462306a36Sopenharmony_ci struct io_uring_rsrc_update2 up2; 61562306a36Sopenharmony_ci int ret; 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_ci up2.offset = up->offset; 61862306a36Sopenharmony_ci up2.data = up->arg; 61962306a36Sopenharmony_ci up2.nr = 0; 62062306a36Sopenharmony_ci up2.tags = 0; 62162306a36Sopenharmony_ci up2.resv = 0; 62262306a36Sopenharmony_ci up2.resv2 = 0; 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci if (up->offset == IORING_FILE_INDEX_ALLOC) { 62562306a36Sopenharmony_ci ret = io_files_update_with_index_alloc(req, issue_flags); 62662306a36Sopenharmony_ci } else { 62762306a36Sopenharmony_ci io_ring_submit_lock(ctx, issue_flags); 62862306a36Sopenharmony_ci ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, 62962306a36Sopenharmony_ci &up2, up->nr_args); 63062306a36Sopenharmony_ci io_ring_submit_unlock(ctx, issue_flags); 63162306a36Sopenharmony_ci } 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci if (ret < 0) 63462306a36Sopenharmony_ci req_set_fail(req); 63562306a36Sopenharmony_ci io_req_set_res(req, ret, 0); 63662306a36Sopenharmony_ci return IOU_OK; 63762306a36Sopenharmony_ci} 63862306a36Sopenharmony_ci 63962306a36Sopenharmony_ciint io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, void *rsrc) 64062306a36Sopenharmony_ci{ 64162306a36Sopenharmony_ci struct io_ring_ctx *ctx = data->ctx; 64262306a36Sopenharmony_ci struct io_rsrc_node *node = ctx->rsrc_node; 64362306a36Sopenharmony_ci u64 *tag_slot = io_get_tag_slot(data, idx); 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci ctx->rsrc_node = io_rsrc_node_alloc(ctx); 64662306a36Sopenharmony_ci if (unlikely(!ctx->rsrc_node)) { 64762306a36Sopenharmony_ci ctx->rsrc_node = node; 64862306a36Sopenharmony_ci return -ENOMEM; 64962306a36Sopenharmony_ci } 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci node->item.rsrc = rsrc; 65262306a36Sopenharmony_ci node->type = data->rsrc_type; 65362306a36Sopenharmony_ci node->item.tag = *tag_slot; 65462306a36Sopenharmony_ci *tag_slot = 0; 65562306a36Sopenharmony_ci list_add_tail(&node->node, &ctx->rsrc_ref_list); 65662306a36Sopenharmony_ci io_put_rsrc_node(ctx, node); 65762306a36Sopenharmony_ci return 0; 65862306a36Sopenharmony_ci} 65962306a36Sopenharmony_ci 66062306a36Sopenharmony_civoid __io_sqe_files_unregister(struct io_ring_ctx *ctx) 66162306a36Sopenharmony_ci{ 66262306a36Sopenharmony_ci int i; 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_ci for (i = 0; i < ctx->nr_user_files; i++) { 66562306a36Sopenharmony_ci struct file *file = io_file_from_index(&ctx->file_table, i); 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci if (!file) 66862306a36Sopenharmony_ci continue; 66962306a36Sopenharmony_ci io_file_bitmap_clear(&ctx->file_table, i); 67062306a36Sopenharmony_ci fput(file); 67162306a36Sopenharmony_ci } 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci io_free_file_tables(&ctx->file_table); 67462306a36Sopenharmony_ci io_file_table_set_alloc_range(ctx, 0, 0); 67562306a36Sopenharmony_ci io_rsrc_data_free(ctx->file_data); 67662306a36Sopenharmony_ci ctx->file_data = NULL; 67762306a36Sopenharmony_ci ctx->nr_user_files = 0; 67862306a36Sopenharmony_ci} 67962306a36Sopenharmony_ci 68062306a36Sopenharmony_ciint io_sqe_files_unregister(struct io_ring_ctx *ctx) 68162306a36Sopenharmony_ci{ 68262306a36Sopenharmony_ci unsigned nr = ctx->nr_user_files; 68362306a36Sopenharmony_ci int ret; 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci if (!ctx->file_data) 68662306a36Sopenharmony_ci return -ENXIO; 68762306a36Sopenharmony_ci 68862306a36Sopenharmony_ci /* 68962306a36Sopenharmony_ci * Quiesce may unlock ->uring_lock, and while it's not held 69062306a36Sopenharmony_ci * prevent new requests using the table. 69162306a36Sopenharmony_ci */ 69262306a36Sopenharmony_ci ctx->nr_user_files = 0; 69362306a36Sopenharmony_ci ret = io_rsrc_ref_quiesce(ctx->file_data, ctx); 69462306a36Sopenharmony_ci ctx->nr_user_files = nr; 69562306a36Sopenharmony_ci if (!ret) 69662306a36Sopenharmony_ci __io_sqe_files_unregister(ctx); 69762306a36Sopenharmony_ci return ret; 69862306a36Sopenharmony_ci} 69962306a36Sopenharmony_ci 70062306a36Sopenharmony_ciint io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, 70162306a36Sopenharmony_ci unsigned nr_args, u64 __user *tags) 70262306a36Sopenharmony_ci{ 70362306a36Sopenharmony_ci __s32 __user *fds = (__s32 __user *) arg; 70462306a36Sopenharmony_ci struct file *file; 70562306a36Sopenharmony_ci int fd, ret; 70662306a36Sopenharmony_ci unsigned i; 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci if (ctx->file_data) 70962306a36Sopenharmony_ci return -EBUSY; 71062306a36Sopenharmony_ci if (!nr_args) 71162306a36Sopenharmony_ci return -EINVAL; 71262306a36Sopenharmony_ci if (nr_args > IORING_MAX_FIXED_FILES) 71362306a36Sopenharmony_ci return -EMFILE; 71462306a36Sopenharmony_ci if (nr_args > rlimit(RLIMIT_NOFILE)) 71562306a36Sopenharmony_ci return -EMFILE; 71662306a36Sopenharmony_ci ret = io_rsrc_data_alloc(ctx, IORING_RSRC_FILE, tags, nr_args, 71762306a36Sopenharmony_ci &ctx->file_data); 71862306a36Sopenharmony_ci if (ret) 71962306a36Sopenharmony_ci return ret; 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci if (!io_alloc_file_tables(&ctx->file_table, nr_args)) { 72262306a36Sopenharmony_ci io_rsrc_data_free(ctx->file_data); 72362306a36Sopenharmony_ci ctx->file_data = NULL; 72462306a36Sopenharmony_ci return -ENOMEM; 72562306a36Sopenharmony_ci } 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { 72862306a36Sopenharmony_ci struct io_fixed_file *file_slot; 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci if (fds && copy_from_user(&fd, &fds[i], sizeof(fd))) { 73162306a36Sopenharmony_ci ret = -EFAULT; 73262306a36Sopenharmony_ci goto fail; 73362306a36Sopenharmony_ci } 73462306a36Sopenharmony_ci /* allow sparse sets */ 73562306a36Sopenharmony_ci if (!fds || fd == -1) { 73662306a36Sopenharmony_ci ret = -EINVAL; 73762306a36Sopenharmony_ci if (unlikely(*io_get_tag_slot(ctx->file_data, i))) 73862306a36Sopenharmony_ci goto fail; 73962306a36Sopenharmony_ci continue; 74062306a36Sopenharmony_ci } 74162306a36Sopenharmony_ci 74262306a36Sopenharmony_ci file = fget(fd); 74362306a36Sopenharmony_ci ret = -EBADF; 74462306a36Sopenharmony_ci if (unlikely(!file)) 74562306a36Sopenharmony_ci goto fail; 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ci /* 74862306a36Sopenharmony_ci * Don't allow io_uring instances to be registered. 74962306a36Sopenharmony_ci */ 75062306a36Sopenharmony_ci if (io_is_uring_fops(file)) { 75162306a36Sopenharmony_ci fput(file); 75262306a36Sopenharmony_ci goto fail; 75362306a36Sopenharmony_ci } 75462306a36Sopenharmony_ci file_slot = io_fixed_file_slot(&ctx->file_table, i); 75562306a36Sopenharmony_ci io_fixed_file_set(file_slot, file); 75662306a36Sopenharmony_ci io_file_bitmap_set(&ctx->file_table, i); 75762306a36Sopenharmony_ci } 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci /* default it to the whole table */ 76062306a36Sopenharmony_ci io_file_table_set_alloc_range(ctx, 0, ctx->nr_user_files); 76162306a36Sopenharmony_ci return 0; 76262306a36Sopenharmony_cifail: 76362306a36Sopenharmony_ci __io_sqe_files_unregister(ctx); 76462306a36Sopenharmony_ci return ret; 76562306a36Sopenharmony_ci} 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_cistatic void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) 76862306a36Sopenharmony_ci{ 76962306a36Sopenharmony_ci io_buffer_unmap(ctx, &prsrc->buf); 77062306a36Sopenharmony_ci prsrc->buf = NULL; 77162306a36Sopenharmony_ci} 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_civoid __io_sqe_buffers_unregister(struct io_ring_ctx *ctx) 77462306a36Sopenharmony_ci{ 77562306a36Sopenharmony_ci unsigned int i; 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci for (i = 0; i < ctx->nr_user_bufs; i++) 77862306a36Sopenharmony_ci io_buffer_unmap(ctx, &ctx->user_bufs[i]); 77962306a36Sopenharmony_ci kfree(ctx->user_bufs); 78062306a36Sopenharmony_ci io_rsrc_data_free(ctx->buf_data); 78162306a36Sopenharmony_ci ctx->user_bufs = NULL; 78262306a36Sopenharmony_ci ctx->buf_data = NULL; 78362306a36Sopenharmony_ci ctx->nr_user_bufs = 0; 78462306a36Sopenharmony_ci} 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ciint io_sqe_buffers_unregister(struct io_ring_ctx *ctx) 78762306a36Sopenharmony_ci{ 78862306a36Sopenharmony_ci unsigned nr = ctx->nr_user_bufs; 78962306a36Sopenharmony_ci int ret; 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_ci if (!ctx->buf_data) 79262306a36Sopenharmony_ci return -ENXIO; 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci /* 79562306a36Sopenharmony_ci * Quiesce may unlock ->uring_lock, and while it's not held 79662306a36Sopenharmony_ci * prevent new requests using the table. 79762306a36Sopenharmony_ci */ 79862306a36Sopenharmony_ci ctx->nr_user_bufs = 0; 79962306a36Sopenharmony_ci ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx); 80062306a36Sopenharmony_ci ctx->nr_user_bufs = nr; 80162306a36Sopenharmony_ci if (!ret) 80262306a36Sopenharmony_ci __io_sqe_buffers_unregister(ctx); 80362306a36Sopenharmony_ci return ret; 80462306a36Sopenharmony_ci} 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci/* 80762306a36Sopenharmony_ci * Not super efficient, but this is just a registration time. And we do cache 80862306a36Sopenharmony_ci * the last compound head, so generally we'll only do a full search if we don't 80962306a36Sopenharmony_ci * match that one. 81062306a36Sopenharmony_ci * 81162306a36Sopenharmony_ci * We check if the given compound head page has already been accounted, to 81262306a36Sopenharmony_ci * avoid double accounting it. This allows us to account the full size of the 81362306a36Sopenharmony_ci * page, not just the constituent pages of a huge page. 81462306a36Sopenharmony_ci */ 81562306a36Sopenharmony_cistatic bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages, 81662306a36Sopenharmony_ci int nr_pages, struct page *hpage) 81762306a36Sopenharmony_ci{ 81862306a36Sopenharmony_ci int i, j; 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci /* check current page array */ 82162306a36Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 82262306a36Sopenharmony_ci if (!PageCompound(pages[i])) 82362306a36Sopenharmony_ci continue; 82462306a36Sopenharmony_ci if (compound_head(pages[i]) == hpage) 82562306a36Sopenharmony_ci return true; 82662306a36Sopenharmony_ci } 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci /* check previously registered pages */ 82962306a36Sopenharmony_ci for (i = 0; i < ctx->nr_user_bufs; i++) { 83062306a36Sopenharmony_ci struct io_mapped_ubuf *imu = ctx->user_bufs[i]; 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci for (j = 0; j < imu->nr_bvecs; j++) { 83362306a36Sopenharmony_ci if (!PageCompound(imu->bvec[j].bv_page)) 83462306a36Sopenharmony_ci continue; 83562306a36Sopenharmony_ci if (compound_head(imu->bvec[j].bv_page) == hpage) 83662306a36Sopenharmony_ci return true; 83762306a36Sopenharmony_ci } 83862306a36Sopenharmony_ci } 83962306a36Sopenharmony_ci 84062306a36Sopenharmony_ci return false; 84162306a36Sopenharmony_ci} 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_cistatic int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages, 84462306a36Sopenharmony_ci int nr_pages, struct io_mapped_ubuf *imu, 84562306a36Sopenharmony_ci struct page **last_hpage) 84662306a36Sopenharmony_ci{ 84762306a36Sopenharmony_ci int i, ret; 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ci imu->acct_pages = 0; 85062306a36Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 85162306a36Sopenharmony_ci if (!PageCompound(pages[i])) { 85262306a36Sopenharmony_ci imu->acct_pages++; 85362306a36Sopenharmony_ci } else { 85462306a36Sopenharmony_ci struct page *hpage; 85562306a36Sopenharmony_ci 85662306a36Sopenharmony_ci hpage = compound_head(pages[i]); 85762306a36Sopenharmony_ci if (hpage == *last_hpage) 85862306a36Sopenharmony_ci continue; 85962306a36Sopenharmony_ci *last_hpage = hpage; 86062306a36Sopenharmony_ci if (headpage_already_acct(ctx, pages, i, hpage)) 86162306a36Sopenharmony_ci continue; 86262306a36Sopenharmony_ci imu->acct_pages += page_size(hpage) >> PAGE_SHIFT; 86362306a36Sopenharmony_ci } 86462306a36Sopenharmony_ci } 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_ci if (!imu->acct_pages) 86762306a36Sopenharmony_ci return 0; 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_ci ret = io_account_mem(ctx, imu->acct_pages); 87062306a36Sopenharmony_ci if (ret) 87162306a36Sopenharmony_ci imu->acct_pages = 0; 87262306a36Sopenharmony_ci return ret; 87362306a36Sopenharmony_ci} 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_cistruct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages) 87662306a36Sopenharmony_ci{ 87762306a36Sopenharmony_ci unsigned long start, end, nr_pages; 87862306a36Sopenharmony_ci struct page **pages = NULL; 87962306a36Sopenharmony_ci int pret, ret = -ENOMEM; 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ci end = (ubuf + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 88262306a36Sopenharmony_ci start = ubuf >> PAGE_SHIFT; 88362306a36Sopenharmony_ci nr_pages = end - start; 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); 88662306a36Sopenharmony_ci if (!pages) 88762306a36Sopenharmony_ci goto done; 88862306a36Sopenharmony_ci 88962306a36Sopenharmony_ci ret = 0; 89062306a36Sopenharmony_ci mmap_read_lock(current->mm); 89162306a36Sopenharmony_ci pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM, 89262306a36Sopenharmony_ci pages); 89362306a36Sopenharmony_ci if (pret == nr_pages) 89462306a36Sopenharmony_ci *npages = nr_pages; 89562306a36Sopenharmony_ci else 89662306a36Sopenharmony_ci ret = pret < 0 ? pret : -EFAULT; 89762306a36Sopenharmony_ci 89862306a36Sopenharmony_ci mmap_read_unlock(current->mm); 89962306a36Sopenharmony_ci if (ret) { 90062306a36Sopenharmony_ci /* if we did partial map, release any pages we did get */ 90162306a36Sopenharmony_ci if (pret > 0) 90262306a36Sopenharmony_ci unpin_user_pages(pages, pret); 90362306a36Sopenharmony_ci goto done; 90462306a36Sopenharmony_ci } 90562306a36Sopenharmony_ci ret = 0; 90662306a36Sopenharmony_cidone: 90762306a36Sopenharmony_ci if (ret < 0) { 90862306a36Sopenharmony_ci kvfree(pages); 90962306a36Sopenharmony_ci pages = ERR_PTR(ret); 91062306a36Sopenharmony_ci } 91162306a36Sopenharmony_ci return pages; 91262306a36Sopenharmony_ci} 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_cistatic int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, 91562306a36Sopenharmony_ci struct io_mapped_ubuf **pimu, 91662306a36Sopenharmony_ci struct page **last_hpage) 91762306a36Sopenharmony_ci{ 91862306a36Sopenharmony_ci struct io_mapped_ubuf *imu = NULL; 91962306a36Sopenharmony_ci struct page **pages = NULL; 92062306a36Sopenharmony_ci unsigned long off; 92162306a36Sopenharmony_ci size_t size; 92262306a36Sopenharmony_ci int ret, nr_pages, i; 92362306a36Sopenharmony_ci struct folio *folio = NULL; 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci *pimu = (struct io_mapped_ubuf *)&dummy_ubuf; 92662306a36Sopenharmony_ci if (!iov->iov_base) 92762306a36Sopenharmony_ci return 0; 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_ci ret = -ENOMEM; 93062306a36Sopenharmony_ci pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len, 93162306a36Sopenharmony_ci &nr_pages); 93262306a36Sopenharmony_ci if (IS_ERR(pages)) { 93362306a36Sopenharmony_ci ret = PTR_ERR(pages); 93462306a36Sopenharmony_ci pages = NULL; 93562306a36Sopenharmony_ci goto done; 93662306a36Sopenharmony_ci } 93762306a36Sopenharmony_ci 93862306a36Sopenharmony_ci /* If it's a huge page, try to coalesce them into a single bvec entry */ 93962306a36Sopenharmony_ci if (nr_pages > 1) { 94062306a36Sopenharmony_ci folio = page_folio(pages[0]); 94162306a36Sopenharmony_ci for (i = 1; i < nr_pages; i++) { 94262306a36Sopenharmony_ci /* 94362306a36Sopenharmony_ci * Pages must be consecutive and on the same folio for 94462306a36Sopenharmony_ci * this to work 94562306a36Sopenharmony_ci */ 94662306a36Sopenharmony_ci if (page_folio(pages[i]) != folio || 94762306a36Sopenharmony_ci pages[i] != pages[i - 1] + 1) { 94862306a36Sopenharmony_ci folio = NULL; 94962306a36Sopenharmony_ci break; 95062306a36Sopenharmony_ci } 95162306a36Sopenharmony_ci } 95262306a36Sopenharmony_ci if (folio) { 95362306a36Sopenharmony_ci /* 95462306a36Sopenharmony_ci * The pages are bound to the folio, it doesn't 95562306a36Sopenharmony_ci * actually unpin them but drops all but one reference, 95662306a36Sopenharmony_ci * which is usually put down by io_buffer_unmap(). 95762306a36Sopenharmony_ci * Note, needs a better helper. 95862306a36Sopenharmony_ci */ 95962306a36Sopenharmony_ci unpin_user_pages(&pages[1], nr_pages - 1); 96062306a36Sopenharmony_ci nr_pages = 1; 96162306a36Sopenharmony_ci } 96262306a36Sopenharmony_ci } 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); 96562306a36Sopenharmony_ci if (!imu) 96662306a36Sopenharmony_ci goto done; 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_ci ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage); 96962306a36Sopenharmony_ci if (ret) { 97062306a36Sopenharmony_ci unpin_user_pages(pages, nr_pages); 97162306a36Sopenharmony_ci goto done; 97262306a36Sopenharmony_ci } 97362306a36Sopenharmony_ci 97462306a36Sopenharmony_ci off = (unsigned long) iov->iov_base & ~PAGE_MASK; 97562306a36Sopenharmony_ci size = iov->iov_len; 97662306a36Sopenharmony_ci /* store original address for later verification */ 97762306a36Sopenharmony_ci imu->ubuf = (unsigned long) iov->iov_base; 97862306a36Sopenharmony_ci imu->ubuf_end = imu->ubuf + iov->iov_len; 97962306a36Sopenharmony_ci imu->nr_bvecs = nr_pages; 98062306a36Sopenharmony_ci *pimu = imu; 98162306a36Sopenharmony_ci ret = 0; 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_ci if (folio) { 98462306a36Sopenharmony_ci bvec_set_page(&imu->bvec[0], pages[0], size, off); 98562306a36Sopenharmony_ci goto done; 98662306a36Sopenharmony_ci } 98762306a36Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 98862306a36Sopenharmony_ci size_t vec_len; 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_ci vec_len = min_t(size_t, size, PAGE_SIZE - off); 99162306a36Sopenharmony_ci bvec_set_page(&imu->bvec[i], pages[i], vec_len, off); 99262306a36Sopenharmony_ci off = 0; 99362306a36Sopenharmony_ci size -= vec_len; 99462306a36Sopenharmony_ci } 99562306a36Sopenharmony_cidone: 99662306a36Sopenharmony_ci if (ret) 99762306a36Sopenharmony_ci kvfree(imu); 99862306a36Sopenharmony_ci kvfree(pages); 99962306a36Sopenharmony_ci return ret; 100062306a36Sopenharmony_ci} 100162306a36Sopenharmony_ci 100262306a36Sopenharmony_cistatic int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args) 100362306a36Sopenharmony_ci{ 100462306a36Sopenharmony_ci ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL); 100562306a36Sopenharmony_ci return ctx->user_bufs ? 0 : -ENOMEM; 100662306a36Sopenharmony_ci} 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_ciint io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, 100962306a36Sopenharmony_ci unsigned int nr_args, u64 __user *tags) 101062306a36Sopenharmony_ci{ 101162306a36Sopenharmony_ci struct page *last_hpage = NULL; 101262306a36Sopenharmony_ci struct io_rsrc_data *data; 101362306a36Sopenharmony_ci int i, ret; 101462306a36Sopenharmony_ci struct iovec iov; 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_ci BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16)); 101762306a36Sopenharmony_ci 101862306a36Sopenharmony_ci if (ctx->user_bufs) 101962306a36Sopenharmony_ci return -EBUSY; 102062306a36Sopenharmony_ci if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS) 102162306a36Sopenharmony_ci return -EINVAL; 102262306a36Sopenharmony_ci ret = io_rsrc_data_alloc(ctx, IORING_RSRC_BUFFER, tags, nr_args, &data); 102362306a36Sopenharmony_ci if (ret) 102462306a36Sopenharmony_ci return ret; 102562306a36Sopenharmony_ci ret = io_buffers_map_alloc(ctx, nr_args); 102662306a36Sopenharmony_ci if (ret) { 102762306a36Sopenharmony_ci io_rsrc_data_free(data); 102862306a36Sopenharmony_ci return ret; 102962306a36Sopenharmony_ci } 103062306a36Sopenharmony_ci 103162306a36Sopenharmony_ci for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) { 103262306a36Sopenharmony_ci if (arg) { 103362306a36Sopenharmony_ci ret = io_copy_iov(ctx, &iov, arg, i); 103462306a36Sopenharmony_ci if (ret) 103562306a36Sopenharmony_ci break; 103662306a36Sopenharmony_ci ret = io_buffer_validate(&iov); 103762306a36Sopenharmony_ci if (ret) 103862306a36Sopenharmony_ci break; 103962306a36Sopenharmony_ci } else { 104062306a36Sopenharmony_ci memset(&iov, 0, sizeof(iov)); 104162306a36Sopenharmony_ci } 104262306a36Sopenharmony_ci 104362306a36Sopenharmony_ci if (!iov.iov_base && *io_get_tag_slot(data, i)) { 104462306a36Sopenharmony_ci ret = -EINVAL; 104562306a36Sopenharmony_ci break; 104662306a36Sopenharmony_ci } 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_ci ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i], 104962306a36Sopenharmony_ci &last_hpage); 105062306a36Sopenharmony_ci if (ret) 105162306a36Sopenharmony_ci break; 105262306a36Sopenharmony_ci } 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci WARN_ON_ONCE(ctx->buf_data); 105562306a36Sopenharmony_ci 105662306a36Sopenharmony_ci ctx->buf_data = data; 105762306a36Sopenharmony_ci if (ret) 105862306a36Sopenharmony_ci __io_sqe_buffers_unregister(ctx); 105962306a36Sopenharmony_ci return ret; 106062306a36Sopenharmony_ci} 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ciint io_import_fixed(int ddir, struct iov_iter *iter, 106362306a36Sopenharmony_ci struct io_mapped_ubuf *imu, 106462306a36Sopenharmony_ci u64 buf_addr, size_t len) 106562306a36Sopenharmony_ci{ 106662306a36Sopenharmony_ci u64 buf_end; 106762306a36Sopenharmony_ci size_t offset; 106862306a36Sopenharmony_ci 106962306a36Sopenharmony_ci if (WARN_ON_ONCE(!imu)) 107062306a36Sopenharmony_ci return -EFAULT; 107162306a36Sopenharmony_ci if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end))) 107262306a36Sopenharmony_ci return -EFAULT; 107362306a36Sopenharmony_ci /* not inside the mapped region */ 107462306a36Sopenharmony_ci if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end)) 107562306a36Sopenharmony_ci return -EFAULT; 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_ci /* 107862306a36Sopenharmony_ci * Might not be a start of buffer, set size appropriately 107962306a36Sopenharmony_ci * and advance us to the beginning. 108062306a36Sopenharmony_ci */ 108162306a36Sopenharmony_ci offset = buf_addr - imu->ubuf; 108262306a36Sopenharmony_ci iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len); 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_ci if (offset) { 108562306a36Sopenharmony_ci /* 108662306a36Sopenharmony_ci * Don't use iov_iter_advance() here, as it's really slow for 108762306a36Sopenharmony_ci * using the latter parts of a big fixed buffer - it iterates 108862306a36Sopenharmony_ci * over each segment manually. We can cheat a bit here, because 108962306a36Sopenharmony_ci * we know that: 109062306a36Sopenharmony_ci * 109162306a36Sopenharmony_ci * 1) it's a BVEC iter, we set it up 109262306a36Sopenharmony_ci * 2) all bvecs are PAGE_SIZE in size, except potentially the 109362306a36Sopenharmony_ci * first and last bvec 109462306a36Sopenharmony_ci * 109562306a36Sopenharmony_ci * So just find our index, and adjust the iterator afterwards. 109662306a36Sopenharmony_ci * If the offset is within the first bvec (or the whole first 109762306a36Sopenharmony_ci * bvec, just use iov_iter_advance(). This makes it easier 109862306a36Sopenharmony_ci * since we can just skip the first segment, which may not 109962306a36Sopenharmony_ci * be PAGE_SIZE aligned. 110062306a36Sopenharmony_ci */ 110162306a36Sopenharmony_ci const struct bio_vec *bvec = imu->bvec; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci if (offset < bvec->bv_len) { 110462306a36Sopenharmony_ci /* 110562306a36Sopenharmony_ci * Note, huge pages buffers consists of one large 110662306a36Sopenharmony_ci * bvec entry and should always go this way. The other 110762306a36Sopenharmony_ci * branch doesn't expect non PAGE_SIZE'd chunks. 110862306a36Sopenharmony_ci */ 110962306a36Sopenharmony_ci iter->bvec = bvec; 111062306a36Sopenharmony_ci iter->nr_segs = bvec->bv_len; 111162306a36Sopenharmony_ci iter->count -= offset; 111262306a36Sopenharmony_ci iter->iov_offset = offset; 111362306a36Sopenharmony_ci } else { 111462306a36Sopenharmony_ci unsigned long seg_skip; 111562306a36Sopenharmony_ci 111662306a36Sopenharmony_ci /* skip first vec */ 111762306a36Sopenharmony_ci offset -= bvec->bv_len; 111862306a36Sopenharmony_ci seg_skip = 1 + (offset >> PAGE_SHIFT); 111962306a36Sopenharmony_ci 112062306a36Sopenharmony_ci iter->bvec = bvec + seg_skip; 112162306a36Sopenharmony_ci iter->nr_segs -= seg_skip; 112262306a36Sopenharmony_ci iter->count -= bvec->bv_len + offset; 112362306a36Sopenharmony_ci iter->iov_offset = offset & ~PAGE_MASK; 112462306a36Sopenharmony_ci } 112562306a36Sopenharmony_ci } 112662306a36Sopenharmony_ci 112762306a36Sopenharmony_ci return 0; 112862306a36Sopenharmony_ci} 1129