// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci#include <linux/kernel.h>
362306a36Sopenharmony_ci#include <linux/errno.h>
462306a36Sopenharmony_ci#include <linux/fs.h>
562306a36Sopenharmony_ci#include <linux/file.h>
662306a36Sopenharmony_ci#include <linux/mm.h>
762306a36Sopenharmony_ci#include <linux/slab.h>
862306a36Sopenharmony_ci#include <linux/nospec.h>
962306a36Sopenharmony_ci#include <linux/hugetlb.h>
1062306a36Sopenharmony_ci#include <linux/compat.h>
1162306a36Sopenharmony_ci#include <linux/io_uring.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include <uapi/linux/io_uring.h>
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci#include "io_uring.h"
1662306a36Sopenharmony_ci#include "openclose.h"
1762306a36Sopenharmony_ci#include "rsrc.h"
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_cistruct io_rsrc_update {
2062306a36Sopenharmony_ci	struct file			*file;
2162306a36Sopenharmony_ci	u64				arg;
2262306a36Sopenharmony_ci	u32				nr_args;
2362306a36Sopenharmony_ci	u32				offset;
2462306a36Sopenharmony_ci};
2562306a36Sopenharmony_ci
/* Forward declarations for helpers referenced before they are defined. */
static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
				  struct io_mapped_ubuf **pimu,
				  struct page **last_hpage);
3062306a36Sopenharmony_ci
/*
 * Only the maxima are defined here.
 * NOTE(review): presumably the caps on registered files / buffers per ring;
 * the users of these macros are outside this part of the file.
 */
#define IORING_MAX_FIXED_FILES	(1U << 20)
#define IORING_MAX_REG_BUFFERS	(1U << 14)
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_cistatic const struct io_mapped_ubuf dummy_ubuf = {
3662306a36Sopenharmony_ci	/* set invalid range, so io_import_fixed() fails meeting it */
3762306a36Sopenharmony_ci	.ubuf = -1UL,
3862306a36Sopenharmony_ci	.ubuf_end = 0,
3962306a36Sopenharmony_ci};
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ciint __io_account_mem(struct user_struct *user, unsigned long nr_pages)
4262306a36Sopenharmony_ci{
4362306a36Sopenharmony_ci	unsigned long page_limit, cur_pages, new_pages;
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci	if (!nr_pages)
4662306a36Sopenharmony_ci		return 0;
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci	/* Don't allow more pages than we can safely lock */
4962306a36Sopenharmony_ci	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	cur_pages = atomic_long_read(&user->locked_vm);
5262306a36Sopenharmony_ci	do {
5362306a36Sopenharmony_ci		new_pages = cur_pages + nr_pages;
5462306a36Sopenharmony_ci		if (new_pages > page_limit)
5562306a36Sopenharmony_ci			return -ENOMEM;
5662306a36Sopenharmony_ci	} while (!atomic_long_try_cmpxchg(&user->locked_vm,
5762306a36Sopenharmony_ci					  &cur_pages, new_pages));
5862306a36Sopenharmony_ci	return 0;
5962306a36Sopenharmony_ci}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_cistatic void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	if (ctx->user)
6462306a36Sopenharmony_ci		__io_unaccount_mem(ctx->user, nr_pages);
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	if (ctx->mm_account)
6762306a36Sopenharmony_ci		atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm);
6862306a36Sopenharmony_ci}
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_cistatic int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
7162306a36Sopenharmony_ci{
7262306a36Sopenharmony_ci	int ret;
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	if (ctx->user) {
7562306a36Sopenharmony_ci		ret = __io_account_mem(ctx->user, nr_pages);
7662306a36Sopenharmony_ci		if (ret)
7762306a36Sopenharmony_ci			return ret;
7862306a36Sopenharmony_ci	}
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	if (ctx->mm_account)
8162306a36Sopenharmony_ci		atomic64_add(nr_pages, &ctx->mm_account->pinned_vm);
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	return 0;
8462306a36Sopenharmony_ci}
8562306a36Sopenharmony_ci
8662306a36Sopenharmony_cistatic int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
8762306a36Sopenharmony_ci		       void __user *arg, unsigned index)
8862306a36Sopenharmony_ci{
8962306a36Sopenharmony_ci	struct iovec __user *src;
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci#ifdef CONFIG_COMPAT
9262306a36Sopenharmony_ci	if (ctx->compat) {
9362306a36Sopenharmony_ci		struct compat_iovec __user *ciovs;
9462306a36Sopenharmony_ci		struct compat_iovec ciov;
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci		ciovs = (struct compat_iovec __user *) arg;
9762306a36Sopenharmony_ci		if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
9862306a36Sopenharmony_ci			return -EFAULT;
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci		dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
10162306a36Sopenharmony_ci		dst->iov_len = ciov.iov_len;
10262306a36Sopenharmony_ci		return 0;
10362306a36Sopenharmony_ci	}
10462306a36Sopenharmony_ci#endif
10562306a36Sopenharmony_ci	src = (struct iovec __user *) arg;
10662306a36Sopenharmony_ci	if (copy_from_user(dst, &src[index], sizeof(*dst)))
10762306a36Sopenharmony_ci		return -EFAULT;
10862306a36Sopenharmony_ci	return 0;
10962306a36Sopenharmony_ci}
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_cistatic int io_buffer_validate(struct iovec *iov)
11262306a36Sopenharmony_ci{
11362306a36Sopenharmony_ci	unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1);
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	/*
11662306a36Sopenharmony_ci	 * Don't impose further limits on the size and buffer
11762306a36Sopenharmony_ci	 * constraints here, we'll -EINVAL later when IO is
11862306a36Sopenharmony_ci	 * submitted if they are wrong.
11962306a36Sopenharmony_ci	 */
12062306a36Sopenharmony_ci	if (!iov->iov_base)
12162306a36Sopenharmony_ci		return iov->iov_len ? -EFAULT : 0;
12262306a36Sopenharmony_ci	if (!iov->iov_len)
12362306a36Sopenharmony_ci		return -EFAULT;
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	/* arbitrary limit, but we need something */
12662306a36Sopenharmony_ci	if (iov->iov_len > SZ_1G)
12762306a36Sopenharmony_ci		return -EFAULT;
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp))
13062306a36Sopenharmony_ci		return -EOVERFLOW;
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci	return 0;
13362306a36Sopenharmony_ci}
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_cistatic void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot)
13662306a36Sopenharmony_ci{
13762306a36Sopenharmony_ci	struct io_mapped_ubuf *imu = *slot;
13862306a36Sopenharmony_ci	unsigned int i;
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	if (imu != &dummy_ubuf) {
14162306a36Sopenharmony_ci		for (i = 0; i < imu->nr_bvecs; i++)
14262306a36Sopenharmony_ci			unpin_user_page(imu->bvec[i].bv_page);
14362306a36Sopenharmony_ci		if (imu->acct_pages)
14462306a36Sopenharmony_ci			io_unaccount_mem(ctx, imu->acct_pages);
14562306a36Sopenharmony_ci		kvfree(imu);
14662306a36Sopenharmony_ci	}
14762306a36Sopenharmony_ci	*slot = NULL;
14862306a36Sopenharmony_ci}
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_cistatic void io_rsrc_put_work(struct io_rsrc_node *node)
15162306a36Sopenharmony_ci{
15262306a36Sopenharmony_ci	struct io_rsrc_put *prsrc = &node->item;
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	if (prsrc->tag)
15562306a36Sopenharmony_ci		io_post_aux_cqe(node->ctx, prsrc->tag, 0, 0);
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	switch (node->type) {
15862306a36Sopenharmony_ci	case IORING_RSRC_FILE:
15962306a36Sopenharmony_ci		fput(prsrc->file);
16062306a36Sopenharmony_ci		break;
16162306a36Sopenharmony_ci	case IORING_RSRC_BUFFER:
16262306a36Sopenharmony_ci		io_rsrc_buf_put(node->ctx, prsrc);
16362306a36Sopenharmony_ci		break;
16462306a36Sopenharmony_ci	default:
16562306a36Sopenharmony_ci		WARN_ON_ONCE(1);
16662306a36Sopenharmony_ci		break;
16762306a36Sopenharmony_ci	}
16862306a36Sopenharmony_ci}
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_civoid io_rsrc_node_destroy(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
17162306a36Sopenharmony_ci{
17262306a36Sopenharmony_ci	if (!io_alloc_cache_put(&ctx->rsrc_node_cache, &node->cache))
17362306a36Sopenharmony_ci		kfree(node);
17462306a36Sopenharmony_ci}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_civoid io_rsrc_node_ref_zero(struct io_rsrc_node *node)
17762306a36Sopenharmony_ci	__must_hold(&node->ctx->uring_lock)
17862306a36Sopenharmony_ci{
17962306a36Sopenharmony_ci	struct io_ring_ctx *ctx = node->ctx;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	while (!list_empty(&ctx->rsrc_ref_list)) {
18262306a36Sopenharmony_ci		node = list_first_entry(&ctx->rsrc_ref_list,
18362306a36Sopenharmony_ci					    struct io_rsrc_node, node);
18462306a36Sopenharmony_ci		/* recycle ref nodes in order */
18562306a36Sopenharmony_ci		if (node->refs)
18662306a36Sopenharmony_ci			break;
18762306a36Sopenharmony_ci		list_del(&node->node);
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci		if (likely(!node->empty))
19062306a36Sopenharmony_ci			io_rsrc_put_work(node);
19162306a36Sopenharmony_ci		io_rsrc_node_destroy(ctx, node);
19262306a36Sopenharmony_ci	}
19362306a36Sopenharmony_ci	if (list_empty(&ctx->rsrc_ref_list) && unlikely(ctx->rsrc_quiesce))
19462306a36Sopenharmony_ci		wake_up_all(&ctx->rsrc_quiesce_wq);
19562306a36Sopenharmony_ci}
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_cistruct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx)
19862306a36Sopenharmony_ci{
19962306a36Sopenharmony_ci	struct io_rsrc_node *ref_node;
20062306a36Sopenharmony_ci	struct io_cache_entry *entry;
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	entry = io_alloc_cache_get(&ctx->rsrc_node_cache);
20362306a36Sopenharmony_ci	if (entry) {
20462306a36Sopenharmony_ci		ref_node = container_of(entry, struct io_rsrc_node, cache);
20562306a36Sopenharmony_ci	} else {
20662306a36Sopenharmony_ci		ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL);
20762306a36Sopenharmony_ci		if (!ref_node)
20862306a36Sopenharmony_ci			return NULL;
20962306a36Sopenharmony_ci	}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	ref_node->ctx = ctx;
21262306a36Sopenharmony_ci	ref_node->empty = 0;
21362306a36Sopenharmony_ci	ref_node->refs = 1;
21462306a36Sopenharmony_ci	return ref_node;
21562306a36Sopenharmony_ci}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci__cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data,
21862306a36Sopenharmony_ci				      struct io_ring_ctx *ctx)
21962306a36Sopenharmony_ci{
22062306a36Sopenharmony_ci	struct io_rsrc_node *backup;
22162306a36Sopenharmony_ci	DEFINE_WAIT(we);
22262306a36Sopenharmony_ci	int ret;
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	/* As We may drop ->uring_lock, other task may have started quiesce */
22562306a36Sopenharmony_ci	if (data->quiesce)
22662306a36Sopenharmony_ci		return -ENXIO;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	backup = io_rsrc_node_alloc(ctx);
22962306a36Sopenharmony_ci	if (!backup)
23062306a36Sopenharmony_ci		return -ENOMEM;
23162306a36Sopenharmony_ci	ctx->rsrc_node->empty = true;
23262306a36Sopenharmony_ci	ctx->rsrc_node->type = -1;
23362306a36Sopenharmony_ci	list_add_tail(&ctx->rsrc_node->node, &ctx->rsrc_ref_list);
23462306a36Sopenharmony_ci	io_put_rsrc_node(ctx, ctx->rsrc_node);
23562306a36Sopenharmony_ci	ctx->rsrc_node = backup;
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_ci	if (list_empty(&ctx->rsrc_ref_list))
23862306a36Sopenharmony_ci		return 0;
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
24162306a36Sopenharmony_ci		atomic_set(&ctx->cq_wait_nr, 1);
24262306a36Sopenharmony_ci		smp_mb();
24362306a36Sopenharmony_ci	}
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	ctx->rsrc_quiesce++;
24662306a36Sopenharmony_ci	data->quiesce = true;
24762306a36Sopenharmony_ci	do {
24862306a36Sopenharmony_ci		prepare_to_wait(&ctx->rsrc_quiesce_wq, &we, TASK_INTERRUPTIBLE);
24962306a36Sopenharmony_ci		mutex_unlock(&ctx->uring_lock);
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci		ret = io_run_task_work_sig(ctx);
25262306a36Sopenharmony_ci		if (ret < 0) {
25362306a36Sopenharmony_ci			mutex_lock(&ctx->uring_lock);
25462306a36Sopenharmony_ci			if (list_empty(&ctx->rsrc_ref_list))
25562306a36Sopenharmony_ci				ret = 0;
25662306a36Sopenharmony_ci			break;
25762306a36Sopenharmony_ci		}
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci		schedule();
26062306a36Sopenharmony_ci		__set_current_state(TASK_RUNNING);
26162306a36Sopenharmony_ci		mutex_lock(&ctx->uring_lock);
26262306a36Sopenharmony_ci		ret = 0;
26362306a36Sopenharmony_ci	} while (!list_empty(&ctx->rsrc_ref_list));
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	finish_wait(&ctx->rsrc_quiesce_wq, &we);
26662306a36Sopenharmony_ci	data->quiesce = false;
26762306a36Sopenharmony_ci	ctx->rsrc_quiesce--;
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
27062306a36Sopenharmony_ci		atomic_set(&ctx->cq_wait_nr, 0);
27162306a36Sopenharmony_ci		smp_mb();
27262306a36Sopenharmony_ci	}
27362306a36Sopenharmony_ci	return ret;
27462306a36Sopenharmony_ci}
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_cistatic void io_free_page_table(void **table, size_t size)
27762306a36Sopenharmony_ci{
27862306a36Sopenharmony_ci	unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE);
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci	for (i = 0; i < nr_tables; i++)
28162306a36Sopenharmony_ci		kfree(table[i]);
28262306a36Sopenharmony_ci	kfree(table);
28362306a36Sopenharmony_ci}
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_cistatic void io_rsrc_data_free(struct io_rsrc_data *data)
28662306a36Sopenharmony_ci{
28762306a36Sopenharmony_ci	size_t size = data->nr * sizeof(data->tags[0][0]);
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	if (data->tags)
29062306a36Sopenharmony_ci		io_free_page_table((void **)data->tags, size);
29162306a36Sopenharmony_ci	kfree(data);
29262306a36Sopenharmony_ci}
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_cistatic __cold void **io_alloc_page_table(size_t size)
29562306a36Sopenharmony_ci{
29662306a36Sopenharmony_ci	unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE);
29762306a36Sopenharmony_ci	size_t init_size = size;
29862306a36Sopenharmony_ci	void **table;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT);
30162306a36Sopenharmony_ci	if (!table)
30262306a36Sopenharmony_ci		return NULL;
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	for (i = 0; i < nr_tables; i++) {
30562306a36Sopenharmony_ci		unsigned int this_size = min_t(size_t, size, PAGE_SIZE);
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ci		table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT);
30862306a36Sopenharmony_ci		if (!table[i]) {
30962306a36Sopenharmony_ci			io_free_page_table(table, init_size);
31062306a36Sopenharmony_ci			return NULL;
31162306a36Sopenharmony_ci		}
31262306a36Sopenharmony_ci		size -= this_size;
31362306a36Sopenharmony_ci	}
31462306a36Sopenharmony_ci	return table;
31562306a36Sopenharmony_ci}
31662306a36Sopenharmony_ci
31762306a36Sopenharmony_ci__cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx, int type,
31862306a36Sopenharmony_ci				     u64 __user *utags,
31962306a36Sopenharmony_ci				     unsigned nr, struct io_rsrc_data **pdata)
32062306a36Sopenharmony_ci{
32162306a36Sopenharmony_ci	struct io_rsrc_data *data;
32262306a36Sopenharmony_ci	int ret = 0;
32362306a36Sopenharmony_ci	unsigned i;
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	data = kzalloc(sizeof(*data), GFP_KERNEL);
32662306a36Sopenharmony_ci	if (!data)
32762306a36Sopenharmony_ci		return -ENOMEM;
32862306a36Sopenharmony_ci	data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0]));
32962306a36Sopenharmony_ci	if (!data->tags) {
33062306a36Sopenharmony_ci		kfree(data);
33162306a36Sopenharmony_ci		return -ENOMEM;
33262306a36Sopenharmony_ci	}
33362306a36Sopenharmony_ci
33462306a36Sopenharmony_ci	data->nr = nr;
33562306a36Sopenharmony_ci	data->ctx = ctx;
33662306a36Sopenharmony_ci	data->rsrc_type = type;
33762306a36Sopenharmony_ci	if (utags) {
33862306a36Sopenharmony_ci		ret = -EFAULT;
33962306a36Sopenharmony_ci		for (i = 0; i < nr; i++) {
34062306a36Sopenharmony_ci			u64 *tag_slot = io_get_tag_slot(data, i);
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_ci			if (copy_from_user(tag_slot, &utags[i],
34362306a36Sopenharmony_ci					   sizeof(*tag_slot)))
34462306a36Sopenharmony_ci				goto fail;
34562306a36Sopenharmony_ci		}
34662306a36Sopenharmony_ci	}
34762306a36Sopenharmony_ci	*pdata = data;
34862306a36Sopenharmony_ci	return 0;
34962306a36Sopenharmony_cifail:
35062306a36Sopenharmony_ci	io_rsrc_data_free(data);
35162306a36Sopenharmony_ci	return ret;
35262306a36Sopenharmony_ci}
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_cistatic int __io_sqe_files_update(struct io_ring_ctx *ctx,
35562306a36Sopenharmony_ci				 struct io_uring_rsrc_update2 *up,
35662306a36Sopenharmony_ci				 unsigned nr_args)
35762306a36Sopenharmony_ci{
35862306a36Sopenharmony_ci	u64 __user *tags = u64_to_user_ptr(up->tags);
35962306a36Sopenharmony_ci	__s32 __user *fds = u64_to_user_ptr(up->data);
36062306a36Sopenharmony_ci	struct io_rsrc_data *data = ctx->file_data;
36162306a36Sopenharmony_ci	struct io_fixed_file *file_slot;
36262306a36Sopenharmony_ci	int fd, i, err = 0;
36362306a36Sopenharmony_ci	unsigned int done;
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci	if (!ctx->file_data)
36662306a36Sopenharmony_ci		return -ENXIO;
36762306a36Sopenharmony_ci	if (up->offset + nr_args > ctx->nr_user_files)
36862306a36Sopenharmony_ci		return -EINVAL;
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	for (done = 0; done < nr_args; done++) {
37162306a36Sopenharmony_ci		u64 tag = 0;
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci		if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) ||
37462306a36Sopenharmony_ci		    copy_from_user(&fd, &fds[done], sizeof(fd))) {
37562306a36Sopenharmony_ci			err = -EFAULT;
37662306a36Sopenharmony_ci			break;
37762306a36Sopenharmony_ci		}
37862306a36Sopenharmony_ci		if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) {
37962306a36Sopenharmony_ci			err = -EINVAL;
38062306a36Sopenharmony_ci			break;
38162306a36Sopenharmony_ci		}
38262306a36Sopenharmony_ci		if (fd == IORING_REGISTER_FILES_SKIP)
38362306a36Sopenharmony_ci			continue;
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci		i = array_index_nospec(up->offset + done, ctx->nr_user_files);
38662306a36Sopenharmony_ci		file_slot = io_fixed_file_slot(&ctx->file_table, i);
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci		if (file_slot->file_ptr) {
38962306a36Sopenharmony_ci			err = io_queue_rsrc_removal(data, i,
39062306a36Sopenharmony_ci						    io_slot_file(file_slot));
39162306a36Sopenharmony_ci			if (err)
39262306a36Sopenharmony_ci				break;
39362306a36Sopenharmony_ci			file_slot->file_ptr = 0;
39462306a36Sopenharmony_ci			io_file_bitmap_clear(&ctx->file_table, i);
39562306a36Sopenharmony_ci		}
39662306a36Sopenharmony_ci		if (fd != -1) {
39762306a36Sopenharmony_ci			struct file *file = fget(fd);
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_ci			if (!file) {
40062306a36Sopenharmony_ci				err = -EBADF;
40162306a36Sopenharmony_ci				break;
40262306a36Sopenharmony_ci			}
40362306a36Sopenharmony_ci			/*
40462306a36Sopenharmony_ci			 * Don't allow io_uring instances to be registered.
40562306a36Sopenharmony_ci			 */
40662306a36Sopenharmony_ci			if (io_is_uring_fops(file)) {
40762306a36Sopenharmony_ci				fput(file);
40862306a36Sopenharmony_ci				err = -EBADF;
40962306a36Sopenharmony_ci				break;
41062306a36Sopenharmony_ci			}
41162306a36Sopenharmony_ci			*io_get_tag_slot(data, i) = tag;
41262306a36Sopenharmony_ci			io_fixed_file_set(file_slot, file);
41362306a36Sopenharmony_ci			io_file_bitmap_set(&ctx->file_table, i);
41462306a36Sopenharmony_ci		}
41562306a36Sopenharmony_ci	}
41662306a36Sopenharmony_ci	return done ? done : err;
41762306a36Sopenharmony_ci}
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_cistatic int __io_sqe_buffers_update(struct io_ring_ctx *ctx,
42062306a36Sopenharmony_ci				   struct io_uring_rsrc_update2 *up,
42162306a36Sopenharmony_ci				   unsigned int nr_args)
42262306a36Sopenharmony_ci{
42362306a36Sopenharmony_ci	u64 __user *tags = u64_to_user_ptr(up->tags);
42462306a36Sopenharmony_ci	struct iovec iov, __user *iovs = u64_to_user_ptr(up->data);
42562306a36Sopenharmony_ci	struct page *last_hpage = NULL;
42662306a36Sopenharmony_ci	__u32 done;
42762306a36Sopenharmony_ci	int i, err;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	if (!ctx->buf_data)
43062306a36Sopenharmony_ci		return -ENXIO;
43162306a36Sopenharmony_ci	if (up->offset + nr_args > ctx->nr_user_bufs)
43262306a36Sopenharmony_ci		return -EINVAL;
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	for (done = 0; done < nr_args; done++) {
43562306a36Sopenharmony_ci		struct io_mapped_ubuf *imu;
43662306a36Sopenharmony_ci		u64 tag = 0;
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci		err = io_copy_iov(ctx, &iov, iovs, done);
43962306a36Sopenharmony_ci		if (err)
44062306a36Sopenharmony_ci			break;
44162306a36Sopenharmony_ci		if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) {
44262306a36Sopenharmony_ci			err = -EFAULT;
44362306a36Sopenharmony_ci			break;
44462306a36Sopenharmony_ci		}
44562306a36Sopenharmony_ci		err = io_buffer_validate(&iov);
44662306a36Sopenharmony_ci		if (err)
44762306a36Sopenharmony_ci			break;
44862306a36Sopenharmony_ci		if (!iov.iov_base && tag) {
44962306a36Sopenharmony_ci			err = -EINVAL;
45062306a36Sopenharmony_ci			break;
45162306a36Sopenharmony_ci		}
45262306a36Sopenharmony_ci		err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage);
45362306a36Sopenharmony_ci		if (err)
45462306a36Sopenharmony_ci			break;
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci		i = array_index_nospec(up->offset + done, ctx->nr_user_bufs);
45762306a36Sopenharmony_ci		if (ctx->user_bufs[i] != &dummy_ubuf) {
45862306a36Sopenharmony_ci			err = io_queue_rsrc_removal(ctx->buf_data, i,
45962306a36Sopenharmony_ci						    ctx->user_bufs[i]);
46062306a36Sopenharmony_ci			if (unlikely(err)) {
46162306a36Sopenharmony_ci				io_buffer_unmap(ctx, &imu);
46262306a36Sopenharmony_ci				break;
46362306a36Sopenharmony_ci			}
46462306a36Sopenharmony_ci			ctx->user_bufs[i] = (struct io_mapped_ubuf *)&dummy_ubuf;
46562306a36Sopenharmony_ci		}
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci		ctx->user_bufs[i] = imu;
46862306a36Sopenharmony_ci		*io_get_tag_slot(ctx->buf_data, i) = tag;
46962306a36Sopenharmony_ci	}
47062306a36Sopenharmony_ci	return done ? done : err;
47162306a36Sopenharmony_ci}
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_cistatic int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type,
47462306a36Sopenharmony_ci				     struct io_uring_rsrc_update2 *up,
47562306a36Sopenharmony_ci				     unsigned nr_args)
47662306a36Sopenharmony_ci{
47762306a36Sopenharmony_ci	__u32 tmp;
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci	lockdep_assert_held(&ctx->uring_lock);
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	if (check_add_overflow(up->offset, nr_args, &tmp))
48262306a36Sopenharmony_ci		return -EOVERFLOW;
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci	switch (type) {
48562306a36Sopenharmony_ci	case IORING_RSRC_FILE:
48662306a36Sopenharmony_ci		return __io_sqe_files_update(ctx, up, nr_args);
48762306a36Sopenharmony_ci	case IORING_RSRC_BUFFER:
48862306a36Sopenharmony_ci		return __io_sqe_buffers_update(ctx, up, nr_args);
48962306a36Sopenharmony_ci	}
49062306a36Sopenharmony_ci	return -EINVAL;
49162306a36Sopenharmony_ci}
49262306a36Sopenharmony_ci
49362306a36Sopenharmony_ciint io_register_files_update(struct io_ring_ctx *ctx, void __user *arg,
49462306a36Sopenharmony_ci			     unsigned nr_args)
49562306a36Sopenharmony_ci{
49662306a36Sopenharmony_ci	struct io_uring_rsrc_update2 up;
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci	if (!nr_args)
49962306a36Sopenharmony_ci		return -EINVAL;
50062306a36Sopenharmony_ci	memset(&up, 0, sizeof(up));
50162306a36Sopenharmony_ci	if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update)))
50262306a36Sopenharmony_ci		return -EFAULT;
50362306a36Sopenharmony_ci	if (up.resv || up.resv2)
50462306a36Sopenharmony_ci		return -EINVAL;
50562306a36Sopenharmony_ci	return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args);
50662306a36Sopenharmony_ci}
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ciint io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg,
50962306a36Sopenharmony_ci			    unsigned size, unsigned type)
51062306a36Sopenharmony_ci{
51162306a36Sopenharmony_ci	struct io_uring_rsrc_update2 up;
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	if (size != sizeof(up))
51462306a36Sopenharmony_ci		return -EINVAL;
51562306a36Sopenharmony_ci	if (copy_from_user(&up, arg, sizeof(up)))
51662306a36Sopenharmony_ci		return -EFAULT;
51762306a36Sopenharmony_ci	if (!up.nr || up.resv || up.resv2)
51862306a36Sopenharmony_ci		return -EINVAL;
51962306a36Sopenharmony_ci	return __io_register_rsrc_update(ctx, type, &up, up.nr);
52062306a36Sopenharmony_ci}
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci__cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
52362306a36Sopenharmony_ci			    unsigned int size, unsigned int type)
52462306a36Sopenharmony_ci{
52562306a36Sopenharmony_ci	struct io_uring_rsrc_register rr;
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci	/* keep it extendible */
52862306a36Sopenharmony_ci	if (size != sizeof(rr))
52962306a36Sopenharmony_ci		return -EINVAL;
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci	memset(&rr, 0, sizeof(rr));
53262306a36Sopenharmony_ci	if (copy_from_user(&rr, arg, size))
53362306a36Sopenharmony_ci		return -EFAULT;
53462306a36Sopenharmony_ci	if (!rr.nr || rr.resv2)
53562306a36Sopenharmony_ci		return -EINVAL;
53662306a36Sopenharmony_ci	if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE)
53762306a36Sopenharmony_ci		return -EINVAL;
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci	switch (type) {
54062306a36Sopenharmony_ci	case IORING_RSRC_FILE:
54162306a36Sopenharmony_ci		if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data)
54262306a36Sopenharmony_ci			break;
54362306a36Sopenharmony_ci		return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data),
54462306a36Sopenharmony_ci					     rr.nr, u64_to_user_ptr(rr.tags));
54562306a36Sopenharmony_ci	case IORING_RSRC_BUFFER:
54662306a36Sopenharmony_ci		if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data)
54762306a36Sopenharmony_ci			break;
54862306a36Sopenharmony_ci		return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data),
54962306a36Sopenharmony_ci					       rr.nr, u64_to_user_ptr(rr.tags));
55062306a36Sopenharmony_ci	}
55162306a36Sopenharmony_ci	return -EINVAL;
55262306a36Sopenharmony_ci}
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ciint io_files_update_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
55562306a36Sopenharmony_ci{
55662306a36Sopenharmony_ci	struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update);
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci	if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
55962306a36Sopenharmony_ci		return -EINVAL;
56062306a36Sopenharmony_ci	if (sqe->rw_flags || sqe->splice_fd_in)
56162306a36Sopenharmony_ci		return -EINVAL;
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	up->offset = READ_ONCE(sqe->off);
56462306a36Sopenharmony_ci	up->nr_args = READ_ONCE(sqe->len);
56562306a36Sopenharmony_ci	if (!up->nr_args)
56662306a36Sopenharmony_ci		return -EINVAL;
56762306a36Sopenharmony_ci	up->arg = READ_ONCE(sqe->addr);
56862306a36Sopenharmony_ci	return 0;
56962306a36Sopenharmony_ci}
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_cistatic int io_files_update_with_index_alloc(struct io_kiocb *req,
57262306a36Sopenharmony_ci					    unsigned int issue_flags)
57362306a36Sopenharmony_ci{
57462306a36Sopenharmony_ci	struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update);
57562306a36Sopenharmony_ci	__s32 __user *fds = u64_to_user_ptr(up->arg);
57662306a36Sopenharmony_ci	unsigned int done;
57762306a36Sopenharmony_ci	struct file *file;
57862306a36Sopenharmony_ci	int ret, fd;
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ci	if (!req->ctx->file_data)
58162306a36Sopenharmony_ci		return -ENXIO;
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci	for (done = 0; done < up->nr_args; done++) {
58462306a36Sopenharmony_ci		if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
58562306a36Sopenharmony_ci			ret = -EFAULT;
58662306a36Sopenharmony_ci			break;
58762306a36Sopenharmony_ci		}
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci		file = fget(fd);
59062306a36Sopenharmony_ci		if (!file) {
59162306a36Sopenharmony_ci			ret = -EBADF;
59262306a36Sopenharmony_ci			break;
59362306a36Sopenharmony_ci		}
59462306a36Sopenharmony_ci		ret = io_fixed_fd_install(req, issue_flags, file,
59562306a36Sopenharmony_ci					  IORING_FILE_INDEX_ALLOC);
59662306a36Sopenharmony_ci		if (ret < 0)
59762306a36Sopenharmony_ci			break;
59862306a36Sopenharmony_ci		if (copy_to_user(&fds[done], &ret, sizeof(ret))) {
59962306a36Sopenharmony_ci			__io_close_fixed(req->ctx, issue_flags, ret);
60062306a36Sopenharmony_ci			ret = -EFAULT;
60162306a36Sopenharmony_ci			break;
60262306a36Sopenharmony_ci		}
60362306a36Sopenharmony_ci	}
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_ci	if (done)
60662306a36Sopenharmony_ci		return done;
60762306a36Sopenharmony_ci	return ret;
60862306a36Sopenharmony_ci}
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ciint io_files_update(struct io_kiocb *req, unsigned int issue_flags)
61162306a36Sopenharmony_ci{
61262306a36Sopenharmony_ci	struct io_rsrc_update *up = io_kiocb_to_cmd(req, struct io_rsrc_update);
61362306a36Sopenharmony_ci	struct io_ring_ctx *ctx = req->ctx;
61462306a36Sopenharmony_ci	struct io_uring_rsrc_update2 up2;
61562306a36Sopenharmony_ci	int ret;
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	up2.offset = up->offset;
61862306a36Sopenharmony_ci	up2.data = up->arg;
61962306a36Sopenharmony_ci	up2.nr = 0;
62062306a36Sopenharmony_ci	up2.tags = 0;
62162306a36Sopenharmony_ci	up2.resv = 0;
62262306a36Sopenharmony_ci	up2.resv2 = 0;
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci	if (up->offset == IORING_FILE_INDEX_ALLOC) {
62562306a36Sopenharmony_ci		ret = io_files_update_with_index_alloc(req, issue_flags);
62662306a36Sopenharmony_ci	} else {
62762306a36Sopenharmony_ci		io_ring_submit_lock(ctx, issue_flags);
62862306a36Sopenharmony_ci		ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE,
62962306a36Sopenharmony_ci						&up2, up->nr_args);
63062306a36Sopenharmony_ci		io_ring_submit_unlock(ctx, issue_flags);
63162306a36Sopenharmony_ci	}
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_ci	if (ret < 0)
63462306a36Sopenharmony_ci		req_set_fail(req);
63562306a36Sopenharmony_ci	io_req_set_res(req, ret, 0);
63662306a36Sopenharmony_ci	return IOU_OK;
63762306a36Sopenharmony_ci}
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ciint io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, void *rsrc)
64062306a36Sopenharmony_ci{
64162306a36Sopenharmony_ci	struct io_ring_ctx *ctx = data->ctx;
64262306a36Sopenharmony_ci	struct io_rsrc_node *node = ctx->rsrc_node;
64362306a36Sopenharmony_ci	u64 *tag_slot = io_get_tag_slot(data, idx);
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	ctx->rsrc_node = io_rsrc_node_alloc(ctx);
64662306a36Sopenharmony_ci	if (unlikely(!ctx->rsrc_node)) {
64762306a36Sopenharmony_ci		ctx->rsrc_node = node;
64862306a36Sopenharmony_ci		return -ENOMEM;
64962306a36Sopenharmony_ci	}
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_ci	node->item.rsrc = rsrc;
65262306a36Sopenharmony_ci	node->type = data->rsrc_type;
65362306a36Sopenharmony_ci	node->item.tag = *tag_slot;
65462306a36Sopenharmony_ci	*tag_slot = 0;
65562306a36Sopenharmony_ci	list_add_tail(&node->node, &ctx->rsrc_ref_list);
65662306a36Sopenharmony_ci	io_put_rsrc_node(ctx, node);
65762306a36Sopenharmony_ci	return 0;
65862306a36Sopenharmony_ci}
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_civoid __io_sqe_files_unregister(struct io_ring_ctx *ctx)
66162306a36Sopenharmony_ci{
66262306a36Sopenharmony_ci	int i;
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_ci	for (i = 0; i < ctx->nr_user_files; i++) {
66562306a36Sopenharmony_ci		struct file *file = io_file_from_index(&ctx->file_table, i);
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci		if (!file)
66862306a36Sopenharmony_ci			continue;
66962306a36Sopenharmony_ci		io_file_bitmap_clear(&ctx->file_table, i);
67062306a36Sopenharmony_ci		fput(file);
67162306a36Sopenharmony_ci	}
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_ci	io_free_file_tables(&ctx->file_table);
67462306a36Sopenharmony_ci	io_file_table_set_alloc_range(ctx, 0, 0);
67562306a36Sopenharmony_ci	io_rsrc_data_free(ctx->file_data);
67662306a36Sopenharmony_ci	ctx->file_data = NULL;
67762306a36Sopenharmony_ci	ctx->nr_user_files = 0;
67862306a36Sopenharmony_ci}
67962306a36Sopenharmony_ci
68062306a36Sopenharmony_ciint io_sqe_files_unregister(struct io_ring_ctx *ctx)
68162306a36Sopenharmony_ci{
68262306a36Sopenharmony_ci	unsigned nr = ctx->nr_user_files;
68362306a36Sopenharmony_ci	int ret;
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci	if (!ctx->file_data)
68662306a36Sopenharmony_ci		return -ENXIO;
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci	/*
68962306a36Sopenharmony_ci	 * Quiesce may unlock ->uring_lock, and while it's not held
69062306a36Sopenharmony_ci	 * prevent new requests using the table.
69162306a36Sopenharmony_ci	 */
69262306a36Sopenharmony_ci	ctx->nr_user_files = 0;
69362306a36Sopenharmony_ci	ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
69462306a36Sopenharmony_ci	ctx->nr_user_files = nr;
69562306a36Sopenharmony_ci	if (!ret)
69662306a36Sopenharmony_ci		__io_sqe_files_unregister(ctx);
69762306a36Sopenharmony_ci	return ret;
69862306a36Sopenharmony_ci}
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ciint io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
70162306a36Sopenharmony_ci			  unsigned nr_args, u64 __user *tags)
70262306a36Sopenharmony_ci{
70362306a36Sopenharmony_ci	__s32 __user *fds = (__s32 __user *) arg;
70462306a36Sopenharmony_ci	struct file *file;
70562306a36Sopenharmony_ci	int fd, ret;
70662306a36Sopenharmony_ci	unsigned i;
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ci	if (ctx->file_data)
70962306a36Sopenharmony_ci		return -EBUSY;
71062306a36Sopenharmony_ci	if (!nr_args)
71162306a36Sopenharmony_ci		return -EINVAL;
71262306a36Sopenharmony_ci	if (nr_args > IORING_MAX_FIXED_FILES)
71362306a36Sopenharmony_ci		return -EMFILE;
71462306a36Sopenharmony_ci	if (nr_args > rlimit(RLIMIT_NOFILE))
71562306a36Sopenharmony_ci		return -EMFILE;
71662306a36Sopenharmony_ci	ret = io_rsrc_data_alloc(ctx, IORING_RSRC_FILE, tags, nr_args,
71762306a36Sopenharmony_ci				 &ctx->file_data);
71862306a36Sopenharmony_ci	if (ret)
71962306a36Sopenharmony_ci		return ret;
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	if (!io_alloc_file_tables(&ctx->file_table, nr_args)) {
72262306a36Sopenharmony_ci		io_rsrc_data_free(ctx->file_data);
72362306a36Sopenharmony_ci		ctx->file_data = NULL;
72462306a36Sopenharmony_ci		return -ENOMEM;
72562306a36Sopenharmony_ci	}
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci	for (i = 0; i < nr_args; i++, ctx->nr_user_files++) {
72862306a36Sopenharmony_ci		struct io_fixed_file *file_slot;
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_ci		if (fds && copy_from_user(&fd, &fds[i], sizeof(fd))) {
73162306a36Sopenharmony_ci			ret = -EFAULT;
73262306a36Sopenharmony_ci			goto fail;
73362306a36Sopenharmony_ci		}
73462306a36Sopenharmony_ci		/* allow sparse sets */
73562306a36Sopenharmony_ci		if (!fds || fd == -1) {
73662306a36Sopenharmony_ci			ret = -EINVAL;
73762306a36Sopenharmony_ci			if (unlikely(*io_get_tag_slot(ctx->file_data, i)))
73862306a36Sopenharmony_ci				goto fail;
73962306a36Sopenharmony_ci			continue;
74062306a36Sopenharmony_ci		}
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_ci		file = fget(fd);
74362306a36Sopenharmony_ci		ret = -EBADF;
74462306a36Sopenharmony_ci		if (unlikely(!file))
74562306a36Sopenharmony_ci			goto fail;
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_ci		/*
74862306a36Sopenharmony_ci		 * Don't allow io_uring instances to be registered.
74962306a36Sopenharmony_ci		 */
75062306a36Sopenharmony_ci		if (io_is_uring_fops(file)) {
75162306a36Sopenharmony_ci			fput(file);
75262306a36Sopenharmony_ci			goto fail;
75362306a36Sopenharmony_ci		}
75462306a36Sopenharmony_ci		file_slot = io_fixed_file_slot(&ctx->file_table, i);
75562306a36Sopenharmony_ci		io_fixed_file_set(file_slot, file);
75662306a36Sopenharmony_ci		io_file_bitmap_set(&ctx->file_table, i);
75762306a36Sopenharmony_ci	}
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci	/* default it to the whole table */
76062306a36Sopenharmony_ci	io_file_table_set_alloc_range(ctx, 0, ctx->nr_user_files);
76162306a36Sopenharmony_ci	return 0;
76262306a36Sopenharmony_cifail:
76362306a36Sopenharmony_ci	__io_sqe_files_unregister(ctx);
76462306a36Sopenharmony_ci	return ret;
76562306a36Sopenharmony_ci}
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_cistatic void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc)
76862306a36Sopenharmony_ci{
76962306a36Sopenharmony_ci	io_buffer_unmap(ctx, &prsrc->buf);
77062306a36Sopenharmony_ci	prsrc->buf = NULL;
77162306a36Sopenharmony_ci}
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_civoid __io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
77462306a36Sopenharmony_ci{
77562306a36Sopenharmony_ci	unsigned int i;
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ci	for (i = 0; i < ctx->nr_user_bufs; i++)
77862306a36Sopenharmony_ci		io_buffer_unmap(ctx, &ctx->user_bufs[i]);
77962306a36Sopenharmony_ci	kfree(ctx->user_bufs);
78062306a36Sopenharmony_ci	io_rsrc_data_free(ctx->buf_data);
78162306a36Sopenharmony_ci	ctx->user_bufs = NULL;
78262306a36Sopenharmony_ci	ctx->buf_data = NULL;
78362306a36Sopenharmony_ci	ctx->nr_user_bufs = 0;
78462306a36Sopenharmony_ci}
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ciint io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
78762306a36Sopenharmony_ci{
78862306a36Sopenharmony_ci	unsigned nr = ctx->nr_user_bufs;
78962306a36Sopenharmony_ci	int ret;
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci	if (!ctx->buf_data)
79262306a36Sopenharmony_ci		return -ENXIO;
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	/*
79562306a36Sopenharmony_ci	 * Quiesce may unlock ->uring_lock, and while it's not held
79662306a36Sopenharmony_ci	 * prevent new requests using the table.
79762306a36Sopenharmony_ci	 */
79862306a36Sopenharmony_ci	ctx->nr_user_bufs = 0;
79962306a36Sopenharmony_ci	ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
80062306a36Sopenharmony_ci	ctx->nr_user_bufs = nr;
80162306a36Sopenharmony_ci	if (!ret)
80262306a36Sopenharmony_ci		__io_sqe_buffers_unregister(ctx);
80362306a36Sopenharmony_ci	return ret;
80462306a36Sopenharmony_ci}
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_ci/*
80762306a36Sopenharmony_ci * Not super efficient, but this is just a registration time. And we do cache
80862306a36Sopenharmony_ci * the last compound head, so generally we'll only do a full search if we don't
80962306a36Sopenharmony_ci * match that one.
81062306a36Sopenharmony_ci *
81162306a36Sopenharmony_ci * We check if the given compound head page has already been accounted, to
81262306a36Sopenharmony_ci * avoid double accounting it. This allows us to account the full size of the
81362306a36Sopenharmony_ci * page, not just the constituent pages of a huge page.
81462306a36Sopenharmony_ci */
81562306a36Sopenharmony_cistatic bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages,
81662306a36Sopenharmony_ci				  int nr_pages, struct page *hpage)
81762306a36Sopenharmony_ci{
81862306a36Sopenharmony_ci	int i, j;
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci	/* check current page array */
82162306a36Sopenharmony_ci	for (i = 0; i < nr_pages; i++) {
82262306a36Sopenharmony_ci		if (!PageCompound(pages[i]))
82362306a36Sopenharmony_ci			continue;
82462306a36Sopenharmony_ci		if (compound_head(pages[i]) == hpage)
82562306a36Sopenharmony_ci			return true;
82662306a36Sopenharmony_ci	}
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci	/* check previously registered pages */
82962306a36Sopenharmony_ci	for (i = 0; i < ctx->nr_user_bufs; i++) {
83062306a36Sopenharmony_ci		struct io_mapped_ubuf *imu = ctx->user_bufs[i];
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ci		for (j = 0; j < imu->nr_bvecs; j++) {
83362306a36Sopenharmony_ci			if (!PageCompound(imu->bvec[j].bv_page))
83462306a36Sopenharmony_ci				continue;
83562306a36Sopenharmony_ci			if (compound_head(imu->bvec[j].bv_page) == hpage)
83662306a36Sopenharmony_ci				return true;
83762306a36Sopenharmony_ci		}
83862306a36Sopenharmony_ci	}
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci	return false;
84162306a36Sopenharmony_ci}
84262306a36Sopenharmony_ci
84362306a36Sopenharmony_cistatic int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages,
84462306a36Sopenharmony_ci				 int nr_pages, struct io_mapped_ubuf *imu,
84562306a36Sopenharmony_ci				 struct page **last_hpage)
84662306a36Sopenharmony_ci{
84762306a36Sopenharmony_ci	int i, ret;
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	imu->acct_pages = 0;
85062306a36Sopenharmony_ci	for (i = 0; i < nr_pages; i++) {
85162306a36Sopenharmony_ci		if (!PageCompound(pages[i])) {
85262306a36Sopenharmony_ci			imu->acct_pages++;
85362306a36Sopenharmony_ci		} else {
85462306a36Sopenharmony_ci			struct page *hpage;
85562306a36Sopenharmony_ci
85662306a36Sopenharmony_ci			hpage = compound_head(pages[i]);
85762306a36Sopenharmony_ci			if (hpage == *last_hpage)
85862306a36Sopenharmony_ci				continue;
85962306a36Sopenharmony_ci			*last_hpage = hpage;
86062306a36Sopenharmony_ci			if (headpage_already_acct(ctx, pages, i, hpage))
86162306a36Sopenharmony_ci				continue;
86262306a36Sopenharmony_ci			imu->acct_pages += page_size(hpage) >> PAGE_SHIFT;
86362306a36Sopenharmony_ci		}
86462306a36Sopenharmony_ci	}
86562306a36Sopenharmony_ci
86662306a36Sopenharmony_ci	if (!imu->acct_pages)
86762306a36Sopenharmony_ci		return 0;
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_ci	ret = io_account_mem(ctx, imu->acct_pages);
87062306a36Sopenharmony_ci	if (ret)
87162306a36Sopenharmony_ci		imu->acct_pages = 0;
87262306a36Sopenharmony_ci	return ret;
87362306a36Sopenharmony_ci}
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_cistruct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages)
87662306a36Sopenharmony_ci{
87762306a36Sopenharmony_ci	unsigned long start, end, nr_pages;
87862306a36Sopenharmony_ci	struct page **pages = NULL;
87962306a36Sopenharmony_ci	int pret, ret = -ENOMEM;
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_ci	end = (ubuf + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
88262306a36Sopenharmony_ci	start = ubuf >> PAGE_SHIFT;
88362306a36Sopenharmony_ci	nr_pages = end - start;
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
88662306a36Sopenharmony_ci	if (!pages)
88762306a36Sopenharmony_ci		goto done;
88862306a36Sopenharmony_ci
88962306a36Sopenharmony_ci	ret = 0;
89062306a36Sopenharmony_ci	mmap_read_lock(current->mm);
89162306a36Sopenharmony_ci	pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
89262306a36Sopenharmony_ci			      pages);
89362306a36Sopenharmony_ci	if (pret == nr_pages)
89462306a36Sopenharmony_ci		*npages = nr_pages;
89562306a36Sopenharmony_ci	else
89662306a36Sopenharmony_ci		ret = pret < 0 ? pret : -EFAULT;
89762306a36Sopenharmony_ci
89862306a36Sopenharmony_ci	mmap_read_unlock(current->mm);
89962306a36Sopenharmony_ci	if (ret) {
90062306a36Sopenharmony_ci		/* if we did partial map, release any pages we did get */
90162306a36Sopenharmony_ci		if (pret > 0)
90262306a36Sopenharmony_ci			unpin_user_pages(pages, pret);
90362306a36Sopenharmony_ci		goto done;
90462306a36Sopenharmony_ci	}
90562306a36Sopenharmony_ci	ret = 0;
90662306a36Sopenharmony_cidone:
90762306a36Sopenharmony_ci	if (ret < 0) {
90862306a36Sopenharmony_ci		kvfree(pages);
90962306a36Sopenharmony_ci		pages = ERR_PTR(ret);
91062306a36Sopenharmony_ci	}
91162306a36Sopenharmony_ci	return pages;
91262306a36Sopenharmony_ci}
91362306a36Sopenharmony_ci
91462306a36Sopenharmony_cistatic int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
91562306a36Sopenharmony_ci				  struct io_mapped_ubuf **pimu,
91662306a36Sopenharmony_ci				  struct page **last_hpage)
91762306a36Sopenharmony_ci{
91862306a36Sopenharmony_ci	struct io_mapped_ubuf *imu = NULL;
91962306a36Sopenharmony_ci	struct page **pages = NULL;
92062306a36Sopenharmony_ci	unsigned long off;
92162306a36Sopenharmony_ci	size_t size;
92262306a36Sopenharmony_ci	int ret, nr_pages, i;
92362306a36Sopenharmony_ci	struct folio *folio = NULL;
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci	*pimu = (struct io_mapped_ubuf *)&dummy_ubuf;
92662306a36Sopenharmony_ci	if (!iov->iov_base)
92762306a36Sopenharmony_ci		return 0;
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_ci	ret = -ENOMEM;
93062306a36Sopenharmony_ci	pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len,
93162306a36Sopenharmony_ci				&nr_pages);
93262306a36Sopenharmony_ci	if (IS_ERR(pages)) {
93362306a36Sopenharmony_ci		ret = PTR_ERR(pages);
93462306a36Sopenharmony_ci		pages = NULL;
93562306a36Sopenharmony_ci		goto done;
93662306a36Sopenharmony_ci	}
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_ci	/* If it's a huge page, try to coalesce them into a single bvec entry */
93962306a36Sopenharmony_ci	if (nr_pages > 1) {
94062306a36Sopenharmony_ci		folio = page_folio(pages[0]);
94162306a36Sopenharmony_ci		for (i = 1; i < nr_pages; i++) {
94262306a36Sopenharmony_ci			/*
94362306a36Sopenharmony_ci			 * Pages must be consecutive and on the same folio for
94462306a36Sopenharmony_ci			 * this to work
94562306a36Sopenharmony_ci			 */
94662306a36Sopenharmony_ci			if (page_folio(pages[i]) != folio ||
94762306a36Sopenharmony_ci			    pages[i] != pages[i - 1] + 1) {
94862306a36Sopenharmony_ci				folio = NULL;
94962306a36Sopenharmony_ci				break;
95062306a36Sopenharmony_ci			}
95162306a36Sopenharmony_ci		}
95262306a36Sopenharmony_ci		if (folio) {
95362306a36Sopenharmony_ci			/*
95462306a36Sopenharmony_ci			 * The pages are bound to the folio, it doesn't
95562306a36Sopenharmony_ci			 * actually unpin them but drops all but one reference,
95662306a36Sopenharmony_ci			 * which is usually put down by io_buffer_unmap().
95762306a36Sopenharmony_ci			 * Note, needs a better helper.
95862306a36Sopenharmony_ci			 */
95962306a36Sopenharmony_ci			unpin_user_pages(&pages[1], nr_pages - 1);
96062306a36Sopenharmony_ci			nr_pages = 1;
96162306a36Sopenharmony_ci		}
96262306a36Sopenharmony_ci	}
96362306a36Sopenharmony_ci
96462306a36Sopenharmony_ci	imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
96562306a36Sopenharmony_ci	if (!imu)
96662306a36Sopenharmony_ci		goto done;
96762306a36Sopenharmony_ci
96862306a36Sopenharmony_ci	ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage);
96962306a36Sopenharmony_ci	if (ret) {
97062306a36Sopenharmony_ci		unpin_user_pages(pages, nr_pages);
97162306a36Sopenharmony_ci		goto done;
97262306a36Sopenharmony_ci	}
97362306a36Sopenharmony_ci
97462306a36Sopenharmony_ci	off = (unsigned long) iov->iov_base & ~PAGE_MASK;
97562306a36Sopenharmony_ci	size = iov->iov_len;
97662306a36Sopenharmony_ci	/* store original address for later verification */
97762306a36Sopenharmony_ci	imu->ubuf = (unsigned long) iov->iov_base;
97862306a36Sopenharmony_ci	imu->ubuf_end = imu->ubuf + iov->iov_len;
97962306a36Sopenharmony_ci	imu->nr_bvecs = nr_pages;
98062306a36Sopenharmony_ci	*pimu = imu;
98162306a36Sopenharmony_ci	ret = 0;
98262306a36Sopenharmony_ci
98362306a36Sopenharmony_ci	if (folio) {
98462306a36Sopenharmony_ci		bvec_set_page(&imu->bvec[0], pages[0], size, off);
98562306a36Sopenharmony_ci		goto done;
98662306a36Sopenharmony_ci	}
98762306a36Sopenharmony_ci	for (i = 0; i < nr_pages; i++) {
98862306a36Sopenharmony_ci		size_t vec_len;
98962306a36Sopenharmony_ci
99062306a36Sopenharmony_ci		vec_len = min_t(size_t, size, PAGE_SIZE - off);
99162306a36Sopenharmony_ci		bvec_set_page(&imu->bvec[i], pages[i], vec_len, off);
99262306a36Sopenharmony_ci		off = 0;
99362306a36Sopenharmony_ci		size -= vec_len;
99462306a36Sopenharmony_ci	}
99562306a36Sopenharmony_cidone:
99662306a36Sopenharmony_ci	if (ret)
99762306a36Sopenharmony_ci		kvfree(imu);
99862306a36Sopenharmony_ci	kvfree(pages);
99962306a36Sopenharmony_ci	return ret;
100062306a36Sopenharmony_ci}
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_cistatic int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args)
100362306a36Sopenharmony_ci{
100462306a36Sopenharmony_ci	ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL);
100562306a36Sopenharmony_ci	return ctx->user_bufs ? 0 : -ENOMEM;
100662306a36Sopenharmony_ci}
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_ciint io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
100962306a36Sopenharmony_ci			    unsigned int nr_args, u64 __user *tags)
101062306a36Sopenharmony_ci{
101162306a36Sopenharmony_ci	struct page *last_hpage = NULL;
101262306a36Sopenharmony_ci	struct io_rsrc_data *data;
101362306a36Sopenharmony_ci	int i, ret;
101462306a36Sopenharmony_ci	struct iovec iov;
101562306a36Sopenharmony_ci
101662306a36Sopenharmony_ci	BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16));
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci	if (ctx->user_bufs)
101962306a36Sopenharmony_ci		return -EBUSY;
102062306a36Sopenharmony_ci	if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
102162306a36Sopenharmony_ci		return -EINVAL;
102262306a36Sopenharmony_ci	ret = io_rsrc_data_alloc(ctx, IORING_RSRC_BUFFER, tags, nr_args, &data);
102362306a36Sopenharmony_ci	if (ret)
102462306a36Sopenharmony_ci		return ret;
102562306a36Sopenharmony_ci	ret = io_buffers_map_alloc(ctx, nr_args);
102662306a36Sopenharmony_ci	if (ret) {
102762306a36Sopenharmony_ci		io_rsrc_data_free(data);
102862306a36Sopenharmony_ci		return ret;
102962306a36Sopenharmony_ci	}
103062306a36Sopenharmony_ci
103162306a36Sopenharmony_ci	for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) {
103262306a36Sopenharmony_ci		if (arg) {
103362306a36Sopenharmony_ci			ret = io_copy_iov(ctx, &iov, arg, i);
103462306a36Sopenharmony_ci			if (ret)
103562306a36Sopenharmony_ci				break;
103662306a36Sopenharmony_ci			ret = io_buffer_validate(&iov);
103762306a36Sopenharmony_ci			if (ret)
103862306a36Sopenharmony_ci				break;
103962306a36Sopenharmony_ci		} else {
104062306a36Sopenharmony_ci			memset(&iov, 0, sizeof(iov));
104162306a36Sopenharmony_ci		}
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_ci		if (!iov.iov_base && *io_get_tag_slot(data, i)) {
104462306a36Sopenharmony_ci			ret = -EINVAL;
104562306a36Sopenharmony_ci			break;
104662306a36Sopenharmony_ci		}
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_ci		ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
104962306a36Sopenharmony_ci					     &last_hpage);
105062306a36Sopenharmony_ci		if (ret)
105162306a36Sopenharmony_ci			break;
105262306a36Sopenharmony_ci	}
105362306a36Sopenharmony_ci
105462306a36Sopenharmony_ci	WARN_ON_ONCE(ctx->buf_data);
105562306a36Sopenharmony_ci
105662306a36Sopenharmony_ci	ctx->buf_data = data;
105762306a36Sopenharmony_ci	if (ret)
105862306a36Sopenharmony_ci		__io_sqe_buffers_unregister(ctx);
105962306a36Sopenharmony_ci	return ret;
106062306a36Sopenharmony_ci}
106162306a36Sopenharmony_ci
106262306a36Sopenharmony_ciint io_import_fixed(int ddir, struct iov_iter *iter,
106362306a36Sopenharmony_ci			   struct io_mapped_ubuf *imu,
106462306a36Sopenharmony_ci			   u64 buf_addr, size_t len)
106562306a36Sopenharmony_ci{
106662306a36Sopenharmony_ci	u64 buf_end;
106762306a36Sopenharmony_ci	size_t offset;
106862306a36Sopenharmony_ci
106962306a36Sopenharmony_ci	if (WARN_ON_ONCE(!imu))
107062306a36Sopenharmony_ci		return -EFAULT;
107162306a36Sopenharmony_ci	if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))
107262306a36Sopenharmony_ci		return -EFAULT;
107362306a36Sopenharmony_ci	/* not inside the mapped region */
107462306a36Sopenharmony_ci	if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end))
107562306a36Sopenharmony_ci		return -EFAULT;
107662306a36Sopenharmony_ci
107762306a36Sopenharmony_ci	/*
107862306a36Sopenharmony_ci	 * Might not be a start of buffer, set size appropriately
107962306a36Sopenharmony_ci	 * and advance us to the beginning.
108062306a36Sopenharmony_ci	 */
108162306a36Sopenharmony_ci	offset = buf_addr - imu->ubuf;
108262306a36Sopenharmony_ci	iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len);
108362306a36Sopenharmony_ci
108462306a36Sopenharmony_ci	if (offset) {
108562306a36Sopenharmony_ci		/*
108662306a36Sopenharmony_ci		 * Don't use iov_iter_advance() here, as it's really slow for
108762306a36Sopenharmony_ci		 * using the latter parts of a big fixed buffer - it iterates
108862306a36Sopenharmony_ci		 * over each segment manually. We can cheat a bit here, because
108962306a36Sopenharmony_ci		 * we know that:
109062306a36Sopenharmony_ci		 *
109162306a36Sopenharmony_ci		 * 1) it's a BVEC iter, we set it up
109262306a36Sopenharmony_ci		 * 2) all bvecs are PAGE_SIZE in size, except potentially the
109362306a36Sopenharmony_ci		 *    first and last bvec
109462306a36Sopenharmony_ci		 *
109562306a36Sopenharmony_ci		 * So just find our index, and adjust the iterator afterwards.
109662306a36Sopenharmony_ci		 * If the offset is within the first bvec (or the whole first
109762306a36Sopenharmony_ci		 * bvec, just use iov_iter_advance(). This makes it easier
109862306a36Sopenharmony_ci		 * since we can just skip the first segment, which may not
109962306a36Sopenharmony_ci		 * be PAGE_SIZE aligned.
110062306a36Sopenharmony_ci		 */
110162306a36Sopenharmony_ci		const struct bio_vec *bvec = imu->bvec;
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci		if (offset < bvec->bv_len) {
110462306a36Sopenharmony_ci			/*
110562306a36Sopenharmony_ci			 * Note, huge pages buffers consists of one large
110662306a36Sopenharmony_ci			 * bvec entry and should always go this way. The other
110762306a36Sopenharmony_ci			 * branch doesn't expect non PAGE_SIZE'd chunks.
110862306a36Sopenharmony_ci			 */
110962306a36Sopenharmony_ci			iter->bvec = bvec;
111062306a36Sopenharmony_ci			iter->nr_segs = bvec->bv_len;
111162306a36Sopenharmony_ci			iter->count -= offset;
111262306a36Sopenharmony_ci			iter->iov_offset = offset;
111362306a36Sopenharmony_ci		} else {
111462306a36Sopenharmony_ci			unsigned long seg_skip;
111562306a36Sopenharmony_ci
111662306a36Sopenharmony_ci			/* skip first vec */
111762306a36Sopenharmony_ci			offset -= bvec->bv_len;
111862306a36Sopenharmony_ci			seg_skip = 1 + (offset >> PAGE_SHIFT);
111962306a36Sopenharmony_ci
112062306a36Sopenharmony_ci			iter->bvec = bvec + seg_skip;
112162306a36Sopenharmony_ci			iter->nr_segs -= seg_skip;
112262306a36Sopenharmony_ci			iter->count -= bvec->bv_len + offset;
112362306a36Sopenharmony_ci			iter->iov_offset = offset & ~PAGE_MASK;
112462306a36Sopenharmony_ci		}
112562306a36Sopenharmony_ci	}
112662306a36Sopenharmony_ci
112762306a36Sopenharmony_ci	return 0;
112862306a36Sopenharmony_ci}
1129