162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci#include <linux/kernel.h>
362306a36Sopenharmony_ci#include <linux/errno.h>
462306a36Sopenharmony_ci#include <linux/file.h>
562306a36Sopenharmony_ci#include <linux/mm.h>
662306a36Sopenharmony_ci#include <linux/slab.h>
762306a36Sopenharmony_ci#include <linux/nospec.h>
862306a36Sopenharmony_ci#include <linux/io_uring.h>
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <uapi/linux/io_uring.h>
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#include "io_uring.h"
1362306a36Sopenharmony_ci#include "tctx.h"
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_cistatic struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx,
1662306a36Sopenharmony_ci					struct task_struct *task)
1762306a36Sopenharmony_ci{
1862306a36Sopenharmony_ci	struct io_wq_hash *hash;
1962306a36Sopenharmony_ci	struct io_wq_data data;
2062306a36Sopenharmony_ci	unsigned int concurrency;
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci	mutex_lock(&ctx->uring_lock);
2362306a36Sopenharmony_ci	hash = ctx->hash_map;
2462306a36Sopenharmony_ci	if (!hash) {
2562306a36Sopenharmony_ci		hash = kzalloc(sizeof(*hash), GFP_KERNEL);
2662306a36Sopenharmony_ci		if (!hash) {
2762306a36Sopenharmony_ci			mutex_unlock(&ctx->uring_lock);
2862306a36Sopenharmony_ci			return ERR_PTR(-ENOMEM);
2962306a36Sopenharmony_ci		}
3062306a36Sopenharmony_ci		refcount_set(&hash->refs, 1);
3162306a36Sopenharmony_ci		init_waitqueue_head(&hash->wait);
3262306a36Sopenharmony_ci		ctx->hash_map = hash;
3362306a36Sopenharmony_ci	}
3462306a36Sopenharmony_ci	mutex_unlock(&ctx->uring_lock);
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci	data.hash = hash;
3762306a36Sopenharmony_ci	data.task = task;
3862306a36Sopenharmony_ci	data.free_work = io_wq_free_work;
3962306a36Sopenharmony_ci	data.do_work = io_wq_submit_work;
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	/* Do QD, or 4 * CPUS, whatever is smallest */
4262306a36Sopenharmony_ci	concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci	return io_wq_create(concurrency, &data);
4562306a36Sopenharmony_ci}
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_civoid __io_uring_free(struct task_struct *tsk)
4862306a36Sopenharmony_ci{
4962306a36Sopenharmony_ci	struct io_uring_task *tctx = tsk->io_uring;
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	WARN_ON_ONCE(!xa_empty(&tctx->xa));
5262306a36Sopenharmony_ci	WARN_ON_ONCE(tctx->io_wq);
5362306a36Sopenharmony_ci	WARN_ON_ONCE(tctx->cached_refs);
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci	percpu_counter_destroy(&tctx->inflight);
5662306a36Sopenharmony_ci	kfree(tctx);
5762306a36Sopenharmony_ci	tsk->io_uring = NULL;
5862306a36Sopenharmony_ci}
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci__cold int io_uring_alloc_task_context(struct task_struct *task,
6162306a36Sopenharmony_ci				       struct io_ring_ctx *ctx)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	struct io_uring_task *tctx;
6462306a36Sopenharmony_ci	int ret;
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	tctx = kzalloc(sizeof(*tctx), GFP_KERNEL);
6762306a36Sopenharmony_ci	if (unlikely(!tctx))
6862306a36Sopenharmony_ci		return -ENOMEM;
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci	ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
7162306a36Sopenharmony_ci	if (unlikely(ret)) {
7262306a36Sopenharmony_ci		kfree(tctx);
7362306a36Sopenharmony_ci		return ret;
7462306a36Sopenharmony_ci	}
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	tctx->io_wq = io_init_wq_offload(ctx, task);
7762306a36Sopenharmony_ci	if (IS_ERR(tctx->io_wq)) {
7862306a36Sopenharmony_ci		ret = PTR_ERR(tctx->io_wq);
7962306a36Sopenharmony_ci		percpu_counter_destroy(&tctx->inflight);
8062306a36Sopenharmony_ci		kfree(tctx);
8162306a36Sopenharmony_ci		return ret;
8262306a36Sopenharmony_ci	}
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	xa_init(&tctx->xa);
8562306a36Sopenharmony_ci	init_waitqueue_head(&tctx->wait);
8662306a36Sopenharmony_ci	atomic_set(&tctx->in_cancel, 0);
8762306a36Sopenharmony_ci	atomic_set(&tctx->inflight_tracked, 0);
8862306a36Sopenharmony_ci	task->io_uring = tctx;
8962306a36Sopenharmony_ci	init_llist_head(&tctx->task_list);
9062306a36Sopenharmony_ci	init_task_work(&tctx->task_work, tctx_task_work);
9162306a36Sopenharmony_ci	return 0;
9262306a36Sopenharmony_ci}
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ciint __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
9562306a36Sopenharmony_ci{
9662306a36Sopenharmony_ci	struct io_uring_task *tctx = current->io_uring;
9762306a36Sopenharmony_ci	struct io_tctx_node *node;
9862306a36Sopenharmony_ci	int ret;
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	if (unlikely(!tctx)) {
10162306a36Sopenharmony_ci		ret = io_uring_alloc_task_context(current, ctx);
10262306a36Sopenharmony_ci		if (unlikely(ret))
10362306a36Sopenharmony_ci			return ret;
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci		tctx = current->io_uring;
10662306a36Sopenharmony_ci		if (ctx->iowq_limits_set) {
10762306a36Sopenharmony_ci			unsigned int limits[2] = { ctx->iowq_limits[0],
10862306a36Sopenharmony_ci						   ctx->iowq_limits[1], };
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci			ret = io_wq_max_workers(tctx->io_wq, limits);
11162306a36Sopenharmony_ci			if (ret)
11262306a36Sopenharmony_ci				return ret;
11362306a36Sopenharmony_ci		}
11462306a36Sopenharmony_ci	}
11562306a36Sopenharmony_ci	if (!xa_load(&tctx->xa, (unsigned long)ctx)) {
11662306a36Sopenharmony_ci		node = kmalloc(sizeof(*node), GFP_KERNEL);
11762306a36Sopenharmony_ci		if (!node)
11862306a36Sopenharmony_ci			return -ENOMEM;
11962306a36Sopenharmony_ci		node->ctx = ctx;
12062306a36Sopenharmony_ci		node->task = current;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci		ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx,
12362306a36Sopenharmony_ci					node, GFP_KERNEL));
12462306a36Sopenharmony_ci		if (ret) {
12562306a36Sopenharmony_ci			kfree(node);
12662306a36Sopenharmony_ci			return ret;
12762306a36Sopenharmony_ci		}
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci		mutex_lock(&ctx->uring_lock);
13062306a36Sopenharmony_ci		list_add(&node->ctx_node, &ctx->tctx_list);
13162306a36Sopenharmony_ci		mutex_unlock(&ctx->uring_lock);
13262306a36Sopenharmony_ci	}
13362306a36Sopenharmony_ci	return 0;
13462306a36Sopenharmony_ci}
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ciint __io_uring_add_tctx_node_from_submit(struct io_ring_ctx *ctx)
13762306a36Sopenharmony_ci{
13862306a36Sopenharmony_ci	int ret;
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	if (ctx->flags & IORING_SETUP_SINGLE_ISSUER
14162306a36Sopenharmony_ci	    && ctx->submitter_task != current)
14262306a36Sopenharmony_ci		return -EEXIST;
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	ret = __io_uring_add_tctx_node(ctx);
14562306a36Sopenharmony_ci	if (ret)
14662306a36Sopenharmony_ci		return ret;
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci	current->io_uring->last = ctx;
14962306a36Sopenharmony_ci	return 0;
15062306a36Sopenharmony_ci}
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci/*
15362306a36Sopenharmony_ci * Remove this io_uring_file -> task mapping.
15462306a36Sopenharmony_ci */
15562306a36Sopenharmony_ci__cold void io_uring_del_tctx_node(unsigned long index)
15662306a36Sopenharmony_ci{
15762306a36Sopenharmony_ci	struct io_uring_task *tctx = current->io_uring;
15862306a36Sopenharmony_ci	struct io_tctx_node *node;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	if (!tctx)
16162306a36Sopenharmony_ci		return;
16262306a36Sopenharmony_ci	node = xa_erase(&tctx->xa, index);
16362306a36Sopenharmony_ci	if (!node)
16462306a36Sopenharmony_ci		return;
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	WARN_ON_ONCE(current != node->task);
16762306a36Sopenharmony_ci	WARN_ON_ONCE(list_empty(&node->ctx_node));
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	mutex_lock(&node->ctx->uring_lock);
17062306a36Sopenharmony_ci	list_del(&node->ctx_node);
17162306a36Sopenharmony_ci	mutex_unlock(&node->ctx->uring_lock);
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	if (tctx->last == node->ctx)
17462306a36Sopenharmony_ci		tctx->last = NULL;
17562306a36Sopenharmony_ci	kfree(node);
17662306a36Sopenharmony_ci}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci__cold void io_uring_clean_tctx(struct io_uring_task *tctx)
17962306a36Sopenharmony_ci{
18062306a36Sopenharmony_ci	struct io_wq *wq = tctx->io_wq;
18162306a36Sopenharmony_ci	struct io_tctx_node *node;
18262306a36Sopenharmony_ci	unsigned long index;
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	xa_for_each(&tctx->xa, index, node) {
18562306a36Sopenharmony_ci		io_uring_del_tctx_node(index);
18662306a36Sopenharmony_ci		cond_resched();
18762306a36Sopenharmony_ci	}
18862306a36Sopenharmony_ci	if (wq) {
18962306a36Sopenharmony_ci		/*
19062306a36Sopenharmony_ci		 * Must be after io_uring_del_tctx_node() (removes nodes under
19162306a36Sopenharmony_ci		 * uring_lock) to avoid race with io_uring_try_cancel_iowq().
19262306a36Sopenharmony_ci		 */
19362306a36Sopenharmony_ci		io_wq_put_and_exit(wq);
19462306a36Sopenharmony_ci		tctx->io_wq = NULL;
19562306a36Sopenharmony_ci	}
19662306a36Sopenharmony_ci}
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_civoid io_uring_unreg_ringfd(void)
19962306a36Sopenharmony_ci{
20062306a36Sopenharmony_ci	struct io_uring_task *tctx = current->io_uring;
20162306a36Sopenharmony_ci	int i;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	for (i = 0; i < IO_RINGFD_REG_MAX; i++) {
20462306a36Sopenharmony_ci		if (tctx->registered_rings[i]) {
20562306a36Sopenharmony_ci			fput(tctx->registered_rings[i]);
20662306a36Sopenharmony_ci			tctx->registered_rings[i] = NULL;
20762306a36Sopenharmony_ci		}
20862306a36Sopenharmony_ci	}
20962306a36Sopenharmony_ci}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ciint io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
21262306a36Sopenharmony_ci				     int start, int end)
21362306a36Sopenharmony_ci{
21462306a36Sopenharmony_ci	int offset;
21562306a36Sopenharmony_ci	for (offset = start; offset < end; offset++) {
21662306a36Sopenharmony_ci		offset = array_index_nospec(offset, IO_RINGFD_REG_MAX);
21762306a36Sopenharmony_ci		if (tctx->registered_rings[offset])
21862306a36Sopenharmony_ci			continue;
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci		tctx->registered_rings[offset] = file;
22162306a36Sopenharmony_ci		return offset;
22262306a36Sopenharmony_ci	}
22362306a36Sopenharmony_ci	return -EBUSY;
22462306a36Sopenharmony_ci}
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_cistatic int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd,
22762306a36Sopenharmony_ci				     int start, int end)
22862306a36Sopenharmony_ci{
22962306a36Sopenharmony_ci	struct file *file;
23062306a36Sopenharmony_ci	int offset;
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ci	file = fget(fd);
23362306a36Sopenharmony_ci	if (!file) {
23462306a36Sopenharmony_ci		return -EBADF;
23562306a36Sopenharmony_ci	} else if (!io_is_uring_fops(file)) {
23662306a36Sopenharmony_ci		fput(file);
23762306a36Sopenharmony_ci		return -EOPNOTSUPP;
23862306a36Sopenharmony_ci	}
23962306a36Sopenharmony_ci	offset = io_ring_add_registered_file(tctx, file, start, end);
24062306a36Sopenharmony_ci	if (offset < 0)
24162306a36Sopenharmony_ci		fput(file);
24262306a36Sopenharmony_ci	return offset;
24362306a36Sopenharmony_ci}
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci/*
24662306a36Sopenharmony_ci * Register a ring fd to avoid fdget/fdput for each io_uring_enter()
24762306a36Sopenharmony_ci * invocation. User passes in an array of struct io_uring_rsrc_update
24862306a36Sopenharmony_ci * with ->data set to the ring_fd, and ->offset given for the desired
24962306a36Sopenharmony_ci * index. If no index is desired, application may set ->offset == -1U
25062306a36Sopenharmony_ci * and we'll find an available index. Returns number of entries
25162306a36Sopenharmony_ci * successfully processed, or < 0 on error if none were processed.
25262306a36Sopenharmony_ci */
25362306a36Sopenharmony_ciint io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg,
25462306a36Sopenharmony_ci		       unsigned nr_args)
25562306a36Sopenharmony_ci{
25662306a36Sopenharmony_ci	struct io_uring_rsrc_update __user *arg = __arg;
25762306a36Sopenharmony_ci	struct io_uring_rsrc_update reg;
25862306a36Sopenharmony_ci	struct io_uring_task *tctx;
25962306a36Sopenharmony_ci	int ret, i;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	if (!nr_args || nr_args > IO_RINGFD_REG_MAX)
26262306a36Sopenharmony_ci		return -EINVAL;
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	mutex_unlock(&ctx->uring_lock);
26562306a36Sopenharmony_ci	ret = __io_uring_add_tctx_node(ctx);
26662306a36Sopenharmony_ci	mutex_lock(&ctx->uring_lock);
26762306a36Sopenharmony_ci	if (ret)
26862306a36Sopenharmony_ci		return ret;
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_ci	tctx = current->io_uring;
27162306a36Sopenharmony_ci	for (i = 0; i < nr_args; i++) {
27262306a36Sopenharmony_ci		int start, end;
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci		if (copy_from_user(&reg, &arg[i], sizeof(reg))) {
27562306a36Sopenharmony_ci			ret = -EFAULT;
27662306a36Sopenharmony_ci			break;
27762306a36Sopenharmony_ci		}
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci		if (reg.resv) {
28062306a36Sopenharmony_ci			ret = -EINVAL;
28162306a36Sopenharmony_ci			break;
28262306a36Sopenharmony_ci		}
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci		if (reg.offset == -1U) {
28562306a36Sopenharmony_ci			start = 0;
28662306a36Sopenharmony_ci			end = IO_RINGFD_REG_MAX;
28762306a36Sopenharmony_ci		} else {
28862306a36Sopenharmony_ci			if (reg.offset >= IO_RINGFD_REG_MAX) {
28962306a36Sopenharmony_ci				ret = -EINVAL;
29062306a36Sopenharmony_ci				break;
29162306a36Sopenharmony_ci			}
29262306a36Sopenharmony_ci			start = reg.offset;
29362306a36Sopenharmony_ci			end = start + 1;
29462306a36Sopenharmony_ci		}
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci		ret = io_ring_add_registered_fd(tctx, reg.data, start, end);
29762306a36Sopenharmony_ci		if (ret < 0)
29862306a36Sopenharmony_ci			break;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci		reg.offset = ret;
30162306a36Sopenharmony_ci		if (copy_to_user(&arg[i], &reg, sizeof(reg))) {
30262306a36Sopenharmony_ci			fput(tctx->registered_rings[reg.offset]);
30362306a36Sopenharmony_ci			tctx->registered_rings[reg.offset] = NULL;
30462306a36Sopenharmony_ci			ret = -EFAULT;
30562306a36Sopenharmony_ci			break;
30662306a36Sopenharmony_ci		}
30762306a36Sopenharmony_ci	}
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_ci	return i ? i : ret;
31062306a36Sopenharmony_ci}
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ciint io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg,
31362306a36Sopenharmony_ci			 unsigned nr_args)
31462306a36Sopenharmony_ci{
31562306a36Sopenharmony_ci	struct io_uring_rsrc_update __user *arg = __arg;
31662306a36Sopenharmony_ci	struct io_uring_task *tctx = current->io_uring;
31762306a36Sopenharmony_ci	struct io_uring_rsrc_update reg;
31862306a36Sopenharmony_ci	int ret = 0, i;
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci	if (!nr_args || nr_args > IO_RINGFD_REG_MAX)
32162306a36Sopenharmony_ci		return -EINVAL;
32262306a36Sopenharmony_ci	if (!tctx)
32362306a36Sopenharmony_ci		return 0;
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	for (i = 0; i < nr_args; i++) {
32662306a36Sopenharmony_ci		if (copy_from_user(&reg, &arg[i], sizeof(reg))) {
32762306a36Sopenharmony_ci			ret = -EFAULT;
32862306a36Sopenharmony_ci			break;
32962306a36Sopenharmony_ci		}
33062306a36Sopenharmony_ci		if (reg.resv || reg.data || reg.offset >= IO_RINGFD_REG_MAX) {
33162306a36Sopenharmony_ci			ret = -EINVAL;
33262306a36Sopenharmony_ci			break;
33362306a36Sopenharmony_ci		}
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci		reg.offset = array_index_nospec(reg.offset, IO_RINGFD_REG_MAX);
33662306a36Sopenharmony_ci		if (tctx->registered_rings[reg.offset]) {
33762306a36Sopenharmony_ci			fput(tctx->registered_rings[reg.offset]);
33862306a36Sopenharmony_ci			tctx->registered_rings[reg.offset] = NULL;
33962306a36Sopenharmony_ci		}
34062306a36Sopenharmony_ci	}
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_ci	return i ? i : ret;
34362306a36Sopenharmony_ci}
344