162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * POSIX message queues filesystem for Linux.
362306a36Sopenharmony_ci *
462306a36Sopenharmony_ci * Copyright (C) 2003,2004  Krzysztof Benedyczak    (golbi@mat.uni.torun.pl)
562306a36Sopenharmony_ci *                          Michal Wronski          (michal.wronski@gmail.com)
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Spinlocks:               Mohamed Abbas           (abbas.mohamed@intel.com)
862306a36Sopenharmony_ci * Lockless receive & send, fd based notify:
962306a36Sopenharmony_ci *			    Manfred Spraul	    (manfred@colorfullife.com)
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci * Audit:                   George Wilson           (ltcgcw@us.ibm.com)
1262306a36Sopenharmony_ci *
1362306a36Sopenharmony_ci * This file is released under the GPL.
1462306a36Sopenharmony_ci */
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci#include <linux/capability.h>
1762306a36Sopenharmony_ci#include <linux/init.h>
1862306a36Sopenharmony_ci#include <linux/pagemap.h>
1962306a36Sopenharmony_ci#include <linux/file.h>
2062306a36Sopenharmony_ci#include <linux/mount.h>
2162306a36Sopenharmony_ci#include <linux/fs_context.h>
2262306a36Sopenharmony_ci#include <linux/namei.h>
2362306a36Sopenharmony_ci#include <linux/sysctl.h>
2462306a36Sopenharmony_ci#include <linux/poll.h>
2562306a36Sopenharmony_ci#include <linux/mqueue.h>
2662306a36Sopenharmony_ci#include <linux/msg.h>
2762306a36Sopenharmony_ci#include <linux/skbuff.h>
2862306a36Sopenharmony_ci#include <linux/vmalloc.h>
2962306a36Sopenharmony_ci#include <linux/netlink.h>
3062306a36Sopenharmony_ci#include <linux/syscalls.h>
3162306a36Sopenharmony_ci#include <linux/audit.h>
3262306a36Sopenharmony_ci#include <linux/signal.h>
3362306a36Sopenharmony_ci#include <linux/mutex.h>
3462306a36Sopenharmony_ci#include <linux/nsproxy.h>
3562306a36Sopenharmony_ci#include <linux/pid.h>
3662306a36Sopenharmony_ci#include <linux/ipc_namespace.h>
3762306a36Sopenharmony_ci#include <linux/user_namespace.h>
3862306a36Sopenharmony_ci#include <linux/slab.h>
3962306a36Sopenharmony_ci#include <linux/sched/wake_q.h>
4062306a36Sopenharmony_ci#include <linux/sched/signal.h>
4162306a36Sopenharmony_ci#include <linux/sched/user.h>
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_ci#include <net/sock.h>
4462306a36Sopenharmony_ci#include "util.h"
4562306a36Sopenharmony_ci
/*
 * Private mount context for the mqueue filesystem. It is allocated by
 * mqueue_init_fs_context(), hung off fs_context::fs_private and released
 * by mqueue_fs_context_free(), which also drops the ipc_ns reference.
 */
struct mqueue_fs_context {
	/* ipc namespace the mount is for; a reference is held on it */
	struct ipc_namespace	*ipc_ns;
	bool			 newns;	/* Set if newly created ipc namespace */
};
5062306a36Sopenharmony_ci
/* Magic number stored in sb->s_magic by mqueue_fill_super() */
#define MQUEUE_MAGIC	0x19800202
/* Amount a directory's i_size grows/shrinks per queue file in it */
#define DIRENT_SIZE	20
/* i_size assigned to every queue file */
#define FILENT_SIZE	80

/*
 * NOTE(review): presumably the indices into mqueue_inode_info::e_wait_q[]
 * (senders waiting for free space, receivers waiting for messages) -
 * confirm against the users further down the file.
 */
#define SEND		0
#define RECV		1

/* Values of ext_wait_queue::state, see the MQ_BARRIER notes below */
#define STATE_NONE	0
#define STATE_READY	1
6062306a36Sopenharmony_ci
/*
 * Node of the per-queue rbtree (mqueue_inode_info::msg_tree). msg_insert()
 * keeps one node per priority value that currently has messages queued and
 * appends each message of that priority to the node's list.
 */
struct posix_msg_tree_node {
	struct rb_node		rb_node;	/* linkage in msg_tree */
	struct list_head	msg_list;	/* messages, appended at the tail */
	int			priority;	/* taken from msg_msg::m_type */
};
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci/*
6862306a36Sopenharmony_ci * Locking:
6962306a36Sopenharmony_ci *
7062306a36Sopenharmony_ci * Accesses to a message queue are synchronized by acquiring info->lock.
7162306a36Sopenharmony_ci *
7262306a36Sopenharmony_ci * There are two notable exceptions:
7362306a36Sopenharmony_ci * - The actual wakeup of a sleeping task is performed using the wake_q
7462306a36Sopenharmony_ci *   framework. info->lock is already released when wake_up_q is called.
7562306a36Sopenharmony_ci * - The exit codepaths after sleeping check ext_wait_queue->state without
7662306a36Sopenharmony_ci *   any locks. If it is STATE_READY, then the syscall is completed without
7762306a36Sopenharmony_ci *   acquiring info->lock.
7862306a36Sopenharmony_ci *
7962306a36Sopenharmony_ci * MQ_BARRIER:
8062306a36Sopenharmony_ci * To achieve proper release/acquire memory barrier pairing, the state is set to
8162306a36Sopenharmony_ci * STATE_READY with smp_store_release(), and it is read with READ_ONCE followed
8262306a36Sopenharmony_ci * by smp_acquire__after_ctrl_dep(). In addition, wake_q_add_safe() is used.
8362306a36Sopenharmony_ci *
8462306a36Sopenharmony_ci * This prevents the following races:
8562306a36Sopenharmony_ci *
8662306a36Sopenharmony_ci * 1) With the simple wake_q_add(), the task could be gone already before
8762306a36Sopenharmony_ci *    the increase of the reference happens
8862306a36Sopenharmony_ci * Thread A
8962306a36Sopenharmony_ci *				Thread B
9062306a36Sopenharmony_ci * WRITE_ONCE(wait.state, STATE_NONE);
9162306a36Sopenharmony_ci * schedule_hrtimeout()
9262306a36Sopenharmony_ci *				wake_q_add(A)
9362306a36Sopenharmony_ci *				if (cmpxchg()) // success
9462306a36Sopenharmony_ci *				   ->state = STATE_READY (reordered)
9562306a36Sopenharmony_ci * <timeout returns>
9662306a36Sopenharmony_ci * if (wait.state == STATE_READY) return;
9762306a36Sopenharmony_ci * sysret to user space
9862306a36Sopenharmony_ci * sys_exit()
9962306a36Sopenharmony_ci *				get_task_struct() // UaF
10062306a36Sopenharmony_ci *
10162306a36Sopenharmony_ci * Solution: Use wake_q_add_safe() and perform the get_task_struct() before
10262306a36Sopenharmony_ci * the smp_store_release() that does ->state = STATE_READY.
10362306a36Sopenharmony_ci *
10462306a36Sopenharmony_ci * 2) Without proper _release/_acquire barriers, the woken up task
10562306a36Sopenharmony_ci *    could read stale data
10662306a36Sopenharmony_ci *
10762306a36Sopenharmony_ci * Thread A
10862306a36Sopenharmony_ci *				Thread B
10962306a36Sopenharmony_ci * do_mq_timedreceive
11062306a36Sopenharmony_ci * WRITE_ONCE(wait.state, STATE_NONE);
11162306a36Sopenharmony_ci * schedule_hrtimeout()
11262306a36Sopenharmony_ci *				state = STATE_READY;
11362306a36Sopenharmony_ci * <timeout returns>
11462306a36Sopenharmony_ci * if (wait.state == STATE_READY) return;
11562306a36Sopenharmony_ci * msg_ptr = wait.msg;		// Access to stale data!
11662306a36Sopenharmony_ci *				receiver->msg = message; (reordered)
11762306a36Sopenharmony_ci *
11862306a36Sopenharmony_ci * Solution: use _release and _acquire barriers.
11962306a36Sopenharmony_ci *
12062306a36Sopenharmony_ci * 3) There is intentionally no barrier when setting current->state
12162306a36Sopenharmony_ci *    to TASK_INTERRUPTIBLE: spin_unlock(&info->lock) provides the
12262306a36Sopenharmony_ci *    release memory barrier, and the wakeup is triggered when holding
12362306a36Sopenharmony_ci *    info->lock, i.e. spin_lock(&info->lock) provided a pairing
12462306a36Sopenharmony_ci *    acquire memory barrier.
12562306a36Sopenharmony_ci */
12662306a36Sopenharmony_ci
/*
 * Wait-queue entry. mqueue_inode_info embeds two of these (e_wait_q[]) and
 * initialises their 'list' members as list heads; the locking comment above
 * describes how 'state' and 'msg' are handed over between tasks.
 */
struct ext_wait_queue {		/* queue of sleeping tasks */
	struct task_struct *task;
	struct list_head list;
	struct msg_msg *msg;	/* ptr of loaded message */
	int state;		/* one of STATE_* values */
};
13362306a36Sopenharmony_ci
/*
 * Per-queue state. The VFS inode is embedded in this structure, so
 * MQUEUE_I() can map an inode back to it with container_of(). Queued
 * messages live in an rbtree of posix_msg_tree_node, one node per priority.
 */
struct mqueue_inode_info {
	/* synchronizes accesses to the message queue (see "Locking" above) */
	spinlock_t lock;
	struct inode vfs_inode;
	wait_queue_head_t wait_q;

	/* tree of posix_msg_tree_node; lower priorities left, higher right */
	struct rb_root msg_tree;
	/* cached rightmost (highest priority) node, NULL when the tree is empty */
	struct rb_node *msg_tree_rightmost;
	/* at most one spare tree node, reused by msg_insert() */
	struct posix_msg_tree_node *node_cache;
	/* mq_maxmsg / mq_msgsize limits and the mq_curmsgs counter */
	struct mq_attr attr;

	struct sigevent notify;
	struct pid *notify_owner;
	u32 notify_self_exec_id;
	struct user_namespace *notify_user_ns;
	struct ucounts *ucounts;	/* user who created, for accounting */
	struct sock *notify_sock;
	struct sk_buff *notify_cookie;

	/* for tasks waiting for free space and messages, respectively */
	struct ext_wait_queue e_wait_q[2];

	unsigned long qsize; /* size of queue in memory (sum of all msgs) */
};
15762306a36Sopenharmony_ci
/* Forward declarations of objects that are referenced before they are defined */
static struct file_system_type mqueue_fs_type;
static const struct inode_operations mqueue_dir_inode_operations;
static const struct file_operations mqueue_file_operations;
static const struct super_operations mqueue_super_ops;
static const struct fs_context_operations mqueue_fs_context_ops;
static void remove_notification(struct mqueue_inode_info *info);

/* Cache that mqueue_alloc_inode()/mqueue_free_inode() allocate from and free to */
static struct kmem_cache *mqueue_inode_cachep;
16662306a36Sopenharmony_ci
16762306a36Sopenharmony_cistatic inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
16862306a36Sopenharmony_ci{
16962306a36Sopenharmony_ci	return container_of(inode, struct mqueue_inode_info, vfs_inode);
17062306a36Sopenharmony_ci}
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci/*
17362306a36Sopenharmony_ci * This routine should be called with the mq_lock held.
17462306a36Sopenharmony_ci */
17562306a36Sopenharmony_cistatic inline struct ipc_namespace *__get_ns_from_inode(struct inode *inode)
17662306a36Sopenharmony_ci{
17762306a36Sopenharmony_ci	return get_ipc_ns(inode->i_sb->s_fs_info);
17862306a36Sopenharmony_ci}
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_cistatic struct ipc_namespace *get_ns_from_inode(struct inode *inode)
18162306a36Sopenharmony_ci{
18262306a36Sopenharmony_ci	struct ipc_namespace *ns;
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	spin_lock(&mq_lock);
18562306a36Sopenharmony_ci	ns = __get_ns_from_inode(inode);
18662306a36Sopenharmony_ci	spin_unlock(&mq_lock);
18762306a36Sopenharmony_ci	return ns;
18862306a36Sopenharmony_ci}
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci/* Auxiliary functions to manipulate messages' list */
19162306a36Sopenharmony_cistatic int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
19262306a36Sopenharmony_ci{
19362306a36Sopenharmony_ci	struct rb_node **p, *parent = NULL;
19462306a36Sopenharmony_ci	struct posix_msg_tree_node *leaf;
19562306a36Sopenharmony_ci	bool rightmost = true;
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_ci	p = &info->msg_tree.rb_node;
19862306a36Sopenharmony_ci	while (*p) {
19962306a36Sopenharmony_ci		parent = *p;
20062306a36Sopenharmony_ci		leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci		if (likely(leaf->priority == msg->m_type))
20362306a36Sopenharmony_ci			goto insert_msg;
20462306a36Sopenharmony_ci		else if (msg->m_type < leaf->priority) {
20562306a36Sopenharmony_ci			p = &(*p)->rb_left;
20662306a36Sopenharmony_ci			rightmost = false;
20762306a36Sopenharmony_ci		} else
20862306a36Sopenharmony_ci			p = &(*p)->rb_right;
20962306a36Sopenharmony_ci	}
21062306a36Sopenharmony_ci	if (info->node_cache) {
21162306a36Sopenharmony_ci		leaf = info->node_cache;
21262306a36Sopenharmony_ci		info->node_cache = NULL;
21362306a36Sopenharmony_ci	} else {
21462306a36Sopenharmony_ci		leaf = kmalloc(sizeof(*leaf), GFP_ATOMIC);
21562306a36Sopenharmony_ci		if (!leaf)
21662306a36Sopenharmony_ci			return -ENOMEM;
21762306a36Sopenharmony_ci		INIT_LIST_HEAD(&leaf->msg_list);
21862306a36Sopenharmony_ci	}
21962306a36Sopenharmony_ci	leaf->priority = msg->m_type;
22062306a36Sopenharmony_ci
22162306a36Sopenharmony_ci	if (rightmost)
22262306a36Sopenharmony_ci		info->msg_tree_rightmost = &leaf->rb_node;
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	rb_link_node(&leaf->rb_node, parent, p);
22562306a36Sopenharmony_ci	rb_insert_color(&leaf->rb_node, &info->msg_tree);
22662306a36Sopenharmony_ciinsert_msg:
22762306a36Sopenharmony_ci	info->attr.mq_curmsgs++;
22862306a36Sopenharmony_ci	info->qsize += msg->m_ts;
22962306a36Sopenharmony_ci	list_add_tail(&msg->m_list, &leaf->msg_list);
23062306a36Sopenharmony_ci	return 0;
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_cistatic inline void msg_tree_erase(struct posix_msg_tree_node *leaf,
23462306a36Sopenharmony_ci				  struct mqueue_inode_info *info)
23562306a36Sopenharmony_ci{
23662306a36Sopenharmony_ci	struct rb_node *node = &leaf->rb_node;
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	if (info->msg_tree_rightmost == node)
23962306a36Sopenharmony_ci		info->msg_tree_rightmost = rb_prev(node);
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	rb_erase(node, &info->msg_tree);
24262306a36Sopenharmony_ci	if (info->node_cache)
24362306a36Sopenharmony_ci		kfree(leaf);
24462306a36Sopenharmony_ci	else
24562306a36Sopenharmony_ci		info->node_cache = leaf;
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_cistatic inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
24962306a36Sopenharmony_ci{
25062306a36Sopenharmony_ci	struct rb_node *parent = NULL;
25162306a36Sopenharmony_ci	struct posix_msg_tree_node *leaf;
25262306a36Sopenharmony_ci	struct msg_msg *msg;
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_citry_again:
25562306a36Sopenharmony_ci	/*
25662306a36Sopenharmony_ci	 * During insert, low priorities go to the left and high to the
25762306a36Sopenharmony_ci	 * right.  On receive, we want the highest priorities first, so
25862306a36Sopenharmony_ci	 * walk all the way to the right.
25962306a36Sopenharmony_ci	 */
26062306a36Sopenharmony_ci	parent = info->msg_tree_rightmost;
26162306a36Sopenharmony_ci	if (!parent) {
26262306a36Sopenharmony_ci		if (info->attr.mq_curmsgs) {
26362306a36Sopenharmony_ci			pr_warn_once("Inconsistency in POSIX message queue, "
26462306a36Sopenharmony_ci				     "no tree element, but supposedly messages "
26562306a36Sopenharmony_ci				     "should exist!\n");
26662306a36Sopenharmony_ci			info->attr.mq_curmsgs = 0;
26762306a36Sopenharmony_ci		}
26862306a36Sopenharmony_ci		return NULL;
26962306a36Sopenharmony_ci	}
27062306a36Sopenharmony_ci	leaf = rb_entry(parent, struct posix_msg_tree_node, rb_node);
27162306a36Sopenharmony_ci	if (unlikely(list_empty(&leaf->msg_list))) {
27262306a36Sopenharmony_ci		pr_warn_once("Inconsistency in POSIX message queue, "
27362306a36Sopenharmony_ci			     "empty leaf node but we haven't implemented "
27462306a36Sopenharmony_ci			     "lazy leaf delete!\n");
27562306a36Sopenharmony_ci		msg_tree_erase(leaf, info);
27662306a36Sopenharmony_ci		goto try_again;
27762306a36Sopenharmony_ci	} else {
27862306a36Sopenharmony_ci		msg = list_first_entry(&leaf->msg_list,
27962306a36Sopenharmony_ci				       struct msg_msg, m_list);
28062306a36Sopenharmony_ci		list_del(&msg->m_list);
28162306a36Sopenharmony_ci		if (list_empty(&leaf->msg_list)) {
28262306a36Sopenharmony_ci			msg_tree_erase(leaf, info);
28362306a36Sopenharmony_ci		}
28462306a36Sopenharmony_ci	}
28562306a36Sopenharmony_ci	info->attr.mq_curmsgs--;
28662306a36Sopenharmony_ci	info->qsize -= msg->m_ts;
28762306a36Sopenharmony_ci	return msg;
28862306a36Sopenharmony_ci}
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_cistatic struct inode *mqueue_get_inode(struct super_block *sb,
29162306a36Sopenharmony_ci		struct ipc_namespace *ipc_ns, umode_t mode,
29262306a36Sopenharmony_ci		struct mq_attr *attr)
29362306a36Sopenharmony_ci{
29462306a36Sopenharmony_ci	struct inode *inode;
29562306a36Sopenharmony_ci	int ret = -ENOMEM;
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	inode = new_inode(sb);
29862306a36Sopenharmony_ci	if (!inode)
29962306a36Sopenharmony_ci		goto err;
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	inode->i_ino = get_next_ino();
30262306a36Sopenharmony_ci	inode->i_mode = mode;
30362306a36Sopenharmony_ci	inode->i_uid = current_fsuid();
30462306a36Sopenharmony_ci	inode->i_gid = current_fsgid();
30562306a36Sopenharmony_ci	inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ci	if (S_ISREG(mode)) {
30862306a36Sopenharmony_ci		struct mqueue_inode_info *info;
30962306a36Sopenharmony_ci		unsigned long mq_bytes, mq_treesize;
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci		inode->i_fop = &mqueue_file_operations;
31262306a36Sopenharmony_ci		inode->i_size = FILENT_SIZE;
31362306a36Sopenharmony_ci		/* mqueue specific info */
31462306a36Sopenharmony_ci		info = MQUEUE_I(inode);
31562306a36Sopenharmony_ci		spin_lock_init(&info->lock);
31662306a36Sopenharmony_ci		init_waitqueue_head(&info->wait_q);
31762306a36Sopenharmony_ci		INIT_LIST_HEAD(&info->e_wait_q[0].list);
31862306a36Sopenharmony_ci		INIT_LIST_HEAD(&info->e_wait_q[1].list);
31962306a36Sopenharmony_ci		info->notify_owner = NULL;
32062306a36Sopenharmony_ci		info->notify_user_ns = NULL;
32162306a36Sopenharmony_ci		info->qsize = 0;
32262306a36Sopenharmony_ci		info->ucounts = NULL;	/* set when all is ok */
32362306a36Sopenharmony_ci		info->msg_tree = RB_ROOT;
32462306a36Sopenharmony_ci		info->msg_tree_rightmost = NULL;
32562306a36Sopenharmony_ci		info->node_cache = NULL;
32662306a36Sopenharmony_ci		memset(&info->attr, 0, sizeof(info->attr));
32762306a36Sopenharmony_ci		info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
32862306a36Sopenharmony_ci					   ipc_ns->mq_msg_default);
32962306a36Sopenharmony_ci		info->attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
33062306a36Sopenharmony_ci					    ipc_ns->mq_msgsize_default);
33162306a36Sopenharmony_ci		if (attr) {
33262306a36Sopenharmony_ci			info->attr.mq_maxmsg = attr->mq_maxmsg;
33362306a36Sopenharmony_ci			info->attr.mq_msgsize = attr->mq_msgsize;
33462306a36Sopenharmony_ci		}
33562306a36Sopenharmony_ci		/*
33662306a36Sopenharmony_ci		 * We used to allocate a static array of pointers and account
33762306a36Sopenharmony_ci		 * the size of that array as well as one msg_msg struct per
33862306a36Sopenharmony_ci		 * possible message into the queue size. That's no longer
33962306a36Sopenharmony_ci		 * accurate as the queue is now an rbtree and will grow and
34062306a36Sopenharmony_ci		 * shrink depending on usage patterns.  We can, however, still
34162306a36Sopenharmony_ci		 * account one msg_msg struct per message, but the nodes are
34262306a36Sopenharmony_ci		 * allocated depending on priority usage, and most programs
34362306a36Sopenharmony_ci		 * only use one, or a handful, of priorities.  However, since
34462306a36Sopenharmony_ci		 * this is pinned memory, we need to assume worst case, so
34562306a36Sopenharmony_ci		 * that means the min(mq_maxmsg, max_priorities) * struct
34662306a36Sopenharmony_ci		 * posix_msg_tree_node.
34762306a36Sopenharmony_ci		 */
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_ci		ret = -EINVAL;
35062306a36Sopenharmony_ci		if (info->attr.mq_maxmsg <= 0 || info->attr.mq_msgsize <= 0)
35162306a36Sopenharmony_ci			goto out_inode;
35262306a36Sopenharmony_ci		if (capable(CAP_SYS_RESOURCE)) {
35362306a36Sopenharmony_ci			if (info->attr.mq_maxmsg > HARD_MSGMAX ||
35462306a36Sopenharmony_ci			    info->attr.mq_msgsize > HARD_MSGSIZEMAX)
35562306a36Sopenharmony_ci				goto out_inode;
35662306a36Sopenharmony_ci		} else {
35762306a36Sopenharmony_ci			if (info->attr.mq_maxmsg > ipc_ns->mq_msg_max ||
35862306a36Sopenharmony_ci					info->attr.mq_msgsize > ipc_ns->mq_msgsize_max)
35962306a36Sopenharmony_ci				goto out_inode;
36062306a36Sopenharmony_ci		}
36162306a36Sopenharmony_ci		ret = -EOVERFLOW;
36262306a36Sopenharmony_ci		/* check for overflow */
36362306a36Sopenharmony_ci		if (info->attr.mq_msgsize > ULONG_MAX/info->attr.mq_maxmsg)
36462306a36Sopenharmony_ci			goto out_inode;
36562306a36Sopenharmony_ci		mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
36662306a36Sopenharmony_ci			min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
36762306a36Sopenharmony_ci			sizeof(struct posix_msg_tree_node);
36862306a36Sopenharmony_ci		mq_bytes = info->attr.mq_maxmsg * info->attr.mq_msgsize;
36962306a36Sopenharmony_ci		if (mq_bytes + mq_treesize < mq_bytes)
37062306a36Sopenharmony_ci			goto out_inode;
37162306a36Sopenharmony_ci		mq_bytes += mq_treesize;
37262306a36Sopenharmony_ci		info->ucounts = get_ucounts(current_ucounts());
37362306a36Sopenharmony_ci		if (info->ucounts) {
37462306a36Sopenharmony_ci			long msgqueue;
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_ci			spin_lock(&mq_lock);
37762306a36Sopenharmony_ci			msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
37862306a36Sopenharmony_ci			if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) {
37962306a36Sopenharmony_ci				dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
38062306a36Sopenharmony_ci				spin_unlock(&mq_lock);
38162306a36Sopenharmony_ci				put_ucounts(info->ucounts);
38262306a36Sopenharmony_ci				info->ucounts = NULL;
38362306a36Sopenharmony_ci				/* mqueue_evict_inode() releases info->messages */
38462306a36Sopenharmony_ci				ret = -EMFILE;
38562306a36Sopenharmony_ci				goto out_inode;
38662306a36Sopenharmony_ci			}
38762306a36Sopenharmony_ci			spin_unlock(&mq_lock);
38862306a36Sopenharmony_ci		}
38962306a36Sopenharmony_ci	} else if (S_ISDIR(mode)) {
39062306a36Sopenharmony_ci		inc_nlink(inode);
39162306a36Sopenharmony_ci		/* Some things misbehave if size == 0 on a directory */
39262306a36Sopenharmony_ci		inode->i_size = 2 * DIRENT_SIZE;
39362306a36Sopenharmony_ci		inode->i_op = &mqueue_dir_inode_operations;
39462306a36Sopenharmony_ci		inode->i_fop = &simple_dir_operations;
39562306a36Sopenharmony_ci	}
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	return inode;
39862306a36Sopenharmony_ciout_inode:
39962306a36Sopenharmony_ci	iput(inode);
40062306a36Sopenharmony_cierr:
40162306a36Sopenharmony_ci	return ERR_PTR(ret);
40262306a36Sopenharmony_ci}
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_cistatic int mqueue_fill_super(struct super_block *sb, struct fs_context *fc)
40562306a36Sopenharmony_ci{
40662306a36Sopenharmony_ci	struct inode *inode;
40762306a36Sopenharmony_ci	struct ipc_namespace *ns = sb->s_fs_info;
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci	sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
41062306a36Sopenharmony_ci	sb->s_blocksize = PAGE_SIZE;
41162306a36Sopenharmony_ci	sb->s_blocksize_bits = PAGE_SHIFT;
41262306a36Sopenharmony_ci	sb->s_magic = MQUEUE_MAGIC;
41362306a36Sopenharmony_ci	sb->s_op = &mqueue_super_ops;
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci	inode = mqueue_get_inode(sb, ns, S_IFDIR | S_ISVTX | S_IRWXUGO, NULL);
41662306a36Sopenharmony_ci	if (IS_ERR(inode))
41762306a36Sopenharmony_ci		return PTR_ERR(inode);
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_ci	sb->s_root = d_make_root(inode);
42062306a36Sopenharmony_ci	if (!sb->s_root)
42162306a36Sopenharmony_ci		return -ENOMEM;
42262306a36Sopenharmony_ci	return 0;
42362306a36Sopenharmony_ci}
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_cistatic int mqueue_get_tree(struct fs_context *fc)
42662306a36Sopenharmony_ci{
42762306a36Sopenharmony_ci	struct mqueue_fs_context *ctx = fc->fs_private;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	/*
43062306a36Sopenharmony_ci	 * With a newly created ipc namespace, we don't need to do a search
43162306a36Sopenharmony_ci	 * for an ipc namespace match, but we still need to set s_fs_info.
43262306a36Sopenharmony_ci	 */
43362306a36Sopenharmony_ci	if (ctx->newns) {
43462306a36Sopenharmony_ci		fc->s_fs_info = ctx->ipc_ns;
43562306a36Sopenharmony_ci		return get_tree_nodev(fc, mqueue_fill_super);
43662306a36Sopenharmony_ci	}
43762306a36Sopenharmony_ci	return get_tree_keyed(fc, mqueue_fill_super, ctx->ipc_ns);
43862306a36Sopenharmony_ci}
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_cistatic void mqueue_fs_context_free(struct fs_context *fc)
44162306a36Sopenharmony_ci{
44262306a36Sopenharmony_ci	struct mqueue_fs_context *ctx = fc->fs_private;
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci	put_ipc_ns(ctx->ipc_ns);
44562306a36Sopenharmony_ci	kfree(ctx);
44662306a36Sopenharmony_ci}
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_cistatic int mqueue_init_fs_context(struct fs_context *fc)
44962306a36Sopenharmony_ci{
45062306a36Sopenharmony_ci	struct mqueue_fs_context *ctx;
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_ci	ctx = kzalloc(sizeof(struct mqueue_fs_context), GFP_KERNEL);
45362306a36Sopenharmony_ci	if (!ctx)
45462306a36Sopenharmony_ci		return -ENOMEM;
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	ctx->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns);
45762306a36Sopenharmony_ci	put_user_ns(fc->user_ns);
45862306a36Sopenharmony_ci	fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
45962306a36Sopenharmony_ci	fc->fs_private = ctx;
46062306a36Sopenharmony_ci	fc->ops = &mqueue_fs_context_ops;
46162306a36Sopenharmony_ci	return 0;
46262306a36Sopenharmony_ci}
46362306a36Sopenharmony_ci
46462306a36Sopenharmony_ci/*
46562306a36Sopenharmony_ci * mq_init_ns() is currently the only caller of mq_create_mount().
46662306a36Sopenharmony_ci * So the ns parameter is always a newly created ipc namespace.
46762306a36Sopenharmony_ci */
46862306a36Sopenharmony_cistatic struct vfsmount *mq_create_mount(struct ipc_namespace *ns)
46962306a36Sopenharmony_ci{
47062306a36Sopenharmony_ci	struct mqueue_fs_context *ctx;
47162306a36Sopenharmony_ci	struct fs_context *fc;
47262306a36Sopenharmony_ci	struct vfsmount *mnt;
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	fc = fs_context_for_mount(&mqueue_fs_type, SB_KERNMOUNT);
47562306a36Sopenharmony_ci	if (IS_ERR(fc))
47662306a36Sopenharmony_ci		return ERR_CAST(fc);
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	ctx = fc->fs_private;
47962306a36Sopenharmony_ci	ctx->newns = true;
48062306a36Sopenharmony_ci	put_ipc_ns(ctx->ipc_ns);
48162306a36Sopenharmony_ci	ctx->ipc_ns = get_ipc_ns(ns);
48262306a36Sopenharmony_ci	put_user_ns(fc->user_ns);
48362306a36Sopenharmony_ci	fc->user_ns = get_user_ns(ctx->ipc_ns->user_ns);
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci	mnt = fc_mount(fc);
48662306a36Sopenharmony_ci	put_fs_context(fc);
48762306a36Sopenharmony_ci	return mnt;
48862306a36Sopenharmony_ci}
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_cistatic void init_once(void *foo)
49162306a36Sopenharmony_ci{
49262306a36Sopenharmony_ci	struct mqueue_inode_info *p = foo;
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	inode_init_once(&p->vfs_inode);
49562306a36Sopenharmony_ci}
49662306a36Sopenharmony_ci
49762306a36Sopenharmony_cistatic struct inode *mqueue_alloc_inode(struct super_block *sb)
49862306a36Sopenharmony_ci{
49962306a36Sopenharmony_ci	struct mqueue_inode_info *ei;
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	ei = alloc_inode_sb(sb, mqueue_inode_cachep, GFP_KERNEL);
50262306a36Sopenharmony_ci	if (!ei)
50362306a36Sopenharmony_ci		return NULL;
50462306a36Sopenharmony_ci	return &ei->vfs_inode;
50562306a36Sopenharmony_ci}
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_cistatic void mqueue_free_inode(struct inode *inode)
50862306a36Sopenharmony_ci{
50962306a36Sopenharmony_ci	kmem_cache_free(mqueue_inode_cachep, MQUEUE_I(inode));
51062306a36Sopenharmony_ci}
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_cistatic void mqueue_evict_inode(struct inode *inode)
51362306a36Sopenharmony_ci{
51462306a36Sopenharmony_ci	struct mqueue_inode_info *info;
51562306a36Sopenharmony_ci	struct ipc_namespace *ipc_ns;
51662306a36Sopenharmony_ci	struct msg_msg *msg, *nmsg;
51762306a36Sopenharmony_ci	LIST_HEAD(tmp_msg);
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci	clear_inode(inode);
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ci	if (S_ISDIR(inode->i_mode))
52262306a36Sopenharmony_ci		return;
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci	ipc_ns = get_ns_from_inode(inode);
52562306a36Sopenharmony_ci	info = MQUEUE_I(inode);
52662306a36Sopenharmony_ci	spin_lock(&info->lock);
52762306a36Sopenharmony_ci	while ((msg = msg_get(info)) != NULL)
52862306a36Sopenharmony_ci		list_add_tail(&msg->m_list, &tmp_msg);
52962306a36Sopenharmony_ci	kfree(info->node_cache);
53062306a36Sopenharmony_ci	spin_unlock(&info->lock);
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci	list_for_each_entry_safe(msg, nmsg, &tmp_msg, m_list) {
53362306a36Sopenharmony_ci		list_del(&msg->m_list);
53462306a36Sopenharmony_ci		free_msg(msg);
53562306a36Sopenharmony_ci	}
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci	if (info->ucounts) {
53862306a36Sopenharmony_ci		unsigned long mq_bytes, mq_treesize;
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci		/* Total amount of bytes accounted for the mqueue */
54162306a36Sopenharmony_ci		mq_treesize = info->attr.mq_maxmsg * sizeof(struct msg_msg) +
54262306a36Sopenharmony_ci			min_t(unsigned int, info->attr.mq_maxmsg, MQ_PRIO_MAX) *
54362306a36Sopenharmony_ci			sizeof(struct posix_msg_tree_node);
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci		mq_bytes = mq_treesize + (info->attr.mq_maxmsg *
54662306a36Sopenharmony_ci					  info->attr.mq_msgsize);
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci		spin_lock(&mq_lock);
54962306a36Sopenharmony_ci		dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes);
55062306a36Sopenharmony_ci		/*
55162306a36Sopenharmony_ci		 * get_ns_from_inode() ensures that the
55262306a36Sopenharmony_ci		 * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns
55362306a36Sopenharmony_ci		 * to which we now hold a reference, or it is NULL.
55462306a36Sopenharmony_ci		 * We can't put it here under mq_lock, though.
55562306a36Sopenharmony_ci		 */
55662306a36Sopenharmony_ci		if (ipc_ns)
55762306a36Sopenharmony_ci			ipc_ns->mq_queues_count--;
55862306a36Sopenharmony_ci		spin_unlock(&mq_lock);
55962306a36Sopenharmony_ci		put_ucounts(info->ucounts);
56062306a36Sopenharmony_ci		info->ucounts = NULL;
56162306a36Sopenharmony_ci	}
56262306a36Sopenharmony_ci	if (ipc_ns)
56362306a36Sopenharmony_ci		put_ipc_ns(ipc_ns);
56462306a36Sopenharmony_ci}
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_cistatic int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
56762306a36Sopenharmony_ci{
56862306a36Sopenharmony_ci	struct inode *dir = dentry->d_parent->d_inode;
56962306a36Sopenharmony_ci	struct inode *inode;
57062306a36Sopenharmony_ci	struct mq_attr *attr = arg;
57162306a36Sopenharmony_ci	int error;
57262306a36Sopenharmony_ci	struct ipc_namespace *ipc_ns;
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci	spin_lock(&mq_lock);
57562306a36Sopenharmony_ci	ipc_ns = __get_ns_from_inode(dir);
57662306a36Sopenharmony_ci	if (!ipc_ns) {
57762306a36Sopenharmony_ci		error = -EACCES;
57862306a36Sopenharmony_ci		goto out_unlock;
57962306a36Sopenharmony_ci	}
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	if (ipc_ns->mq_queues_count >= ipc_ns->mq_queues_max &&
58262306a36Sopenharmony_ci	    !capable(CAP_SYS_RESOURCE)) {
58362306a36Sopenharmony_ci		error = -ENOSPC;
58462306a36Sopenharmony_ci		goto out_unlock;
58562306a36Sopenharmony_ci	}
58662306a36Sopenharmony_ci	ipc_ns->mq_queues_count++;
58762306a36Sopenharmony_ci	spin_unlock(&mq_lock);
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	inode = mqueue_get_inode(dir->i_sb, ipc_ns, mode, attr);
59062306a36Sopenharmony_ci	if (IS_ERR(inode)) {
59162306a36Sopenharmony_ci		error = PTR_ERR(inode);
59262306a36Sopenharmony_ci		spin_lock(&mq_lock);
59362306a36Sopenharmony_ci		ipc_ns->mq_queues_count--;
59462306a36Sopenharmony_ci		goto out_unlock;
59562306a36Sopenharmony_ci	}
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci	put_ipc_ns(ipc_ns);
59862306a36Sopenharmony_ci	dir->i_size += DIRENT_SIZE;
59962306a36Sopenharmony_ci	dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
60062306a36Sopenharmony_ci
60162306a36Sopenharmony_ci	d_instantiate(dentry, inode);
60262306a36Sopenharmony_ci	dget(dentry);
60362306a36Sopenharmony_ci	return 0;
60462306a36Sopenharmony_ciout_unlock:
60562306a36Sopenharmony_ci	spin_unlock(&mq_lock);
60662306a36Sopenharmony_ci	if (ipc_ns)
60762306a36Sopenharmony_ci		put_ipc_ns(ipc_ns);
60862306a36Sopenharmony_ci	return error;
60962306a36Sopenharmony_ci}
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_cistatic int mqueue_create(struct mnt_idmap *idmap, struct inode *dir,
61262306a36Sopenharmony_ci			 struct dentry *dentry, umode_t mode, bool excl)
61362306a36Sopenharmony_ci{
61462306a36Sopenharmony_ci	return mqueue_create_attr(dentry, mode, NULL);
61562306a36Sopenharmony_ci}
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_cistatic int mqueue_unlink(struct inode *dir, struct dentry *dentry)
61862306a36Sopenharmony_ci{
61962306a36Sopenharmony_ci	struct inode *inode = d_inode(dentry);
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
62262306a36Sopenharmony_ci	dir->i_size -= DIRENT_SIZE;
62362306a36Sopenharmony_ci	drop_nlink(inode);
62462306a36Sopenharmony_ci	dput(dentry);
62562306a36Sopenharmony_ci	return 0;
62662306a36Sopenharmony_ci}
62762306a36Sopenharmony_ci
62862306a36Sopenharmony_ci/*
62962306a36Sopenharmony_ci*	This is routine for system read from queue file.
63062306a36Sopenharmony_ci*	To avoid mess with doing here some sort of mq_receive we allow
63162306a36Sopenharmony_ci*	to read only queue size & notification info (the only values
63262306a36Sopenharmony_ci*	that are interesting from user point of view and aren't accessible
63362306a36Sopenharmony_ci*	through std routines)
63462306a36Sopenharmony_ci*/
63562306a36Sopenharmony_cistatic ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
63662306a36Sopenharmony_ci				size_t count, loff_t *off)
63762306a36Sopenharmony_ci{
63862306a36Sopenharmony_ci	struct inode *inode = file_inode(filp);
63962306a36Sopenharmony_ci	struct mqueue_inode_info *info = MQUEUE_I(inode);
64062306a36Sopenharmony_ci	char buffer[FILENT_SIZE];
64162306a36Sopenharmony_ci	ssize_t ret;
64262306a36Sopenharmony_ci
64362306a36Sopenharmony_ci	spin_lock(&info->lock);
64462306a36Sopenharmony_ci	snprintf(buffer, sizeof(buffer),
64562306a36Sopenharmony_ci			"QSIZE:%-10lu NOTIFY:%-5d SIGNO:%-5d NOTIFY_PID:%-6d\n",
64662306a36Sopenharmony_ci			info->qsize,
64762306a36Sopenharmony_ci			info->notify_owner ? info->notify.sigev_notify : 0,
64862306a36Sopenharmony_ci			(info->notify_owner &&
64962306a36Sopenharmony_ci			 info->notify.sigev_notify == SIGEV_SIGNAL) ?
65062306a36Sopenharmony_ci				info->notify.sigev_signo : 0,
65162306a36Sopenharmony_ci			pid_vnr(info->notify_owner));
65262306a36Sopenharmony_ci	spin_unlock(&info->lock);
65362306a36Sopenharmony_ci	buffer[sizeof(buffer)-1] = '\0';
65462306a36Sopenharmony_ci
65562306a36Sopenharmony_ci	ret = simple_read_from_buffer(u_data, count, off, buffer,
65662306a36Sopenharmony_ci				strlen(buffer));
65762306a36Sopenharmony_ci	if (ret <= 0)
65862306a36Sopenharmony_ci		return ret;
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci	inode->i_atime = inode_set_ctime_current(inode);
66162306a36Sopenharmony_ci	return ret;
66262306a36Sopenharmony_ci}
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_cistatic int mqueue_flush_file(struct file *filp, fl_owner_t id)
66562306a36Sopenharmony_ci{
66662306a36Sopenharmony_ci	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci	spin_lock(&info->lock);
66962306a36Sopenharmony_ci	if (task_tgid(current) == info->notify_owner)
67062306a36Sopenharmony_ci		remove_notification(info);
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci	spin_unlock(&info->lock);
67362306a36Sopenharmony_ci	return 0;
67462306a36Sopenharmony_ci}
67562306a36Sopenharmony_ci
67662306a36Sopenharmony_cistatic __poll_t mqueue_poll_file(struct file *filp, struct poll_table_struct *poll_tab)
67762306a36Sopenharmony_ci{
67862306a36Sopenharmony_ci	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
67962306a36Sopenharmony_ci	__poll_t retval = 0;
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	poll_wait(filp, &info->wait_q, poll_tab);
68262306a36Sopenharmony_ci
68362306a36Sopenharmony_ci	spin_lock(&info->lock);
68462306a36Sopenharmony_ci	if (info->attr.mq_curmsgs)
68562306a36Sopenharmony_ci		retval = EPOLLIN | EPOLLRDNORM;
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci	if (info->attr.mq_curmsgs < info->attr.mq_maxmsg)
68862306a36Sopenharmony_ci		retval |= EPOLLOUT | EPOLLWRNORM;
68962306a36Sopenharmony_ci	spin_unlock(&info->lock);
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci	return retval;
69262306a36Sopenharmony_ci}
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_ci/* Adds current to info->e_wait_q[sr] before element with smaller prio */
69562306a36Sopenharmony_cistatic void wq_add(struct mqueue_inode_info *info, int sr,
69662306a36Sopenharmony_ci			struct ext_wait_queue *ewp)
69762306a36Sopenharmony_ci{
69862306a36Sopenharmony_ci	struct ext_wait_queue *walk;
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ci	list_for_each_entry(walk, &info->e_wait_q[sr].list, list) {
70162306a36Sopenharmony_ci		if (walk->task->prio <= current->prio) {
70262306a36Sopenharmony_ci			list_add_tail(&ewp->list, &walk->list);
70362306a36Sopenharmony_ci			return;
70462306a36Sopenharmony_ci		}
70562306a36Sopenharmony_ci	}
70662306a36Sopenharmony_ci	list_add_tail(&ewp->list, &info->e_wait_q[sr].list);
70762306a36Sopenharmony_ci}
70862306a36Sopenharmony_ci
70962306a36Sopenharmony_ci/*
71062306a36Sopenharmony_ci * Puts current task to sleep. Caller must hold queue lock. After return
71162306a36Sopenharmony_ci * lock isn't held.
71262306a36Sopenharmony_ci * sr: SEND or RECV
71362306a36Sopenharmony_ci */
71462306a36Sopenharmony_cistatic int wq_sleep(struct mqueue_inode_info *info, int sr,
71562306a36Sopenharmony_ci		    ktime_t *timeout, struct ext_wait_queue *ewp)
71662306a36Sopenharmony_ci	__releases(&info->lock)
71762306a36Sopenharmony_ci{
71862306a36Sopenharmony_ci	int retval;
71962306a36Sopenharmony_ci	signed long time;
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	wq_add(info, sr, ewp);
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci	for (;;) {
72462306a36Sopenharmony_ci		/* memory barrier not required, we hold info->lock */
72562306a36Sopenharmony_ci		__set_current_state(TASK_INTERRUPTIBLE);
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci		spin_unlock(&info->lock);
72862306a36Sopenharmony_ci		time = schedule_hrtimeout_range_clock(timeout, 0,
72962306a36Sopenharmony_ci			HRTIMER_MODE_ABS, CLOCK_REALTIME);
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_ci		if (READ_ONCE(ewp->state) == STATE_READY) {
73262306a36Sopenharmony_ci			/* see MQ_BARRIER for purpose/pairing */
73362306a36Sopenharmony_ci			smp_acquire__after_ctrl_dep();
73462306a36Sopenharmony_ci			retval = 0;
73562306a36Sopenharmony_ci			goto out;
73662306a36Sopenharmony_ci		}
73762306a36Sopenharmony_ci		spin_lock(&info->lock);
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci		/* we hold info->lock, so no memory barrier required */
74062306a36Sopenharmony_ci		if (READ_ONCE(ewp->state) == STATE_READY) {
74162306a36Sopenharmony_ci			retval = 0;
74262306a36Sopenharmony_ci			goto out_unlock;
74362306a36Sopenharmony_ci		}
74462306a36Sopenharmony_ci		if (signal_pending(current)) {
74562306a36Sopenharmony_ci			retval = -ERESTARTSYS;
74662306a36Sopenharmony_ci			break;
74762306a36Sopenharmony_ci		}
74862306a36Sopenharmony_ci		if (time == 0) {
74962306a36Sopenharmony_ci			retval = -ETIMEDOUT;
75062306a36Sopenharmony_ci			break;
75162306a36Sopenharmony_ci		}
75262306a36Sopenharmony_ci	}
75362306a36Sopenharmony_ci	list_del(&ewp->list);
75462306a36Sopenharmony_ciout_unlock:
75562306a36Sopenharmony_ci	spin_unlock(&info->lock);
75662306a36Sopenharmony_ciout:
75762306a36Sopenharmony_ci	return retval;
75862306a36Sopenharmony_ci}
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci/*
76162306a36Sopenharmony_ci * Returns waiting task that should be serviced first or NULL if none exists
76262306a36Sopenharmony_ci */
76362306a36Sopenharmony_cistatic struct ext_wait_queue *wq_get_first_waiter(
76462306a36Sopenharmony_ci		struct mqueue_inode_info *info, int sr)
76562306a36Sopenharmony_ci{
76662306a36Sopenharmony_ci	struct list_head *ptr;
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_ci	ptr = info->e_wait_q[sr].list.prev;
76962306a36Sopenharmony_ci	if (ptr == &info->e_wait_q[sr].list)
77062306a36Sopenharmony_ci		return NULL;
77162306a36Sopenharmony_ci	return list_entry(ptr, struct ext_wait_queue, list);
77262306a36Sopenharmony_ci}
77362306a36Sopenharmony_ci
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_cistatic inline void set_cookie(struct sk_buff *skb, char code)
77662306a36Sopenharmony_ci{
77762306a36Sopenharmony_ci	((char *)skb->data)[NOTIFY_COOKIE_LEN-1] = code;
77862306a36Sopenharmony_ci}
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci/*
78162306a36Sopenharmony_ci * The next function is only to split too long sys_mq_timedsend
78262306a36Sopenharmony_ci */
78362306a36Sopenharmony_cistatic void __do_notify(struct mqueue_inode_info *info)
78462306a36Sopenharmony_ci{
78562306a36Sopenharmony_ci	/* notification
78662306a36Sopenharmony_ci	 * invoked when there is registered process and there isn't process
78762306a36Sopenharmony_ci	 * waiting synchronously for message AND state of queue changed from
78862306a36Sopenharmony_ci	 * empty to not empty. Here we are sure that no one is waiting
78962306a36Sopenharmony_ci	 * synchronously. */
79062306a36Sopenharmony_ci	if (info->notify_owner &&
79162306a36Sopenharmony_ci	    info->attr.mq_curmsgs == 1) {
79262306a36Sopenharmony_ci		switch (info->notify.sigev_notify) {
79362306a36Sopenharmony_ci		case SIGEV_NONE:
79462306a36Sopenharmony_ci			break;
79562306a36Sopenharmony_ci		case SIGEV_SIGNAL: {
79662306a36Sopenharmony_ci			struct kernel_siginfo sig_i;
79762306a36Sopenharmony_ci			struct task_struct *task;
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci			/* do_mq_notify() accepts sigev_signo == 0, why?? */
80062306a36Sopenharmony_ci			if (!info->notify.sigev_signo)
80162306a36Sopenharmony_ci				break;
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci			clear_siginfo(&sig_i);
80462306a36Sopenharmony_ci			sig_i.si_signo = info->notify.sigev_signo;
80562306a36Sopenharmony_ci			sig_i.si_errno = 0;
80662306a36Sopenharmony_ci			sig_i.si_code = SI_MESGQ;
80762306a36Sopenharmony_ci			sig_i.si_value = info->notify.sigev_value;
80862306a36Sopenharmony_ci			rcu_read_lock();
80962306a36Sopenharmony_ci			/* map current pid/uid into info->owner's namespaces */
81062306a36Sopenharmony_ci			sig_i.si_pid = task_tgid_nr_ns(current,
81162306a36Sopenharmony_ci						ns_of_pid(info->notify_owner));
81262306a36Sopenharmony_ci			sig_i.si_uid = from_kuid_munged(info->notify_user_ns,
81362306a36Sopenharmony_ci						current_uid());
81462306a36Sopenharmony_ci			/*
81562306a36Sopenharmony_ci			 * We can't use kill_pid_info(), this signal should
81662306a36Sopenharmony_ci			 * bypass check_kill_permission(). It is from kernel
81762306a36Sopenharmony_ci			 * but si_fromuser() can't know this.
81862306a36Sopenharmony_ci			 * We do check the self_exec_id, to avoid sending
81962306a36Sopenharmony_ci			 * signals to programs that don't expect them.
82062306a36Sopenharmony_ci			 */
82162306a36Sopenharmony_ci			task = pid_task(info->notify_owner, PIDTYPE_TGID);
82262306a36Sopenharmony_ci			if (task && task->self_exec_id ==
82362306a36Sopenharmony_ci						info->notify_self_exec_id) {
82462306a36Sopenharmony_ci				do_send_sig_info(info->notify.sigev_signo,
82562306a36Sopenharmony_ci						&sig_i, task, PIDTYPE_TGID);
82662306a36Sopenharmony_ci			}
82762306a36Sopenharmony_ci			rcu_read_unlock();
82862306a36Sopenharmony_ci			break;
82962306a36Sopenharmony_ci		}
83062306a36Sopenharmony_ci		case SIGEV_THREAD:
83162306a36Sopenharmony_ci			set_cookie(info->notify_cookie, NOTIFY_WOKENUP);
83262306a36Sopenharmony_ci			netlink_sendskb(info->notify_sock, info->notify_cookie);
83362306a36Sopenharmony_ci			break;
83462306a36Sopenharmony_ci		}
83562306a36Sopenharmony_ci		/* after notification unregisters process */
83662306a36Sopenharmony_ci		put_pid(info->notify_owner);
83762306a36Sopenharmony_ci		put_user_ns(info->notify_user_ns);
83862306a36Sopenharmony_ci		info->notify_owner = NULL;
83962306a36Sopenharmony_ci		info->notify_user_ns = NULL;
84062306a36Sopenharmony_ci	}
84162306a36Sopenharmony_ci	wake_up(&info->wait_q);
84262306a36Sopenharmony_ci}
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_cistatic int prepare_timeout(const struct __kernel_timespec __user *u_abs_timeout,
84562306a36Sopenharmony_ci			   struct timespec64 *ts)
84662306a36Sopenharmony_ci{
84762306a36Sopenharmony_ci	if (get_timespec64(ts, u_abs_timeout))
84862306a36Sopenharmony_ci		return -EFAULT;
84962306a36Sopenharmony_ci	if (!timespec64_valid(ts))
85062306a36Sopenharmony_ci		return -EINVAL;
85162306a36Sopenharmony_ci	return 0;
85262306a36Sopenharmony_ci}
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_cistatic void remove_notification(struct mqueue_inode_info *info)
85562306a36Sopenharmony_ci{
85662306a36Sopenharmony_ci	if (info->notify_owner != NULL &&
85762306a36Sopenharmony_ci	    info->notify.sigev_notify == SIGEV_THREAD) {
85862306a36Sopenharmony_ci		set_cookie(info->notify_cookie, NOTIFY_REMOVED);
85962306a36Sopenharmony_ci		netlink_sendskb(info->notify_sock, info->notify_cookie);
86062306a36Sopenharmony_ci	}
86162306a36Sopenharmony_ci	put_pid(info->notify_owner);
86262306a36Sopenharmony_ci	put_user_ns(info->notify_user_ns);
86362306a36Sopenharmony_ci	info->notify_owner = NULL;
86462306a36Sopenharmony_ci	info->notify_user_ns = NULL;
86562306a36Sopenharmony_ci}
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_cistatic int prepare_open(struct dentry *dentry, int oflag, int ro,
86862306a36Sopenharmony_ci			umode_t mode, struct filename *name,
86962306a36Sopenharmony_ci			struct mq_attr *attr)
87062306a36Sopenharmony_ci{
87162306a36Sopenharmony_ci	static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE,
87262306a36Sopenharmony_ci						  MAY_READ | MAY_WRITE };
87362306a36Sopenharmony_ci	int acc;
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci	if (d_really_is_negative(dentry)) {
87662306a36Sopenharmony_ci		if (!(oflag & O_CREAT))
87762306a36Sopenharmony_ci			return -ENOENT;
87862306a36Sopenharmony_ci		if (ro)
87962306a36Sopenharmony_ci			return ro;
88062306a36Sopenharmony_ci		audit_inode_parent_hidden(name, dentry->d_parent);
88162306a36Sopenharmony_ci		return vfs_mkobj(dentry, mode & ~current_umask(),
88262306a36Sopenharmony_ci				  mqueue_create_attr, attr);
88362306a36Sopenharmony_ci	}
88462306a36Sopenharmony_ci	/* it already existed */
88562306a36Sopenharmony_ci	audit_inode(name, dentry, 0);
88662306a36Sopenharmony_ci	if ((oflag & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
88762306a36Sopenharmony_ci		return -EEXIST;
88862306a36Sopenharmony_ci	if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
88962306a36Sopenharmony_ci		return -EINVAL;
89062306a36Sopenharmony_ci	acc = oflag2acc[oflag & O_ACCMODE];
89162306a36Sopenharmony_ci	return inode_permission(&nop_mnt_idmap, d_inode(dentry), acc);
89262306a36Sopenharmony_ci}
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_cistatic int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
89562306a36Sopenharmony_ci		      struct mq_attr *attr)
89662306a36Sopenharmony_ci{
89762306a36Sopenharmony_ci	struct vfsmount *mnt = current->nsproxy->ipc_ns->mq_mnt;
89862306a36Sopenharmony_ci	struct dentry *root = mnt->mnt_root;
89962306a36Sopenharmony_ci	struct filename *name;
90062306a36Sopenharmony_ci	struct path path;
90162306a36Sopenharmony_ci	int fd, error;
90262306a36Sopenharmony_ci	int ro;
90362306a36Sopenharmony_ci
90462306a36Sopenharmony_ci	audit_mq_open(oflag, mode, attr);
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci	if (IS_ERR(name = getname(u_name)))
90762306a36Sopenharmony_ci		return PTR_ERR(name);
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	fd = get_unused_fd_flags(O_CLOEXEC);
91062306a36Sopenharmony_ci	if (fd < 0)
91162306a36Sopenharmony_ci		goto out_putname;
91262306a36Sopenharmony_ci
91362306a36Sopenharmony_ci	ro = mnt_want_write(mnt);	/* we'll drop it in any case */
91462306a36Sopenharmony_ci	inode_lock(d_inode(root));
91562306a36Sopenharmony_ci	path.dentry = lookup_one_len(name->name, root, strlen(name->name));
91662306a36Sopenharmony_ci	if (IS_ERR(path.dentry)) {
91762306a36Sopenharmony_ci		error = PTR_ERR(path.dentry);
91862306a36Sopenharmony_ci		goto out_putfd;
91962306a36Sopenharmony_ci	}
92062306a36Sopenharmony_ci	path.mnt = mntget(mnt);
92162306a36Sopenharmony_ci	error = prepare_open(path.dentry, oflag, ro, mode, name, attr);
92262306a36Sopenharmony_ci	if (!error) {
92362306a36Sopenharmony_ci		struct file *file = dentry_open(&path, oflag, current_cred());
92462306a36Sopenharmony_ci		if (!IS_ERR(file))
92562306a36Sopenharmony_ci			fd_install(fd, file);
92662306a36Sopenharmony_ci		else
92762306a36Sopenharmony_ci			error = PTR_ERR(file);
92862306a36Sopenharmony_ci	}
92962306a36Sopenharmony_ci	path_put(&path);
93062306a36Sopenharmony_ciout_putfd:
93162306a36Sopenharmony_ci	if (error) {
93262306a36Sopenharmony_ci		put_unused_fd(fd);
93362306a36Sopenharmony_ci		fd = error;
93462306a36Sopenharmony_ci	}
93562306a36Sopenharmony_ci	inode_unlock(d_inode(root));
93662306a36Sopenharmony_ci	if (!ro)
93762306a36Sopenharmony_ci		mnt_drop_write(mnt);
93862306a36Sopenharmony_ciout_putname:
93962306a36Sopenharmony_ci	putname(name);
94062306a36Sopenharmony_ci	return fd;
94162306a36Sopenharmony_ci}
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ciSYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode,
94462306a36Sopenharmony_ci		struct mq_attr __user *, u_attr)
94562306a36Sopenharmony_ci{
94662306a36Sopenharmony_ci	struct mq_attr attr;
94762306a36Sopenharmony_ci	if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr)))
94862306a36Sopenharmony_ci		return -EFAULT;
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci	return do_mq_open(u_name, oflag, mode, u_attr ? &attr : NULL);
95162306a36Sopenharmony_ci}
95262306a36Sopenharmony_ci
95362306a36Sopenharmony_ciSYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
95462306a36Sopenharmony_ci{
95562306a36Sopenharmony_ci	int err;
95662306a36Sopenharmony_ci	struct filename *name;
95762306a36Sopenharmony_ci	struct dentry *dentry;
95862306a36Sopenharmony_ci	struct inode *inode = NULL;
95962306a36Sopenharmony_ci	struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
96062306a36Sopenharmony_ci	struct vfsmount *mnt = ipc_ns->mq_mnt;
96162306a36Sopenharmony_ci
96262306a36Sopenharmony_ci	name = getname(u_name);
96362306a36Sopenharmony_ci	if (IS_ERR(name))
96462306a36Sopenharmony_ci		return PTR_ERR(name);
96562306a36Sopenharmony_ci
96662306a36Sopenharmony_ci	audit_inode_parent_hidden(name, mnt->mnt_root);
96762306a36Sopenharmony_ci	err = mnt_want_write(mnt);
96862306a36Sopenharmony_ci	if (err)
96962306a36Sopenharmony_ci		goto out_name;
97062306a36Sopenharmony_ci	inode_lock_nested(d_inode(mnt->mnt_root), I_MUTEX_PARENT);
97162306a36Sopenharmony_ci	dentry = lookup_one_len(name->name, mnt->mnt_root,
97262306a36Sopenharmony_ci				strlen(name->name));
97362306a36Sopenharmony_ci	if (IS_ERR(dentry)) {
97462306a36Sopenharmony_ci		err = PTR_ERR(dentry);
97562306a36Sopenharmony_ci		goto out_unlock;
97662306a36Sopenharmony_ci	}
97762306a36Sopenharmony_ci
97862306a36Sopenharmony_ci	inode = d_inode(dentry);
97962306a36Sopenharmony_ci	if (!inode) {
98062306a36Sopenharmony_ci		err = -ENOENT;
98162306a36Sopenharmony_ci	} else {
98262306a36Sopenharmony_ci		ihold(inode);
98362306a36Sopenharmony_ci		err = vfs_unlink(&nop_mnt_idmap, d_inode(dentry->d_parent),
98462306a36Sopenharmony_ci				 dentry, NULL);
98562306a36Sopenharmony_ci	}
98662306a36Sopenharmony_ci	dput(dentry);
98762306a36Sopenharmony_ci
98862306a36Sopenharmony_ciout_unlock:
98962306a36Sopenharmony_ci	inode_unlock(d_inode(mnt->mnt_root));
99062306a36Sopenharmony_ci	iput(inode);
99162306a36Sopenharmony_ci	mnt_drop_write(mnt);
99262306a36Sopenharmony_ciout_name:
99362306a36Sopenharmony_ci	putname(name);
99462306a36Sopenharmony_ci
99562306a36Sopenharmony_ci	return err;
99662306a36Sopenharmony_ci}
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci/* Pipelined send and receive functions.
99962306a36Sopenharmony_ci *
100062306a36Sopenharmony_ci * If a receiver finds no waiting message, then it registers itself in the
100162306a36Sopenharmony_ci * list of waiting receivers. A sender checks that list before adding the new
100262306a36Sopenharmony_ci * message into the message array. If there is a waiting receiver, then it
100362306a36Sopenharmony_ci * bypasses the message array and directly hands the message over to the
100462306a36Sopenharmony_ci * receiver. The receiver accepts the message and returns without grabbing the
100562306a36Sopenharmony_ci * queue spinlock:
100662306a36Sopenharmony_ci *
100762306a36Sopenharmony_ci * - Set pointer to message.
100862306a36Sopenharmony_ci * - Queue the receiver task for later wakeup (without the info->lock).
100962306a36Sopenharmony_ci * - Update its state to STATE_READY. Now the receiver can continue.
101062306a36Sopenharmony_ci * - Wake up the process after the lock is dropped. Should the process wake up
101162306a36Sopenharmony_ci *   before this wakeup (due to a timeout or a signal) it will either see
101262306a36Sopenharmony_ci *   STATE_READY and continue or acquire the lock to check the state again.
101362306a36Sopenharmony_ci *
101462306a36Sopenharmony_ci * The same algorithm is used for senders.
101562306a36Sopenharmony_ci */
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_cistatic inline void __pipelined_op(struct wake_q_head *wake_q,
101862306a36Sopenharmony_ci				  struct mqueue_inode_info *info,
101962306a36Sopenharmony_ci				  struct ext_wait_queue *this)
102062306a36Sopenharmony_ci{
102162306a36Sopenharmony_ci	struct task_struct *task;
102262306a36Sopenharmony_ci
102362306a36Sopenharmony_ci	list_del(&this->list);
102462306a36Sopenharmony_ci	task = get_task_struct(this->task);
102562306a36Sopenharmony_ci
102662306a36Sopenharmony_ci	/* see MQ_BARRIER for purpose/pairing */
102762306a36Sopenharmony_ci	smp_store_release(&this->state, STATE_READY);
102862306a36Sopenharmony_ci	wake_q_add_safe(wake_q, task);
102962306a36Sopenharmony_ci}
103062306a36Sopenharmony_ci
103162306a36Sopenharmony_ci/* pipelined_send() - send a message directly to the task waiting in
103262306a36Sopenharmony_ci * sys_mq_timedreceive() (without inserting message into a queue).
103362306a36Sopenharmony_ci */
103462306a36Sopenharmony_cistatic inline void pipelined_send(struct wake_q_head *wake_q,
103562306a36Sopenharmony_ci				  struct mqueue_inode_info *info,
103662306a36Sopenharmony_ci				  struct msg_msg *message,
103762306a36Sopenharmony_ci				  struct ext_wait_queue *receiver)
103862306a36Sopenharmony_ci{
103962306a36Sopenharmony_ci	receiver->msg = message;
104062306a36Sopenharmony_ci	__pipelined_op(wake_q, info, receiver);
104162306a36Sopenharmony_ci}
104262306a36Sopenharmony_ci
104362306a36Sopenharmony_ci/* pipelined_receive() - if there is task waiting in sys_mq_timedsend()
104462306a36Sopenharmony_ci * gets its message and put to the queue (we have one free place for sure). */
104562306a36Sopenharmony_cistatic inline void pipelined_receive(struct wake_q_head *wake_q,
104662306a36Sopenharmony_ci				     struct mqueue_inode_info *info)
104762306a36Sopenharmony_ci{
104862306a36Sopenharmony_ci	struct ext_wait_queue *sender = wq_get_first_waiter(info, SEND);
104962306a36Sopenharmony_ci
105062306a36Sopenharmony_ci	if (!sender) {
105162306a36Sopenharmony_ci		/* for poll */
105262306a36Sopenharmony_ci		wake_up_interruptible(&info->wait_q);
105362306a36Sopenharmony_ci		return;
105462306a36Sopenharmony_ci	}
105562306a36Sopenharmony_ci	if (msg_insert(sender->msg, info))
105662306a36Sopenharmony_ci		return;
105762306a36Sopenharmony_ci
105862306a36Sopenharmony_ci	__pipelined_op(wake_q, info, sender);
105962306a36Sopenharmony_ci}
106062306a36Sopenharmony_ci
106162306a36Sopenharmony_cistatic int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
106262306a36Sopenharmony_ci		size_t msg_len, unsigned int msg_prio,
106362306a36Sopenharmony_ci		struct timespec64 *ts)
106462306a36Sopenharmony_ci{
106562306a36Sopenharmony_ci	struct fd f;
106662306a36Sopenharmony_ci	struct inode *inode;
106762306a36Sopenharmony_ci	struct ext_wait_queue wait;
106862306a36Sopenharmony_ci	struct ext_wait_queue *receiver;
106962306a36Sopenharmony_ci	struct msg_msg *msg_ptr;
107062306a36Sopenharmony_ci	struct mqueue_inode_info *info;
107162306a36Sopenharmony_ci	ktime_t expires, *timeout = NULL;
107262306a36Sopenharmony_ci	struct posix_msg_tree_node *new_leaf = NULL;
107362306a36Sopenharmony_ci	int ret = 0;
107462306a36Sopenharmony_ci	DEFINE_WAKE_Q(wake_q);
107562306a36Sopenharmony_ci
107662306a36Sopenharmony_ci	if (unlikely(msg_prio >= (unsigned long) MQ_PRIO_MAX))
107762306a36Sopenharmony_ci		return -EINVAL;
107862306a36Sopenharmony_ci
107962306a36Sopenharmony_ci	if (ts) {
108062306a36Sopenharmony_ci		expires = timespec64_to_ktime(*ts);
108162306a36Sopenharmony_ci		timeout = &expires;
108262306a36Sopenharmony_ci	}
108362306a36Sopenharmony_ci
108462306a36Sopenharmony_ci	audit_mq_sendrecv(mqdes, msg_len, msg_prio, ts);
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci	f = fdget(mqdes);
108762306a36Sopenharmony_ci	if (unlikely(!f.file)) {
108862306a36Sopenharmony_ci		ret = -EBADF;
108962306a36Sopenharmony_ci		goto out;
109062306a36Sopenharmony_ci	}
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_ci	inode = file_inode(f.file);
109362306a36Sopenharmony_ci	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
109462306a36Sopenharmony_ci		ret = -EBADF;
109562306a36Sopenharmony_ci		goto out_fput;
109662306a36Sopenharmony_ci	}
109762306a36Sopenharmony_ci	info = MQUEUE_I(inode);
109862306a36Sopenharmony_ci	audit_file(f.file);
109962306a36Sopenharmony_ci
110062306a36Sopenharmony_ci	if (unlikely(!(f.file->f_mode & FMODE_WRITE))) {
110162306a36Sopenharmony_ci		ret = -EBADF;
110262306a36Sopenharmony_ci		goto out_fput;
110362306a36Sopenharmony_ci	}
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci	if (unlikely(msg_len > info->attr.mq_msgsize)) {
110662306a36Sopenharmony_ci		ret = -EMSGSIZE;
110762306a36Sopenharmony_ci		goto out_fput;
110862306a36Sopenharmony_ci	}
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_ci	/* First try to allocate memory, before doing anything with
111162306a36Sopenharmony_ci	 * existing queues. */
111262306a36Sopenharmony_ci	msg_ptr = load_msg(u_msg_ptr, msg_len);
111362306a36Sopenharmony_ci	if (IS_ERR(msg_ptr)) {
111462306a36Sopenharmony_ci		ret = PTR_ERR(msg_ptr);
111562306a36Sopenharmony_ci		goto out_fput;
111662306a36Sopenharmony_ci	}
111762306a36Sopenharmony_ci	msg_ptr->m_ts = msg_len;
111862306a36Sopenharmony_ci	msg_ptr->m_type = msg_prio;
111962306a36Sopenharmony_ci
112062306a36Sopenharmony_ci	/*
112162306a36Sopenharmony_ci	 * msg_insert really wants us to have a valid, spare node struct so
112262306a36Sopenharmony_ci	 * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
112362306a36Sopenharmony_ci	 * fall back to that if necessary.
112462306a36Sopenharmony_ci	 */
112562306a36Sopenharmony_ci	if (!info->node_cache)
112662306a36Sopenharmony_ci		new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ci	spin_lock(&info->lock);
112962306a36Sopenharmony_ci
113062306a36Sopenharmony_ci	if (!info->node_cache && new_leaf) {
113162306a36Sopenharmony_ci		/* Save our speculative allocation into the cache */
113262306a36Sopenharmony_ci		INIT_LIST_HEAD(&new_leaf->msg_list);
113362306a36Sopenharmony_ci		info->node_cache = new_leaf;
113462306a36Sopenharmony_ci		new_leaf = NULL;
113562306a36Sopenharmony_ci	} else {
113662306a36Sopenharmony_ci		kfree(new_leaf);
113762306a36Sopenharmony_ci	}
113862306a36Sopenharmony_ci
113962306a36Sopenharmony_ci	if (info->attr.mq_curmsgs == info->attr.mq_maxmsg) {
114062306a36Sopenharmony_ci		if (f.file->f_flags & O_NONBLOCK) {
114162306a36Sopenharmony_ci			ret = -EAGAIN;
114262306a36Sopenharmony_ci		} else {
114362306a36Sopenharmony_ci			wait.task = current;
114462306a36Sopenharmony_ci			wait.msg = (void *) msg_ptr;
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_ci			/* memory barrier not required, we hold info->lock */
114762306a36Sopenharmony_ci			WRITE_ONCE(wait.state, STATE_NONE);
114862306a36Sopenharmony_ci			ret = wq_sleep(info, SEND, timeout, &wait);
114962306a36Sopenharmony_ci			/*
115062306a36Sopenharmony_ci			 * wq_sleep must be called with info->lock held, and
115162306a36Sopenharmony_ci			 * returns with the lock released
115262306a36Sopenharmony_ci			 */
115362306a36Sopenharmony_ci			goto out_free;
115462306a36Sopenharmony_ci		}
115562306a36Sopenharmony_ci	} else {
115662306a36Sopenharmony_ci		receiver = wq_get_first_waiter(info, RECV);
115762306a36Sopenharmony_ci		if (receiver) {
115862306a36Sopenharmony_ci			pipelined_send(&wake_q, info, msg_ptr, receiver);
115962306a36Sopenharmony_ci		} else {
116062306a36Sopenharmony_ci			/* adds message to the queue */
116162306a36Sopenharmony_ci			ret = msg_insert(msg_ptr, info);
116262306a36Sopenharmony_ci			if (ret)
116362306a36Sopenharmony_ci				goto out_unlock;
116462306a36Sopenharmony_ci			__do_notify(info);
116562306a36Sopenharmony_ci		}
116662306a36Sopenharmony_ci		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
116762306a36Sopenharmony_ci	}
116862306a36Sopenharmony_ciout_unlock:
116962306a36Sopenharmony_ci	spin_unlock(&info->lock);
117062306a36Sopenharmony_ci	wake_up_q(&wake_q);
117162306a36Sopenharmony_ciout_free:
117262306a36Sopenharmony_ci	if (ret)
117362306a36Sopenharmony_ci		free_msg(msg_ptr);
117462306a36Sopenharmony_ciout_fput:
117562306a36Sopenharmony_ci	fdput(f);
117662306a36Sopenharmony_ciout:
117762306a36Sopenharmony_ci	return ret;
117862306a36Sopenharmony_ci}
117962306a36Sopenharmony_ci
/*
 * do_mq_timedreceive - common backend for the mq_timedreceive() syscall variants
 * @mqdes:      descriptor of the message queue to receive from
 * @u_msg_ptr:  user buffer that receives the message body
 * @msg_len:    size of @u_msg_ptr; must be at least the queue's mq_msgsize
 * @u_msg_prio: optional user pointer that receives the message's m_type
 * @ts:         optional timeout already copied into kernel space, or NULL
 *
 * Returns the size of the received message (its m_ts) on success, or a
 * negative errno: -EBADF if @mqdes is not a message queue opened for reading,
 * -EMSGSIZE if the buffer is smaller than mq_msgsize, -EAGAIN if the queue is
 * empty and the file is O_NONBLOCK, -EFAULT if copying the result to user
 * space fails, or whatever non-zero value wq_sleep() reports.
 */
static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
		size_t msg_len, unsigned int __user *u_msg_prio,
		struct timespec64 *ts)
{
	ssize_t ret;
	struct msg_msg *msg_ptr;
	struct fd f;
	struct inode *inode;
	struct mqueue_inode_info *info;
	struct ext_wait_queue wait;
	ktime_t expires, *timeout = NULL;
	struct posix_msg_tree_node *new_leaf = NULL;

	/* Convert the optional timeout once; timeout stays NULL without one. */
	if (ts) {
		expires = timespec64_to_ktime(*ts);
		timeout = &expires;
	}

	audit_mq_sendrecv(mqdes, msg_len, 0, ts);

	f = fdget(mqdes);
	if (unlikely(!f.file)) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	/* Reject descriptors that do not refer to a message queue file. */
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);
	audit_file(f.file);

	/* The queue must have been opened for reading. */
	if (unlikely(!(f.file->f_mode & FMODE_READ))) {
		ret = -EBADF;
		goto out_fput;
	}

	/* checks if buffer is big enough */
	if (unlikely(msg_len < info->attr.mq_msgsize)) {
		ret = -EMSGSIZE;
		goto out_fput;
	}

	/*
	 * msg_insert really wants us to have a valid, spare node struct so
	 * it doesn't have to kmalloc a GFP_ATOMIC allocation, but it will
	 * fall back to that if necessary.
	 */
	/*
	 * This check runs without info->lock, so it is only a hint; the
	 * result is re-checked under the lock below.
	 */
	if (!info->node_cache)
		new_leaf = kmalloc(sizeof(*new_leaf), GFP_KERNEL);

	spin_lock(&info->lock);

	if (!info->node_cache && new_leaf) {
		/* Save our speculative allocation into the cache */
		INIT_LIST_HEAD(&new_leaf->msg_list);
		info->node_cache = new_leaf;
	} else {
		/*
		 * Either the cache is already populated or the allocation
		 * failed (new_leaf is NULL); drop whatever we hold.
		 */
		kfree(new_leaf);
	}

	if (info->attr.mq_curmsgs == 0) {
		if (f.file->f_flags & O_NONBLOCK) {
			spin_unlock(&info->lock);
			ret = -EAGAIN;
		} else {
			wait.task = current;

			/* memory barrier not required, we hold info->lock */
			WRITE_ONCE(wait.state, STATE_NONE);
			/*
			 * NOTE(review): no spin_unlock() follows on this path,
			 * so wq_sleep() is expected to release info->lock
			 * before returning — confirm against its definition.
			 */
			ret = wq_sleep(info, RECV, timeout, &wait);
			/* Only consumed below when ret == 0. */
			msg_ptr = wait.msg;
		}
	} else {
		DEFINE_WAKE_Q(wake_q);

		msg_ptr = msg_get(info);

		inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);

		/* There is now free space in queue. */
		pipelined_receive(&wake_q, info);
		spin_unlock(&info->lock);
		/* Wake-ups are issued only after the lock has been dropped. */
		wake_up_q(&wake_q);
		ret = 0;
	}
	if (ret == 0) {
		/* On success the return value is the message size. */
		ret = msg_ptr->m_ts;

		if ((u_msg_prio && put_user(msg_ptr->m_type, u_msg_prio)) ||
			store_msg(u_msg_ptr, msg_ptr, msg_ptr->m_ts)) {
			ret = -EFAULT;
		}
		/* The message is freed whether or not the copy-out succeeded. */
		free_msg(msg_ptr);
	}
out_fput:
	fdput(f);
out:
	return ret;
}
128262306a36Sopenharmony_ci
128362306a36Sopenharmony_ciSYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
128462306a36Sopenharmony_ci		size_t, msg_len, unsigned int, msg_prio,
128562306a36Sopenharmony_ci		const struct __kernel_timespec __user *, u_abs_timeout)
128662306a36Sopenharmony_ci{
128762306a36Sopenharmony_ci	struct timespec64 ts, *p = NULL;
128862306a36Sopenharmony_ci	if (u_abs_timeout) {
128962306a36Sopenharmony_ci		int res = prepare_timeout(u_abs_timeout, &ts);
129062306a36Sopenharmony_ci		if (res)
129162306a36Sopenharmony_ci			return res;
129262306a36Sopenharmony_ci		p = &ts;
129362306a36Sopenharmony_ci	}
129462306a36Sopenharmony_ci	return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
129562306a36Sopenharmony_ci}
129662306a36Sopenharmony_ci
129762306a36Sopenharmony_ciSYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
129862306a36Sopenharmony_ci		size_t, msg_len, unsigned int __user *, u_msg_prio,
129962306a36Sopenharmony_ci		const struct __kernel_timespec __user *, u_abs_timeout)
130062306a36Sopenharmony_ci{
130162306a36Sopenharmony_ci	struct timespec64 ts, *p = NULL;
130262306a36Sopenharmony_ci	if (u_abs_timeout) {
130362306a36Sopenharmony_ci		int res = prepare_timeout(u_abs_timeout, &ts);
130462306a36Sopenharmony_ci		if (res)
130562306a36Sopenharmony_ci			return res;
130662306a36Sopenharmony_ci		p = &ts;
130762306a36Sopenharmony_ci	}
130862306a36Sopenharmony_ci	return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
130962306a36Sopenharmony_ci}
131062306a36Sopenharmony_ci
/*
 * do_mq_notify - register or remove the notification for a message queue
 * @mqdes:        descriptor of the message queue
 * @notification: kernel copy of the requested sigevent, or NULL to deregister
 *
 * Note: a deregistration request (NULL @notification) from a process that
 * does not currently own the notification is silently ignored. POSIX does
 * not explicitly define this case.
 *
 * For SIGEV_THREAD, sigev_signo is used as the file descriptor of a netlink
 * socket and sigev_value.sival_ptr as a user pointer to a cookie of
 * NOTIFY_COOKIE_LEN bytes, which is copied into an skb.
 *
 * Returns 0 on success or a negative errno: -EINVAL for an unsupported
 * sigev_notify or an invalid signal number, -ENOMEM if the skb cannot be
 * allocated, -EFAULT if the cookie cannot be copied, -EBADF for a bad
 * descriptor (the socket fd, or @mqdes not being a message queue), -EBUSY if
 * a notification is already registered, or an error from
 * netlink_getsockbyfilp()/netlink_attachskb().
 */
static int do_mq_notify(mqd_t mqdes, const struct sigevent *notification)
{
	int ret;
	struct fd f;
	struct sock *sock;
	struct inode *inode;
	struct mqueue_inode_info *info;
	struct sk_buff *nc;

	audit_mq_notify(mqdes, notification);

	nc = NULL;
	sock = NULL;
	if (notification != NULL) {
		/* Only these three notification types are accepted. */
		if (unlikely(notification->sigev_notify != SIGEV_NONE &&
			     notification->sigev_notify != SIGEV_SIGNAL &&
			     notification->sigev_notify != SIGEV_THREAD))
			return -EINVAL;
		if (notification->sigev_notify == SIGEV_SIGNAL &&
			!valid_signal(notification->sigev_signo)) {
			return -EINVAL;
		}
		if (notification->sigev_notify == SIGEV_THREAD) {
			long timeo;

			/* create the notify skb */
			nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL);
			if (!nc)
				return -ENOMEM;

			if (copy_from_user(nc->data,
					notification->sigev_value.sival_ptr,
					NOTIFY_COOKIE_LEN)) {
				ret = -EFAULT;
				goto free_skb;
			}

			/* TODO: add a header? */
			skb_put(nc, NOTIFY_COOKIE_LEN);
			/* and attach it to the socket */
retry:
			f = fdget(notification->sigev_signo);
			if (!f.file) {
				ret = -EBADF;
				/* sock is NULL here, so "out" only frees nc. */
				goto out;
			}
			sock = netlink_getsockbyfilp(f.file);
			fdput(f);
			if (IS_ERR(sock)) {
				ret = PTR_ERR(sock);
				/*
				 * sock holds an error pointer (non-NULL), so
				 * skip the "if (sock)" test and free nc directly.
				 */
				goto free_skb;
			}

			timeo = MAX_SCHEDULE_TIMEOUT;
			ret = netlink_attachskb(sock, nc, &timeo, NULL);
			if (ret == 1) {
				/* A return of 1 means: look the socket up again. */
				sock = NULL;
				goto retry;
			}
			/*
			 * NOTE(review): returns without touching nc or sock —
			 * presumably netlink_attachskb() has already released
			 * both on error; confirm against its definition.
			 */
			if (ret)
				return ret;
		}
	}

	f = fdget(mqdes);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	inode = file_inode(f.file);
	/* Reject descriptors that do not refer to a message queue file. */
	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
		ret = -EBADF;
		goto out_fput;
	}
	info = MQUEUE_I(inode);

	ret = 0;
	spin_lock(&info->lock);
	if (notification == NULL) {
		/* Deregister only if the calling thread group is the owner. */
		if (info->notify_owner == task_tgid(current)) {
			remove_notification(info);
			inode->i_atime = inode_set_ctime_current(inode);
		}
	} else if (info->notify_owner != NULL) {
		/* A notification is already registered on this queue. */
		ret = -EBUSY;
	} else {
		switch (notification->sigev_notify) {
		case SIGEV_NONE:
			info->notify.sigev_notify = SIGEV_NONE;
			break;
		case SIGEV_THREAD:
			/*
			 * Hand the socket and cookie skb over to the queue and
			 * clear the locals so the cleanup below leaves them alone.
			 */
			info->notify_sock = sock;
			info->notify_cookie = nc;
			sock = NULL;
			nc = NULL;
			info->notify.sigev_notify = SIGEV_THREAD;
			break;
		case SIGEV_SIGNAL:
			info->notify.sigev_signo = notification->sigev_signo;
			info->notify.sigev_value = notification->sigev_value;
			info->notify.sigev_notify = SIGEV_SIGNAL;
			info->notify_self_exec_id = current->self_exec_id;
			break;
		}

		/* Record the registering thread group and its user namespace. */
		info->notify_owner = get_pid(task_tgid(current));
		info->notify_user_ns = get_user_ns(current_user_ns());
		inode->i_atime = inode_set_ctime_current(inode);
	}
	spin_unlock(&info->lock);
out_fput:
	fdput(f);
out:
	/*
	 * If a socket is still held locally, detach the skb from it; otherwise
	 * just free the skb (nc may be NULL here). The free_skb label sits
	 * inside the else branch so that gotos taken while sock is not a
	 * usable socket land directly on dev_kfree_skb().
	 */
	if (sock)
		netlink_detachskb(sock, nc);
	else
free_skb:
		dev_kfree_skb(nc);

	return ret;
}
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ciSYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
144062306a36Sopenharmony_ci		const struct sigevent __user *, u_notification)
144162306a36Sopenharmony_ci{
144262306a36Sopenharmony_ci	struct sigevent n, *p = NULL;
144362306a36Sopenharmony_ci	if (u_notification) {
144462306a36Sopenharmony_ci		if (copy_from_user(&n, u_notification, sizeof(struct sigevent)))
144562306a36Sopenharmony_ci			return -EFAULT;
144662306a36Sopenharmony_ci		p = &n;
144762306a36Sopenharmony_ci	}
144862306a36Sopenharmony_ci	return do_mq_notify(mqdes, p);
144962306a36Sopenharmony_ci}
145062306a36Sopenharmony_ci
145162306a36Sopenharmony_cistatic int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old)
145262306a36Sopenharmony_ci{
145362306a36Sopenharmony_ci	struct fd f;
145462306a36Sopenharmony_ci	struct inode *inode;
145562306a36Sopenharmony_ci	struct mqueue_inode_info *info;
145662306a36Sopenharmony_ci
145762306a36Sopenharmony_ci	if (new && (new->mq_flags & (~O_NONBLOCK)))
145862306a36Sopenharmony_ci		return -EINVAL;
145962306a36Sopenharmony_ci
146062306a36Sopenharmony_ci	f = fdget(mqdes);
146162306a36Sopenharmony_ci	if (!f.file)
146262306a36Sopenharmony_ci		return -EBADF;
146362306a36Sopenharmony_ci
146462306a36Sopenharmony_ci	if (unlikely(f.file->f_op != &mqueue_file_operations)) {
146562306a36Sopenharmony_ci		fdput(f);
146662306a36Sopenharmony_ci		return -EBADF;
146762306a36Sopenharmony_ci	}
146862306a36Sopenharmony_ci
146962306a36Sopenharmony_ci	inode = file_inode(f.file);
147062306a36Sopenharmony_ci	info = MQUEUE_I(inode);
147162306a36Sopenharmony_ci
147262306a36Sopenharmony_ci	spin_lock(&info->lock);
147362306a36Sopenharmony_ci
147462306a36Sopenharmony_ci	if (old) {
147562306a36Sopenharmony_ci		*old = info->attr;
147662306a36Sopenharmony_ci		old->mq_flags = f.file->f_flags & O_NONBLOCK;
147762306a36Sopenharmony_ci	}
147862306a36Sopenharmony_ci	if (new) {
147962306a36Sopenharmony_ci		audit_mq_getsetattr(mqdes, new);
148062306a36Sopenharmony_ci		spin_lock(&f.file->f_lock);
148162306a36Sopenharmony_ci		if (new->mq_flags & O_NONBLOCK)
148262306a36Sopenharmony_ci			f.file->f_flags |= O_NONBLOCK;
148362306a36Sopenharmony_ci		else
148462306a36Sopenharmony_ci			f.file->f_flags &= ~O_NONBLOCK;
148562306a36Sopenharmony_ci		spin_unlock(&f.file->f_lock);
148662306a36Sopenharmony_ci
148762306a36Sopenharmony_ci		inode->i_atime = inode_set_ctime_current(inode);
148862306a36Sopenharmony_ci	}
148962306a36Sopenharmony_ci
149062306a36Sopenharmony_ci	spin_unlock(&info->lock);
149162306a36Sopenharmony_ci	fdput(f);
149262306a36Sopenharmony_ci	return 0;
149362306a36Sopenharmony_ci}
149462306a36Sopenharmony_ci
149562306a36Sopenharmony_ciSYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
149662306a36Sopenharmony_ci		const struct mq_attr __user *, u_mqstat,
149762306a36Sopenharmony_ci		struct mq_attr __user *, u_omqstat)
149862306a36Sopenharmony_ci{
149962306a36Sopenharmony_ci	int ret;
150062306a36Sopenharmony_ci	struct mq_attr mqstat, omqstat;
150162306a36Sopenharmony_ci	struct mq_attr *new = NULL, *old = NULL;
150262306a36Sopenharmony_ci
150362306a36Sopenharmony_ci	if (u_mqstat) {
150462306a36Sopenharmony_ci		new = &mqstat;
150562306a36Sopenharmony_ci		if (copy_from_user(new, u_mqstat, sizeof(struct mq_attr)))
150662306a36Sopenharmony_ci			return -EFAULT;
150762306a36Sopenharmony_ci	}
150862306a36Sopenharmony_ci	if (u_omqstat)
150962306a36Sopenharmony_ci		old = &omqstat;
151062306a36Sopenharmony_ci
151162306a36Sopenharmony_ci	ret = do_mq_getsetattr(mqdes, new, old);
151262306a36Sopenharmony_ci	if (ret || !old)
151362306a36Sopenharmony_ci		return ret;
151462306a36Sopenharmony_ci
151562306a36Sopenharmony_ci	if (copy_to_user(u_omqstat, old, sizeof(struct mq_attr)))
151662306a36Sopenharmony_ci		return -EFAULT;
151762306a36Sopenharmony_ci	return 0;
151862306a36Sopenharmony_ci}
151962306a36Sopenharmony_ci
152062306a36Sopenharmony_ci#ifdef CONFIG_COMPAT
152162306a36Sopenharmony_ci
/*
 * Compat (compat_long_t based) layout of the message queue attributes,
 * converted to and from struct mq_attr by get_compat_mq_attr() and
 * put_compat_mq_attr().
 */
struct compat_mq_attr {
	compat_long_t mq_flags;      /* message queue flags		     */
	compat_long_t mq_maxmsg;     /* maximum number of messages	     */
	compat_long_t mq_msgsize;    /* maximum message size		     */
	compat_long_t mq_curmsgs;    /* number of messages currently queued  */
	compat_long_t __reserved[4]; /* ignored for input, zeroed for output */
};
152962306a36Sopenharmony_ci
153062306a36Sopenharmony_cistatic inline int get_compat_mq_attr(struct mq_attr *attr,
153162306a36Sopenharmony_ci			const struct compat_mq_attr __user *uattr)
153262306a36Sopenharmony_ci{
153362306a36Sopenharmony_ci	struct compat_mq_attr v;
153462306a36Sopenharmony_ci
153562306a36Sopenharmony_ci	if (copy_from_user(&v, uattr, sizeof(*uattr)))
153662306a36Sopenharmony_ci		return -EFAULT;
153762306a36Sopenharmony_ci
153862306a36Sopenharmony_ci	memset(attr, 0, sizeof(*attr));
153962306a36Sopenharmony_ci	attr->mq_flags = v.mq_flags;
154062306a36Sopenharmony_ci	attr->mq_maxmsg = v.mq_maxmsg;
154162306a36Sopenharmony_ci	attr->mq_msgsize = v.mq_msgsize;
154262306a36Sopenharmony_ci	attr->mq_curmsgs = v.mq_curmsgs;
154362306a36Sopenharmony_ci	return 0;
154462306a36Sopenharmony_ci}
154562306a36Sopenharmony_ci
154662306a36Sopenharmony_cistatic inline int put_compat_mq_attr(const struct mq_attr *attr,
154762306a36Sopenharmony_ci			struct compat_mq_attr __user *uattr)
154862306a36Sopenharmony_ci{
154962306a36Sopenharmony_ci	struct compat_mq_attr v;
155062306a36Sopenharmony_ci
155162306a36Sopenharmony_ci	memset(&v, 0, sizeof(v));
155262306a36Sopenharmony_ci	v.mq_flags = attr->mq_flags;
155362306a36Sopenharmony_ci	v.mq_maxmsg = attr->mq_maxmsg;
155462306a36Sopenharmony_ci	v.mq_msgsize = attr->mq_msgsize;
155562306a36Sopenharmony_ci	v.mq_curmsgs = attr->mq_curmsgs;
155662306a36Sopenharmony_ci	if (copy_to_user(uattr, &v, sizeof(*uattr)))
155762306a36Sopenharmony_ci		return -EFAULT;
155862306a36Sopenharmony_ci	return 0;
155962306a36Sopenharmony_ci}
156062306a36Sopenharmony_ci
156162306a36Sopenharmony_ciCOMPAT_SYSCALL_DEFINE4(mq_open, const char __user *, u_name,
156262306a36Sopenharmony_ci		       int, oflag, compat_mode_t, mode,
156362306a36Sopenharmony_ci		       struct compat_mq_attr __user *, u_attr)
156462306a36Sopenharmony_ci{
156562306a36Sopenharmony_ci	struct mq_attr attr, *p = NULL;
156662306a36Sopenharmony_ci	if (u_attr && oflag & O_CREAT) {
156762306a36Sopenharmony_ci		p = &attr;
156862306a36Sopenharmony_ci		if (get_compat_mq_attr(&attr, u_attr))
156962306a36Sopenharmony_ci			return -EFAULT;
157062306a36Sopenharmony_ci	}
157162306a36Sopenharmony_ci	return do_mq_open(u_name, oflag, mode, p);
157262306a36Sopenharmony_ci}
157362306a36Sopenharmony_ci
157462306a36Sopenharmony_ciCOMPAT_SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes,
157562306a36Sopenharmony_ci		       const struct compat_sigevent __user *, u_notification)
157662306a36Sopenharmony_ci{
157762306a36Sopenharmony_ci	struct sigevent n, *p = NULL;
157862306a36Sopenharmony_ci	if (u_notification) {
157962306a36Sopenharmony_ci		if (get_compat_sigevent(&n, u_notification))
158062306a36Sopenharmony_ci			return -EFAULT;
158162306a36Sopenharmony_ci		if (n.sigev_notify == SIGEV_THREAD)
158262306a36Sopenharmony_ci			n.sigev_value.sival_ptr = compat_ptr(n.sigev_value.sival_int);
158362306a36Sopenharmony_ci		p = &n;
158462306a36Sopenharmony_ci	}
158562306a36Sopenharmony_ci	return do_mq_notify(mqdes, p);
158662306a36Sopenharmony_ci}
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ciCOMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
158962306a36Sopenharmony_ci		       const struct compat_mq_attr __user *, u_mqstat,
159062306a36Sopenharmony_ci		       struct compat_mq_attr __user *, u_omqstat)
159162306a36Sopenharmony_ci{
159262306a36Sopenharmony_ci	int ret;
159362306a36Sopenharmony_ci	struct mq_attr mqstat, omqstat;
159462306a36Sopenharmony_ci	struct mq_attr *new = NULL, *old = NULL;
159562306a36Sopenharmony_ci
159662306a36Sopenharmony_ci	if (u_mqstat) {
159762306a36Sopenharmony_ci		new = &mqstat;
159862306a36Sopenharmony_ci		if (get_compat_mq_attr(new, u_mqstat))
159962306a36Sopenharmony_ci			return -EFAULT;
160062306a36Sopenharmony_ci	}
160162306a36Sopenharmony_ci	if (u_omqstat)
160262306a36Sopenharmony_ci		old = &omqstat;
160362306a36Sopenharmony_ci
160462306a36Sopenharmony_ci	ret = do_mq_getsetattr(mqdes, new, old);
160562306a36Sopenharmony_ci	if (ret || !old)
160662306a36Sopenharmony_ci		return ret;
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_ci	if (put_compat_mq_attr(old, u_omqstat))
160962306a36Sopenharmony_ci		return -EFAULT;
161062306a36Sopenharmony_ci	return 0;
161162306a36Sopenharmony_ci}
161262306a36Sopenharmony_ci#endif
161362306a36Sopenharmony_ci
161462306a36Sopenharmony_ci#ifdef CONFIG_COMPAT_32BIT_TIME
161562306a36Sopenharmony_cistatic int compat_prepare_timeout(const struct old_timespec32 __user *p,
161662306a36Sopenharmony_ci				   struct timespec64 *ts)
161762306a36Sopenharmony_ci{
161862306a36Sopenharmony_ci	if (get_old_timespec32(ts, p))
161962306a36Sopenharmony_ci		return -EFAULT;
162062306a36Sopenharmony_ci	if (!timespec64_valid(ts))
162162306a36Sopenharmony_ci		return -EINVAL;
162262306a36Sopenharmony_ci	return 0;
162362306a36Sopenharmony_ci}
162462306a36Sopenharmony_ci
162562306a36Sopenharmony_ciSYSCALL_DEFINE5(mq_timedsend_time32, mqd_t, mqdes,
162662306a36Sopenharmony_ci		const char __user *, u_msg_ptr,
162762306a36Sopenharmony_ci		unsigned int, msg_len, unsigned int, msg_prio,
162862306a36Sopenharmony_ci		const struct old_timespec32 __user *, u_abs_timeout)
162962306a36Sopenharmony_ci{
163062306a36Sopenharmony_ci	struct timespec64 ts, *p = NULL;
163162306a36Sopenharmony_ci	if (u_abs_timeout) {
163262306a36Sopenharmony_ci		int res = compat_prepare_timeout(u_abs_timeout, &ts);
163362306a36Sopenharmony_ci		if (res)
163462306a36Sopenharmony_ci			return res;
163562306a36Sopenharmony_ci		p = &ts;
163662306a36Sopenharmony_ci	}
163762306a36Sopenharmony_ci	return do_mq_timedsend(mqdes, u_msg_ptr, msg_len, msg_prio, p);
163862306a36Sopenharmony_ci}
163962306a36Sopenharmony_ci
164062306a36Sopenharmony_ciSYSCALL_DEFINE5(mq_timedreceive_time32, mqd_t, mqdes,
164162306a36Sopenharmony_ci		char __user *, u_msg_ptr,
164262306a36Sopenharmony_ci		unsigned int, msg_len, unsigned int __user *, u_msg_prio,
164362306a36Sopenharmony_ci		const struct old_timespec32 __user *, u_abs_timeout)
164462306a36Sopenharmony_ci{
164562306a36Sopenharmony_ci	struct timespec64 ts, *p = NULL;
164662306a36Sopenharmony_ci	if (u_abs_timeout) {
164762306a36Sopenharmony_ci		int res = compat_prepare_timeout(u_abs_timeout, &ts);
164862306a36Sopenharmony_ci		if (res)
164962306a36Sopenharmony_ci			return res;
165062306a36Sopenharmony_ci		p = &ts;
165162306a36Sopenharmony_ci	}
165262306a36Sopenharmony_ci	return do_mq_timedreceive(mqdes, u_msg_ptr, msg_len, u_msg_prio, p);
165362306a36Sopenharmony_ci}
165462306a36Sopenharmony_ci#endif
165562306a36Sopenharmony_ci
165662306a36Sopenharmony_cistatic const struct inode_operations mqueue_dir_inode_operations = {
165762306a36Sopenharmony_ci	.lookup = simple_lookup,
165862306a36Sopenharmony_ci	.create = mqueue_create,
165962306a36Sopenharmony_ci	.unlink = mqueue_unlink,
166062306a36Sopenharmony_ci};
166162306a36Sopenharmony_ci
166262306a36Sopenharmony_cistatic const struct file_operations mqueue_file_operations = {
166362306a36Sopenharmony_ci	.flush = mqueue_flush_file,
166462306a36Sopenharmony_ci	.poll = mqueue_poll_file,
166562306a36Sopenharmony_ci	.read = mqueue_read_file,
166662306a36Sopenharmony_ci	.llseek = default_llseek,
166762306a36Sopenharmony_ci};
166862306a36Sopenharmony_ci
166962306a36Sopenharmony_cistatic const struct super_operations mqueue_super_ops = {
167062306a36Sopenharmony_ci	.alloc_inode = mqueue_alloc_inode,
167162306a36Sopenharmony_ci	.free_inode = mqueue_free_inode,
167262306a36Sopenharmony_ci	.evict_inode = mqueue_evict_inode,
167362306a36Sopenharmony_ci	.statfs = simple_statfs,
167462306a36Sopenharmony_ci};
167562306a36Sopenharmony_ci
167662306a36Sopenharmony_cistatic const struct fs_context_operations mqueue_fs_context_ops = {
167762306a36Sopenharmony_ci	.free		= mqueue_fs_context_free,
167862306a36Sopenharmony_ci	.get_tree	= mqueue_get_tree,
167962306a36Sopenharmony_ci};
168062306a36Sopenharmony_ci
168162306a36Sopenharmony_cistatic struct file_system_type mqueue_fs_type = {
168262306a36Sopenharmony_ci	.name			= "mqueue",
168362306a36Sopenharmony_ci	.init_fs_context	= mqueue_init_fs_context,
168462306a36Sopenharmony_ci	.kill_sb		= kill_litter_super,
168562306a36Sopenharmony_ci	.fs_flags		= FS_USERNS_MOUNT,
168662306a36Sopenharmony_ci};
168762306a36Sopenharmony_ci
168862306a36Sopenharmony_ciint mq_init_ns(struct ipc_namespace *ns)
168962306a36Sopenharmony_ci{
169062306a36Sopenharmony_ci	struct vfsmount *m;
169162306a36Sopenharmony_ci
169262306a36Sopenharmony_ci	ns->mq_queues_count  = 0;
169362306a36Sopenharmony_ci	ns->mq_queues_max    = DFLT_QUEUESMAX;
169462306a36Sopenharmony_ci	ns->mq_msg_max       = DFLT_MSGMAX;
169562306a36Sopenharmony_ci	ns->mq_msgsize_max   = DFLT_MSGSIZEMAX;
169662306a36Sopenharmony_ci	ns->mq_msg_default   = DFLT_MSG;
169762306a36Sopenharmony_ci	ns->mq_msgsize_default  = DFLT_MSGSIZE;
169862306a36Sopenharmony_ci
169962306a36Sopenharmony_ci	m = mq_create_mount(ns);
170062306a36Sopenharmony_ci	if (IS_ERR(m))
170162306a36Sopenharmony_ci		return PTR_ERR(m);
170262306a36Sopenharmony_ci	ns->mq_mnt = m;
170362306a36Sopenharmony_ci	return 0;
170462306a36Sopenharmony_ci}
170562306a36Sopenharmony_ci
170662306a36Sopenharmony_civoid mq_clear_sbinfo(struct ipc_namespace *ns)
170762306a36Sopenharmony_ci{
170862306a36Sopenharmony_ci	ns->mq_mnt->mnt_sb->s_fs_info = NULL;
170962306a36Sopenharmony_ci}
171062306a36Sopenharmony_ci
171162306a36Sopenharmony_cistatic int __init init_mqueue_fs(void)
171262306a36Sopenharmony_ci{
171362306a36Sopenharmony_ci	int error;
171462306a36Sopenharmony_ci
171562306a36Sopenharmony_ci	mqueue_inode_cachep = kmem_cache_create("mqueue_inode_cache",
171662306a36Sopenharmony_ci				sizeof(struct mqueue_inode_info), 0,
171762306a36Sopenharmony_ci				SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT, init_once);
171862306a36Sopenharmony_ci	if (mqueue_inode_cachep == NULL)
171962306a36Sopenharmony_ci		return -ENOMEM;
172062306a36Sopenharmony_ci
172162306a36Sopenharmony_ci	if (!setup_mq_sysctls(&init_ipc_ns)) {
172262306a36Sopenharmony_ci		pr_warn("sysctl registration failed\n");
172362306a36Sopenharmony_ci		error = -ENOMEM;
172462306a36Sopenharmony_ci		goto out_kmem;
172562306a36Sopenharmony_ci	}
172662306a36Sopenharmony_ci
172762306a36Sopenharmony_ci	error = register_filesystem(&mqueue_fs_type);
172862306a36Sopenharmony_ci	if (error)
172962306a36Sopenharmony_ci		goto out_sysctl;
173062306a36Sopenharmony_ci
173162306a36Sopenharmony_ci	spin_lock_init(&mq_lock);
173262306a36Sopenharmony_ci
173362306a36Sopenharmony_ci	error = mq_init_ns(&init_ipc_ns);
173462306a36Sopenharmony_ci	if (error)
173562306a36Sopenharmony_ci		goto out_filesystem;
173662306a36Sopenharmony_ci
173762306a36Sopenharmony_ci	return 0;
173862306a36Sopenharmony_ci
173962306a36Sopenharmony_ciout_filesystem:
174062306a36Sopenharmony_ci	unregister_filesystem(&mqueue_fs_type);
174162306a36Sopenharmony_ciout_sysctl:
174262306a36Sopenharmony_ci	retire_mq_sysctls(&init_ipc_ns);
174362306a36Sopenharmony_ciout_kmem:
174462306a36Sopenharmony_ci	kmem_cache_destroy(mqueue_inode_cachep);
174562306a36Sopenharmony_ci	return error;
174662306a36Sopenharmony_ci}
174762306a36Sopenharmony_ci
174862306a36Sopenharmony_cidevice_initcall(init_mqueue_fs);
1749