// SPDX-License-Identifier: GPL-2.0
/*
 * linux/ipc/namespace.c
 * Copyright (C) 2006 Pavel Emelyanov <xemul@openvz.org> OpenVZ, SWsoft Inc.
 */
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci#include <linux/ipc.h>
862306a36Sopenharmony_ci#include <linux/msg.h>
962306a36Sopenharmony_ci#include <linux/ipc_namespace.h>
1062306a36Sopenharmony_ci#include <linux/rcupdate.h>
1162306a36Sopenharmony_ci#include <linux/nsproxy.h>
1262306a36Sopenharmony_ci#include <linux/slab.h>
1362306a36Sopenharmony_ci#include <linux/cred.h>
1462306a36Sopenharmony_ci#include <linux/fs.h>
1562306a36Sopenharmony_ci#include <linux/mount.h>
1662306a36Sopenharmony_ci#include <linux/user_namespace.h>
1762306a36Sopenharmony_ci#include <linux/proc_ns.h>
1862306a36Sopenharmony_ci#include <linux/sched/task.h>
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci#include "util.h"
2162306a36Sopenharmony_ci
/*
 * Tearing down an ipc namespace is pushed to a work item so that the
 * cost of synchronize_rcu in kern_unmount is avoided; free_ipc() is the
 * handler and is defined further down, hence the forward declaration.
 */
static void free_ipc(struct work_struct *unused);
static DECLARE_WORK(free_ipc_work, free_ipc);
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_cistatic struct ucounts *inc_ipc_namespaces(struct user_namespace *ns)
2962306a36Sopenharmony_ci{
3062306a36Sopenharmony_ci	return inc_ucount(ns, current_euid(), UCOUNT_IPC_NAMESPACES);
3162306a36Sopenharmony_ci}
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_cistatic void dec_ipc_namespaces(struct ucounts *ucounts)
3462306a36Sopenharmony_ci{
3562306a36Sopenharmony_ci	dec_ucount(ucounts, UCOUNT_IPC_NAMESPACES);
3662306a36Sopenharmony_ci}
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_cistatic struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
3962306a36Sopenharmony_ci					   struct ipc_namespace *old_ns)
4062306a36Sopenharmony_ci{
4162306a36Sopenharmony_ci	struct ipc_namespace *ns;
4262306a36Sopenharmony_ci	struct ucounts *ucounts;
4362306a36Sopenharmony_ci	int err;
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci	err = -ENOSPC;
4662306a36Sopenharmony_ci again:
4762306a36Sopenharmony_ci	ucounts = inc_ipc_namespaces(user_ns);
4862306a36Sopenharmony_ci	if (!ucounts) {
4962306a36Sopenharmony_ci		/*
5062306a36Sopenharmony_ci		 * IPC namespaces are freed asynchronously, by free_ipc_work.
5162306a36Sopenharmony_ci		 * If frees were pending, flush_work will wait, and
5262306a36Sopenharmony_ci		 * return true. Fail the allocation if no frees are pending.
5362306a36Sopenharmony_ci		 */
5462306a36Sopenharmony_ci		if (flush_work(&free_ipc_work))
5562306a36Sopenharmony_ci			goto again;
5662306a36Sopenharmony_ci		goto fail;
5762306a36Sopenharmony_ci	}
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci	err = -ENOMEM;
6062306a36Sopenharmony_ci	ns = kzalloc(sizeof(struct ipc_namespace), GFP_KERNEL_ACCOUNT);
6162306a36Sopenharmony_ci	if (ns == NULL)
6262306a36Sopenharmony_ci		goto fail_dec;
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci	err = ns_alloc_inum(&ns->ns);
6562306a36Sopenharmony_ci	if (err)
6662306a36Sopenharmony_ci		goto fail_free;
6762306a36Sopenharmony_ci	ns->ns.ops = &ipcns_operations;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	refcount_set(&ns->ns.count, 1);
7062306a36Sopenharmony_ci	ns->user_ns = get_user_ns(user_ns);
7162306a36Sopenharmony_ci	ns->ucounts = ucounts;
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	err = mq_init_ns(ns);
7462306a36Sopenharmony_ci	if (err)
7562306a36Sopenharmony_ci		goto fail_put;
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	err = -ENOMEM;
7862306a36Sopenharmony_ci	if (!setup_mq_sysctls(ns))
7962306a36Sopenharmony_ci		goto fail_put;
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci	if (!setup_ipc_sysctls(ns))
8262306a36Sopenharmony_ci		goto fail_mq;
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	err = msg_init_ns(ns);
8562306a36Sopenharmony_ci	if (err)
8662306a36Sopenharmony_ci		goto fail_put;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	sem_init_ns(ns);
8962306a36Sopenharmony_ci	shm_init_ns(ns);
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	return ns;
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_cifail_mq:
9462306a36Sopenharmony_ci	retire_mq_sysctls(ns);
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_cifail_put:
9762306a36Sopenharmony_ci	put_user_ns(ns->user_ns);
9862306a36Sopenharmony_ci	ns_free_inum(&ns->ns);
9962306a36Sopenharmony_cifail_free:
10062306a36Sopenharmony_ci	kfree(ns);
10162306a36Sopenharmony_cifail_dec:
10262306a36Sopenharmony_ci	dec_ipc_namespaces(ucounts);
10362306a36Sopenharmony_cifail:
10462306a36Sopenharmony_ci	return ERR_PTR(err);
10562306a36Sopenharmony_ci}
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_cistruct ipc_namespace *copy_ipcs(unsigned long flags,
10862306a36Sopenharmony_ci	struct user_namespace *user_ns, struct ipc_namespace *ns)
10962306a36Sopenharmony_ci{
11062306a36Sopenharmony_ci	if (!(flags & CLONE_NEWIPC))
11162306a36Sopenharmony_ci		return get_ipc_ns(ns);
11262306a36Sopenharmony_ci	return create_ipc_ns(user_ns, ns);
11362306a36Sopenharmony_ci}
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci/*
11662306a36Sopenharmony_ci * free_ipcs - free all ipcs of one type
11762306a36Sopenharmony_ci * @ns:   the namespace to remove the ipcs from
11862306a36Sopenharmony_ci * @ids:  the table of ipcs to free
11962306a36Sopenharmony_ci * @free: the function called to free each individual ipc
12062306a36Sopenharmony_ci *
12162306a36Sopenharmony_ci * Called for each kind of ipc when an ipc_namespace exits.
12262306a36Sopenharmony_ci */
12362306a36Sopenharmony_civoid free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
12462306a36Sopenharmony_ci	       void (*free)(struct ipc_namespace *, struct kern_ipc_perm *))
12562306a36Sopenharmony_ci{
12662306a36Sopenharmony_ci	struct kern_ipc_perm *perm;
12762306a36Sopenharmony_ci	int next_id;
12862306a36Sopenharmony_ci	int total, in_use;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	down_write(&ids->rwsem);
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci	in_use = ids->in_use;
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	for (total = 0, next_id = 0; total < in_use; next_id++) {
13562306a36Sopenharmony_ci		perm = idr_find(&ids->ipcs_idr, next_id);
13662306a36Sopenharmony_ci		if (perm == NULL)
13762306a36Sopenharmony_ci			continue;
13862306a36Sopenharmony_ci		rcu_read_lock();
13962306a36Sopenharmony_ci		ipc_lock_object(perm);
14062306a36Sopenharmony_ci		free(ns, perm);
14162306a36Sopenharmony_ci		total++;
14262306a36Sopenharmony_ci	}
14362306a36Sopenharmony_ci	up_write(&ids->rwsem);
14462306a36Sopenharmony_ci}
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_cistatic void free_ipc_ns(struct ipc_namespace *ns)
14762306a36Sopenharmony_ci{
14862306a36Sopenharmony_ci	/*
14962306a36Sopenharmony_ci	 * Caller needs to wait for an RCU grace period to have passed
15062306a36Sopenharmony_ci	 * after making the mount point inaccessible to new accesses.
15162306a36Sopenharmony_ci	 */
15262306a36Sopenharmony_ci	mntput(ns->mq_mnt);
15362306a36Sopenharmony_ci	sem_exit_ns(ns);
15462306a36Sopenharmony_ci	msg_exit_ns(ns);
15562306a36Sopenharmony_ci	shm_exit_ns(ns);
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	retire_mq_sysctls(ns);
15862306a36Sopenharmony_ci	retire_ipc_sysctls(ns);
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	dec_ipc_namespaces(ns->ucounts);
16162306a36Sopenharmony_ci	put_user_ns(ns->user_ns);
16262306a36Sopenharmony_ci	ns_free_inum(&ns->ns);
16362306a36Sopenharmony_ci	kfree(ns);
16462306a36Sopenharmony_ci}
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_cistatic LLIST_HEAD(free_ipc_list);
16762306a36Sopenharmony_cistatic void free_ipc(struct work_struct *unused)
16862306a36Sopenharmony_ci{
16962306a36Sopenharmony_ci	struct llist_node *node = llist_del_all(&free_ipc_list);
17062306a36Sopenharmony_ci	struct ipc_namespace *n, *t;
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	llist_for_each_entry_safe(n, t, node, mnt_llist)
17362306a36Sopenharmony_ci		mnt_make_shortterm(n->mq_mnt);
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	/* Wait for any last users to have gone away. */
17662306a36Sopenharmony_ci	synchronize_rcu();
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	llist_for_each_entry_safe(n, t, node, mnt_llist)
17962306a36Sopenharmony_ci		free_ipc_ns(n);
18062306a36Sopenharmony_ci}
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci/*
18362306a36Sopenharmony_ci * put_ipc_ns - drop a reference to an ipc namespace.
18462306a36Sopenharmony_ci * @ns: the namespace to put
18562306a36Sopenharmony_ci *
18662306a36Sopenharmony_ci * If this is the last task in the namespace exiting, and
18762306a36Sopenharmony_ci * it is dropping the refcount to 0, then it can race with
18862306a36Sopenharmony_ci * a task in another ipc namespace but in a mounts namespace
18962306a36Sopenharmony_ci * which has this ipcns's mqueuefs mounted, doing some action
19062306a36Sopenharmony_ci * with one of the mqueuefs files.  That can raise the refcount.
19162306a36Sopenharmony_ci * So dropping the refcount, and raising the refcount when
19262306a36Sopenharmony_ci * accessing it through the VFS, are protected with mq_lock.
19362306a36Sopenharmony_ci *
19462306a36Sopenharmony_ci * (Clearly, a task raising the refcount on its own ipc_ns
19562306a36Sopenharmony_ci * needn't take mq_lock since it can't race with the last task
19662306a36Sopenharmony_ci * in the ipcns exiting).
19762306a36Sopenharmony_ci */
19862306a36Sopenharmony_civoid put_ipc_ns(struct ipc_namespace *ns)
19962306a36Sopenharmony_ci{
20062306a36Sopenharmony_ci	if (refcount_dec_and_lock(&ns->ns.count, &mq_lock)) {
20162306a36Sopenharmony_ci		mq_clear_sbinfo(ns);
20262306a36Sopenharmony_ci		spin_unlock(&mq_lock);
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci		if (llist_add(&ns->mnt_llist, &free_ipc_list))
20562306a36Sopenharmony_ci			schedule_work(&free_ipc_work);
20662306a36Sopenharmony_ci	}
20762306a36Sopenharmony_ci}
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_cistatic inline struct ipc_namespace *to_ipc_ns(struct ns_common *ns)
21062306a36Sopenharmony_ci{
21162306a36Sopenharmony_ci	return container_of(ns, struct ipc_namespace, ns);
21262306a36Sopenharmony_ci}
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_cistatic struct ns_common *ipcns_get(struct task_struct *task)
21562306a36Sopenharmony_ci{
21662306a36Sopenharmony_ci	struct ipc_namespace *ns = NULL;
21762306a36Sopenharmony_ci	struct nsproxy *nsproxy;
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	task_lock(task);
22062306a36Sopenharmony_ci	nsproxy = task->nsproxy;
22162306a36Sopenharmony_ci	if (nsproxy)
22262306a36Sopenharmony_ci		ns = get_ipc_ns(nsproxy->ipc_ns);
22362306a36Sopenharmony_ci	task_unlock(task);
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	return ns ? &ns->ns : NULL;
22662306a36Sopenharmony_ci}
22762306a36Sopenharmony_ci
/* .put hook: drop the reference held on the ipc namespace behind @ns. */
static void ipcns_put(struct ns_common *ns)
{
	struct ipc_namespace *ipc = to_ipc_ns(ns);

	put_ipc_ns(ipc);
}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_cistatic int ipcns_install(struct nsset *nsset, struct ns_common *new)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	struct nsproxy *nsproxy = nsset->nsproxy;
23662306a36Sopenharmony_ci	struct ipc_namespace *ns = to_ipc_ns(new);
23762306a36Sopenharmony_ci	if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN) ||
23862306a36Sopenharmony_ci	    !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
23962306a36Sopenharmony_ci		return -EPERM;
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	put_ipc_ns(nsproxy->ipc_ns);
24262306a36Sopenharmony_ci	nsproxy->ipc_ns = get_ipc_ns(ns);
24362306a36Sopenharmony_ci	return 0;
24462306a36Sopenharmony_ci}
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_cistatic struct user_namespace *ipcns_owner(struct ns_common *ns)
24762306a36Sopenharmony_ci{
24862306a36Sopenharmony_ci	return to_ipc_ns(ns)->user_ns;
24962306a36Sopenharmony_ci}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ciconst struct proc_ns_operations ipcns_operations = {
25262306a36Sopenharmony_ci	.name		= "ipc",
25362306a36Sopenharmony_ci	.type		= CLONE_NEWIPC,
25462306a36Sopenharmony_ci	.get		= ipcns_get,
25562306a36Sopenharmony_ci	.put		= ipcns_put,
25662306a36Sopenharmony_ci	.install	= ipcns_install,
25762306a36Sopenharmony_ci	.owner		= ipcns_owner,
25862306a36Sopenharmony_ci};
259