162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *  linux/fs/namespace.c
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * (C) Copyright Al Viro 2000, 2001
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Based on code from fs/super.c, copyright Linus Torvalds and others.
862306a36Sopenharmony_ci * Heavily rewritten.
962306a36Sopenharmony_ci */
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include <linux/syscalls.h>
1262306a36Sopenharmony_ci#include <linux/export.h>
1362306a36Sopenharmony_ci#include <linux/capability.h>
1462306a36Sopenharmony_ci#include <linux/mnt_namespace.h>
1562306a36Sopenharmony_ci#include <linux/user_namespace.h>
1662306a36Sopenharmony_ci#include <linux/namei.h>
1762306a36Sopenharmony_ci#include <linux/security.h>
1862306a36Sopenharmony_ci#include <linux/cred.h>
1962306a36Sopenharmony_ci#include <linux/idr.h>
2062306a36Sopenharmony_ci#include <linux/init.h>		/* init_rootfs */
2162306a36Sopenharmony_ci#include <linux/fs_struct.h>	/* get_fs_root et.al. */
2262306a36Sopenharmony_ci#include <linux/fsnotify.h>	/* fsnotify_vfsmount_delete */
2362306a36Sopenharmony_ci#include <linux/file.h>
2462306a36Sopenharmony_ci#include <linux/uaccess.h>
2562306a36Sopenharmony_ci#include <linux/proc_ns.h>
2662306a36Sopenharmony_ci#include <linux/magic.h>
2762306a36Sopenharmony_ci#include <linux/memblock.h>
2862306a36Sopenharmony_ci#include <linux/proc_fs.h>
2962306a36Sopenharmony_ci#include <linux/task_work.h>
3062306a36Sopenharmony_ci#include <linux/sched/task.h>
3162306a36Sopenharmony_ci#include <uapi/linux/mount.h>
3262306a36Sopenharmony_ci#include <linux/fs_context.h>
3362306a36Sopenharmony_ci#include <linux/shmem_fs.h>
3462306a36Sopenharmony_ci#include <linux/mnt_idmapping.h>
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci#include "pnode.h"
3762306a36Sopenharmony_ci#include "internal.h"
3862306a36Sopenharmony_ci
/* Maximum number of mounts in a mount namespace */
static unsigned int sysctl_mount_max __read_mostly = 100000;

/*
 * Shift and mask used by m_hash() to fold a (vfsmount, dentry) pair into
 * an index into mount_hashtable.
 */
static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
/* Shift and mask used by mp_hash() to index mountpoint_hashtable. */
static unsigned int mp_hash_mask __read_mostly;
static unsigned int mp_hash_shift __read_mostly;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_cistatic __initdata unsigned long mhash_entries;
4862306a36Sopenharmony_cistatic int __init set_mhash_entries(char *str)
4962306a36Sopenharmony_ci{
5062306a36Sopenharmony_ci	if (!str)
5162306a36Sopenharmony_ci		return 0;
5262306a36Sopenharmony_ci	mhash_entries = simple_strtoul(str, &str, 0);
5362306a36Sopenharmony_ci	return 1;
5462306a36Sopenharmony_ci}
5562306a36Sopenharmony_ci__setup("mhash_entries=", set_mhash_entries);
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_cistatic __initdata unsigned long mphash_entries;
5862306a36Sopenharmony_cistatic int __init set_mphash_entries(char *str)
5962306a36Sopenharmony_ci{
6062306a36Sopenharmony_ci	if (!str)
6162306a36Sopenharmony_ci		return 0;
6262306a36Sopenharmony_ci	mphash_entries = simple_strtoul(str, &str, 0);
6362306a36Sopenharmony_ci	return 1;
6462306a36Sopenharmony_ci}
6562306a36Sopenharmony_ci__setup("mphash_entries=", set_mphash_entries);
6662306a36Sopenharmony_ci
static u64 event;
/* ID allocator backing mount::mnt_id (see mnt_alloc_id()/mnt_free_id()). */
static DEFINE_IDA(mnt_id_ida);
/* ID allocator for peer group IDs; IDs start at 1 (see mnt_alloc_group_id()). */
static DEFINE_IDA(mnt_group_ida);

/* Hash tables indexed by m_hash() and mp_hash() respectively. */
static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
/* Slab cache from which struct mount is allocated (see alloc_vfsmnt()). */
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
7762306a36Sopenharmony_ci
/*
 * Bundle of parameters describing a requested change of mount attributes.
 * NOTE(review): the code that fills and consumes this structure is not in
 * this part of the file; field meanings below are taken from their names
 * and should be confirmed against the users.
 */
struct mount_kattr {
	unsigned int attr_set;		/* presumably: attribute bits to set */
	unsigned int attr_clr;		/* presumably: attribute bits to clear */
	unsigned int propagation;
	unsigned int lookup_flags;
	bool recurse;
	struct user_namespace *mnt_userns;
	struct mnt_idmap *mnt_idmap;
};
8762306a36Sopenharmony_ci
/* kobject for /sys/fs; exported (GPL-only) for use by other code. */
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
9162306a36Sopenharmony_ci
/*
 * vfsmount lock may be taken for read to prevent changes to the
 * vfsmount hash, i.e. during mountpoint lookups or walking back
 * up the tree.
 *
 * It should be taken for write in all cases where the vfsmount
 * tree or hash is modified or when a vfsmount structure is modified.
 *
 * The lock is a seqlock; the write side is wrapped by lock_mount_hash()
 * and unlock_mount_hash().
 */
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
10162306a36Sopenharmony_ci
/* Take mount_lock on its write side. */
static inline void lock_mount_hash(void)
{
	write_seqlock(&mount_lock);
}
10662306a36Sopenharmony_ci
/* Release the write side of mount_lock taken by lock_mount_hash(). */
static inline void unlock_mount_hash(void)
{
	write_sequnlock(&mount_lock);
}
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_cistatic inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
11362306a36Sopenharmony_ci{
11462306a36Sopenharmony_ci	unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
11562306a36Sopenharmony_ci	tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
11662306a36Sopenharmony_ci	tmp = tmp + (tmp >> m_hash_shift);
11762306a36Sopenharmony_ci	return &mount_hashtable[tmp & m_hash_mask];
11862306a36Sopenharmony_ci}
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_cistatic inline struct hlist_head *mp_hash(struct dentry *dentry)
12162306a36Sopenharmony_ci{
12262306a36Sopenharmony_ci	unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
12362306a36Sopenharmony_ci	tmp = tmp + (tmp >> mp_hash_shift);
12462306a36Sopenharmony_ci	return &mountpoint_hashtable[tmp & mp_hash_mask];
12562306a36Sopenharmony_ci}
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_cistatic int mnt_alloc_id(struct mount *mnt)
12862306a36Sopenharmony_ci{
12962306a36Sopenharmony_ci	int res = ida_alloc(&mnt_id_ida, GFP_KERNEL);
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	if (res < 0)
13262306a36Sopenharmony_ci		return res;
13362306a36Sopenharmony_ci	mnt->mnt_id = res;
13462306a36Sopenharmony_ci	return 0;
13562306a36Sopenharmony_ci}
13662306a36Sopenharmony_ci
/* Return @mnt's ID to mnt_id_ida; counterpart of mnt_alloc_id(). */
static void mnt_free_id(struct mount *mnt)
{
	ida_free(&mnt_id_ida, mnt->mnt_id);
}
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci/*
14362306a36Sopenharmony_ci * Allocate a new peer group ID
14462306a36Sopenharmony_ci */
14562306a36Sopenharmony_cistatic int mnt_alloc_group_id(struct mount *mnt)
14662306a36Sopenharmony_ci{
14762306a36Sopenharmony_ci	int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL);
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci	if (res < 0)
15062306a36Sopenharmony_ci		return res;
15162306a36Sopenharmony_ci	mnt->mnt_group_id = res;
15262306a36Sopenharmony_ci	return 0;
15362306a36Sopenharmony_ci}
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci/*
15662306a36Sopenharmony_ci * Release a peer group ID
15762306a36Sopenharmony_ci */
15862306a36Sopenharmony_civoid mnt_release_group_id(struct mount *mnt)
15962306a36Sopenharmony_ci{
16062306a36Sopenharmony_ci	ida_free(&mnt_group_ida, mnt->mnt_group_id);
16162306a36Sopenharmony_ci	mnt->mnt_group_id = 0;
16262306a36Sopenharmony_ci}
16362306a36Sopenharmony_ci
/*
 * Adjust the reference count of @mnt by @n (callers pass negative values
 * to drop references).
 *
 * On SMP the count is kept per CPU and only the current CPU's counter is
 * touched; on UP a single counter is updated with preemption disabled.
 *
 * vfsmount lock must be held for read
 */
static inline void mnt_add_count(struct mount *mnt, int n)
{
#ifdef CONFIG_SMP
	this_cpu_add(mnt->mnt_pcp->mnt_count, n);
#else
	preempt_disable();
	mnt->mnt_count += n;
	preempt_enable();
#endif
}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci/*
17962306a36Sopenharmony_ci * vfsmount lock must be held for write
18062306a36Sopenharmony_ci */
18162306a36Sopenharmony_ciint mnt_get_count(struct mount *mnt)
18262306a36Sopenharmony_ci{
18362306a36Sopenharmony_ci#ifdef CONFIG_SMP
18462306a36Sopenharmony_ci	int count = 0;
18562306a36Sopenharmony_ci	int cpu;
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
18862306a36Sopenharmony_ci		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count;
18962306a36Sopenharmony_ci	}
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	return count;
19262306a36Sopenharmony_ci#else
19362306a36Sopenharmony_ci	return mnt->mnt_count;
19462306a36Sopenharmony_ci#endif
19562306a36Sopenharmony_ci}
19662306a36Sopenharmony_ci
19762306a36Sopenharmony_cistatic struct mount *alloc_vfsmnt(const char *name)
19862306a36Sopenharmony_ci{
19962306a36Sopenharmony_ci	struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
20062306a36Sopenharmony_ci	if (mnt) {
20162306a36Sopenharmony_ci		int err;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci		err = mnt_alloc_id(mnt);
20462306a36Sopenharmony_ci		if (err)
20562306a36Sopenharmony_ci			goto out_free_cache;
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci		if (name) {
20862306a36Sopenharmony_ci			mnt->mnt_devname = kstrdup_const(name,
20962306a36Sopenharmony_ci							 GFP_KERNEL_ACCOUNT);
21062306a36Sopenharmony_ci			if (!mnt->mnt_devname)
21162306a36Sopenharmony_ci				goto out_free_id;
21262306a36Sopenharmony_ci		}
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci#ifdef CONFIG_SMP
21562306a36Sopenharmony_ci		mnt->mnt_pcp = alloc_percpu(struct mnt_pcp);
21662306a36Sopenharmony_ci		if (!mnt->mnt_pcp)
21762306a36Sopenharmony_ci			goto out_free_devname;
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci		this_cpu_add(mnt->mnt_pcp->mnt_count, 1);
22062306a36Sopenharmony_ci#else
22162306a36Sopenharmony_ci		mnt->mnt_count = 1;
22262306a36Sopenharmony_ci		mnt->mnt_writers = 0;
22362306a36Sopenharmony_ci#endif
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci		INIT_HLIST_NODE(&mnt->mnt_hash);
22662306a36Sopenharmony_ci		INIT_LIST_HEAD(&mnt->mnt_child);
22762306a36Sopenharmony_ci		INIT_LIST_HEAD(&mnt->mnt_mounts);
22862306a36Sopenharmony_ci		INIT_LIST_HEAD(&mnt->mnt_list);
22962306a36Sopenharmony_ci		INIT_LIST_HEAD(&mnt->mnt_expire);
23062306a36Sopenharmony_ci		INIT_LIST_HEAD(&mnt->mnt_share);
23162306a36Sopenharmony_ci		INIT_LIST_HEAD(&mnt->mnt_slave_list);
23262306a36Sopenharmony_ci		INIT_LIST_HEAD(&mnt->mnt_slave);
23362306a36Sopenharmony_ci		INIT_HLIST_NODE(&mnt->mnt_mp_list);
23462306a36Sopenharmony_ci		INIT_LIST_HEAD(&mnt->mnt_umounting);
23562306a36Sopenharmony_ci		INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
23662306a36Sopenharmony_ci		mnt->mnt.mnt_idmap = &nop_mnt_idmap;
23762306a36Sopenharmony_ci	}
23862306a36Sopenharmony_ci	return mnt;
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci#ifdef CONFIG_SMP
24162306a36Sopenharmony_ciout_free_devname:
24262306a36Sopenharmony_ci	kfree_const(mnt->mnt_devname);
24362306a36Sopenharmony_ci#endif
24462306a36Sopenharmony_ciout_free_id:
24562306a36Sopenharmony_ci	mnt_free_id(mnt);
24662306a36Sopenharmony_ciout_free_cache:
24762306a36Sopenharmony_ci	kmem_cache_free(mnt_cache, mnt);
24862306a36Sopenharmony_ci	return NULL;
24962306a36Sopenharmony_ci}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci/*
25262306a36Sopenharmony_ci * Most r/o checks on a fs are for operations that take
25362306a36Sopenharmony_ci * discrete amounts of time, like a write() or unlink().
25462306a36Sopenharmony_ci * We must keep track of when those operations start
25562306a36Sopenharmony_ci * (for permission checks) and when they end, so that
25662306a36Sopenharmony_ci * we can determine when writes are able to occur to
25762306a36Sopenharmony_ci * a filesystem.
25862306a36Sopenharmony_ci */
25962306a36Sopenharmony_ci/*
26062306a36Sopenharmony_ci * __mnt_is_readonly: check whether a mount is read-only
26162306a36Sopenharmony_ci * @mnt: the mount to check for its write status
26262306a36Sopenharmony_ci *
26362306a36Sopenharmony_ci * This shouldn't be used directly ouside of the VFS.
26462306a36Sopenharmony_ci * It does not guarantee that the filesystem will stay
26562306a36Sopenharmony_ci * r/w, just that it is right *now*.  This can not and
26662306a36Sopenharmony_ci * should not be used in place of IS_RDONLY(inode).
26762306a36Sopenharmony_ci * mnt_want/drop_write() will _keep_ the filesystem
26862306a36Sopenharmony_ci * r/w.
26962306a36Sopenharmony_ci */
27062306a36Sopenharmony_cibool __mnt_is_readonly(struct vfsmount *mnt)
27162306a36Sopenharmony_ci{
27262306a36Sopenharmony_ci	return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb);
27362306a36Sopenharmony_ci}
27462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__mnt_is_readonly);
27562306a36Sopenharmony_ci
/*
 * Increment the writer count of @mnt.  On SMP only the current CPU's
 * per-CPU counter is touched; the caller in this file (__mnt_want_write())
 * runs it with preemption disabled.
 */
static inline void mnt_inc_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_inc(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers++;
#endif
}
28462306a36Sopenharmony_ci
/*
 * Decrement the writer count of @mnt.  On SMP only the current CPU's
 * per-CPU counter is touched, so an individual CPU's counter may go
 * negative; only the sum computed by mnt_get_writers() is meaningful.
 */
static inline void mnt_dec_writers(struct mount *mnt)
{
#ifdef CONFIG_SMP
	this_cpu_dec(mnt->mnt_pcp->mnt_writers);
#else
	mnt->mnt_writers--;
#endif
}
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_cistatic unsigned int mnt_get_writers(struct mount *mnt)
29562306a36Sopenharmony_ci{
29662306a36Sopenharmony_ci#ifdef CONFIG_SMP
29762306a36Sopenharmony_ci	unsigned int count = 0;
29862306a36Sopenharmony_ci	int cpu;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
30162306a36Sopenharmony_ci		count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers;
30262306a36Sopenharmony_ci	}
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	return count;
30562306a36Sopenharmony_ci#else
30662306a36Sopenharmony_ci	return mnt->mnt_writers;
30762306a36Sopenharmony_ci#endif
30862306a36Sopenharmony_ci}
30962306a36Sopenharmony_ci
/*
 * Read-only check used on the write-access path.
 *
 * Returns non-zero if a read-only state change of the superblock is in
 * progress (s_readonly_remount set) or if __mnt_is_readonly() reports the
 * mount as read-only; 0 otherwise.
 */
static int mnt_is_readonly(struct vfsmount *mnt)
{
	if (READ_ONCE(mnt->mnt_sb->s_readonly_remount))
		return 1;
	/*
	 * The barrier pairs with the barrier in sb_start_ro_state_change()
	 * making sure if we don't see s_readonly_remount set yet, we also will
	 * not see any superblock / mount flag changes done by remount.
	 * It also pairs with the barrier in sb_end_ro_state_change()
	 * assuring that if we see s_readonly_remount already cleared, we will
	 * see the values of superblock / mount flags updated by remount.
	 */
	smp_rmb();
	return __mnt_is_readonly(mnt);
}
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci/*
32762306a36Sopenharmony_ci * Most r/o & frozen checks on a fs are for operations that take discrete
32862306a36Sopenharmony_ci * amounts of time, like a write() or unlink().  We must keep track of when
32962306a36Sopenharmony_ci * those operations start (for permission checks) and when they end, so that we
33062306a36Sopenharmony_ci * can determine when writes are able to occur to a filesystem.
33162306a36Sopenharmony_ci */
33262306a36Sopenharmony_ci/**
33362306a36Sopenharmony_ci * __mnt_want_write - get write access to a mount without freeze protection
33462306a36Sopenharmony_ci * @m: the mount on which to take a write
33562306a36Sopenharmony_ci *
33662306a36Sopenharmony_ci * This tells the low-level filesystem that a write is about to be performed to
33762306a36Sopenharmony_ci * it, and makes sure that writes are allowed (mnt it read-write) before
33862306a36Sopenharmony_ci * returning success. This operation does not protect against filesystem being
33962306a36Sopenharmony_ci * frozen. When the write operation is finished, __mnt_drop_write() must be
34062306a36Sopenharmony_ci * called. This is effectively a refcount.
34162306a36Sopenharmony_ci */
34262306a36Sopenharmony_ciint __mnt_want_write(struct vfsmount *m)
34362306a36Sopenharmony_ci{
34462306a36Sopenharmony_ci	struct mount *mnt = real_mount(m);
34562306a36Sopenharmony_ci	int ret = 0;
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci	preempt_disable();
34862306a36Sopenharmony_ci	mnt_inc_writers(mnt);
34962306a36Sopenharmony_ci	/*
35062306a36Sopenharmony_ci	 * The store to mnt_inc_writers must be visible before we pass
35162306a36Sopenharmony_ci	 * MNT_WRITE_HOLD loop below, so that the slowpath can see our
35262306a36Sopenharmony_ci	 * incremented count after it has set MNT_WRITE_HOLD.
35362306a36Sopenharmony_ci	 */
35462306a36Sopenharmony_ci	smp_mb();
35562306a36Sopenharmony_ci	might_lock(&mount_lock.lock);
35662306a36Sopenharmony_ci	while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) {
35762306a36Sopenharmony_ci		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
35862306a36Sopenharmony_ci			cpu_relax();
35962306a36Sopenharmony_ci		} else {
36062306a36Sopenharmony_ci			/*
36162306a36Sopenharmony_ci			 * This prevents priority inversion, if the task
36262306a36Sopenharmony_ci			 * setting MNT_WRITE_HOLD got preempted on a remote
36362306a36Sopenharmony_ci			 * CPU, and it prevents life lock if the task setting
36462306a36Sopenharmony_ci			 * MNT_WRITE_HOLD has a lower priority and is bound to
36562306a36Sopenharmony_ci			 * the same CPU as the task that is spinning here.
36662306a36Sopenharmony_ci			 */
36762306a36Sopenharmony_ci			preempt_enable();
36862306a36Sopenharmony_ci			lock_mount_hash();
36962306a36Sopenharmony_ci			unlock_mount_hash();
37062306a36Sopenharmony_ci			preempt_disable();
37162306a36Sopenharmony_ci		}
37262306a36Sopenharmony_ci	}
37362306a36Sopenharmony_ci	/*
37462306a36Sopenharmony_ci	 * The barrier pairs with the barrier sb_start_ro_state_change() making
37562306a36Sopenharmony_ci	 * sure that if we see MNT_WRITE_HOLD cleared, we will also see
37662306a36Sopenharmony_ci	 * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in
37762306a36Sopenharmony_ci	 * mnt_is_readonly() and bail in case we are racing with remount
37862306a36Sopenharmony_ci	 * read-only.
37962306a36Sopenharmony_ci	 */
38062306a36Sopenharmony_ci	smp_rmb();
38162306a36Sopenharmony_ci	if (mnt_is_readonly(m)) {
38262306a36Sopenharmony_ci		mnt_dec_writers(mnt);
38362306a36Sopenharmony_ci		ret = -EROFS;
38462306a36Sopenharmony_ci	}
38562306a36Sopenharmony_ci	preempt_enable();
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	return ret;
38862306a36Sopenharmony_ci}
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci/**
39162306a36Sopenharmony_ci * mnt_want_write - get write access to a mount
39262306a36Sopenharmony_ci * @m: the mount on which to take a write
39362306a36Sopenharmony_ci *
39462306a36Sopenharmony_ci * This tells the low-level filesystem that a write is about to be performed to
39562306a36Sopenharmony_ci * it, and makes sure that writes are allowed (mount is read-write, filesystem
39662306a36Sopenharmony_ci * is not frozen) before returning success.  When the write operation is
39762306a36Sopenharmony_ci * finished, mnt_drop_write() must be called.  This is effectively a refcount.
39862306a36Sopenharmony_ci */
39962306a36Sopenharmony_ciint mnt_want_write(struct vfsmount *m)
40062306a36Sopenharmony_ci{
40162306a36Sopenharmony_ci	int ret;
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	sb_start_write(m->mnt_sb);
40462306a36Sopenharmony_ci	ret = __mnt_want_write(m);
40562306a36Sopenharmony_ci	if (ret)
40662306a36Sopenharmony_ci		sb_end_write(m->mnt_sb);
40762306a36Sopenharmony_ci	return ret;
40862306a36Sopenharmony_ci}
40962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(mnt_want_write);
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_ci/**
41262306a36Sopenharmony_ci * __mnt_want_write_file - get write access to a file's mount
41362306a36Sopenharmony_ci * @file: the file who's mount on which to take a write
41462306a36Sopenharmony_ci *
41562306a36Sopenharmony_ci * This is like __mnt_want_write, but if the file is already open for writing it
41662306a36Sopenharmony_ci * skips incrementing mnt_writers (since the open file already has a reference)
41762306a36Sopenharmony_ci * and instead only does the check for emergency r/o remounts.  This must be
41862306a36Sopenharmony_ci * paired with __mnt_drop_write_file.
41962306a36Sopenharmony_ci */
42062306a36Sopenharmony_ciint __mnt_want_write_file(struct file *file)
42162306a36Sopenharmony_ci{
42262306a36Sopenharmony_ci	if (file->f_mode & FMODE_WRITER) {
42362306a36Sopenharmony_ci		/*
42462306a36Sopenharmony_ci		 * Superblock may have become readonly while there are still
42562306a36Sopenharmony_ci		 * writable fd's, e.g. due to a fs error with errors=remount-ro
42662306a36Sopenharmony_ci		 */
42762306a36Sopenharmony_ci		if (__mnt_is_readonly(file->f_path.mnt))
42862306a36Sopenharmony_ci			return -EROFS;
42962306a36Sopenharmony_ci		return 0;
43062306a36Sopenharmony_ci	}
43162306a36Sopenharmony_ci	return __mnt_want_write(file->f_path.mnt);
43262306a36Sopenharmony_ci}
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci/**
43562306a36Sopenharmony_ci * mnt_want_write_file - get write access to a file's mount
43662306a36Sopenharmony_ci * @file: the file who's mount on which to take a write
43762306a36Sopenharmony_ci *
43862306a36Sopenharmony_ci * This is like mnt_want_write, but if the file is already open for writing it
43962306a36Sopenharmony_ci * skips incrementing mnt_writers (since the open file already has a reference)
44062306a36Sopenharmony_ci * and instead only does the freeze protection and the check for emergency r/o
44162306a36Sopenharmony_ci * remounts.  This must be paired with mnt_drop_write_file.
44262306a36Sopenharmony_ci */
44362306a36Sopenharmony_ciint mnt_want_write_file(struct file *file)
44462306a36Sopenharmony_ci{
44562306a36Sopenharmony_ci	int ret;
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci	sb_start_write(file_inode(file)->i_sb);
44862306a36Sopenharmony_ci	ret = __mnt_want_write_file(file);
44962306a36Sopenharmony_ci	if (ret)
45062306a36Sopenharmony_ci		sb_end_write(file_inode(file)->i_sb);
45162306a36Sopenharmony_ci	return ret;
45262306a36Sopenharmony_ci}
45362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(mnt_want_write_file);
45462306a36Sopenharmony_ci
/**
 * __mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it
 * by dropping the writer count taken in __mnt_want_write().  Must be
 * matched with a __mnt_want_write() call above.
 */
void __mnt_drop_write(struct vfsmount *mnt)
{
	struct mount *m = real_mount(mnt);

	/* Keep the counter update on a single CPU. */
	preempt_disable();
	mnt_dec_writers(m);
	preempt_enable();
}
46962306a36Sopenharmony_ci
/**
 * mnt_drop_write - give up write access to a mount
 * @mnt: the mount on which to give up write access
 *
 * Tells the low-level filesystem that we are done performing writes to it and
 * also allows filesystem to be frozen again.  Must be matched with
 * mnt_want_write() call above.
 */
void mnt_drop_write(struct vfsmount *mnt)
{
	/* Undo mnt_want_write() in reverse order: writer count, then sb. */
	__mnt_drop_write(mnt);
	sb_end_write(mnt->mnt_sb);
}
EXPORT_SYMBOL_GPL(mnt_drop_write);
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_civoid __mnt_drop_write_file(struct file *file)
48662306a36Sopenharmony_ci{
48762306a36Sopenharmony_ci	if (!(file->f_mode & FMODE_WRITER))
48862306a36Sopenharmony_ci		__mnt_drop_write(file->f_path.mnt);
48962306a36Sopenharmony_ci}
49062306a36Sopenharmony_ci
/*
 * Counterpart of mnt_want_write_file(): give up the write access taken on
 * the file's mount and leave the superblock write section again.
 */
void mnt_drop_write_file(struct file *file)
{
	__mnt_drop_write_file(file);
	sb_end_write(file_inode(file)->i_sb);
}
EXPORT_SYMBOL(mnt_drop_write_file);
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci/**
49962306a36Sopenharmony_ci * mnt_hold_writers - prevent write access to the given mount
50062306a36Sopenharmony_ci * @mnt: mnt to prevent write access to
50162306a36Sopenharmony_ci *
50262306a36Sopenharmony_ci * Prevents write access to @mnt if there are no active writers for @mnt.
50362306a36Sopenharmony_ci * This function needs to be called and return successfully before changing
50462306a36Sopenharmony_ci * properties of @mnt that need to remain stable for callers with write access
50562306a36Sopenharmony_ci * to @mnt.
50662306a36Sopenharmony_ci *
50762306a36Sopenharmony_ci * After this functions has been called successfully callers must pair it with
50862306a36Sopenharmony_ci * a call to mnt_unhold_writers() in order to stop preventing write access to
50962306a36Sopenharmony_ci * @mnt.
51062306a36Sopenharmony_ci *
51162306a36Sopenharmony_ci * Context: This function expects lock_mount_hash() to be held serializing
51262306a36Sopenharmony_ci *          setting MNT_WRITE_HOLD.
51362306a36Sopenharmony_ci * Return: On success 0 is returned.
51462306a36Sopenharmony_ci *	   On error, -EBUSY is returned.
51562306a36Sopenharmony_ci */
51662306a36Sopenharmony_cistatic inline int mnt_hold_writers(struct mount *mnt)
51762306a36Sopenharmony_ci{
51862306a36Sopenharmony_ci	mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
51962306a36Sopenharmony_ci	/*
52062306a36Sopenharmony_ci	 * After storing MNT_WRITE_HOLD, we'll read the counters. This store
52162306a36Sopenharmony_ci	 * should be visible before we do.
52262306a36Sopenharmony_ci	 */
52362306a36Sopenharmony_ci	smp_mb();
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci	/*
52662306a36Sopenharmony_ci	 * With writers on hold, if this value is zero, then there are
52762306a36Sopenharmony_ci	 * definitely no active writers (although held writers may subsequently
52862306a36Sopenharmony_ci	 * increment the count, they'll have to wait, and decrement it after
52962306a36Sopenharmony_ci	 * seeing MNT_READONLY).
53062306a36Sopenharmony_ci	 *
53162306a36Sopenharmony_ci	 * It is OK to have counter incremented on one CPU and decremented on
53262306a36Sopenharmony_ci	 * another: the sum will add up correctly. The danger would be when we
53362306a36Sopenharmony_ci	 * sum up each counter, if we read a counter before it is incremented,
53462306a36Sopenharmony_ci	 * but then read another CPU's count which it has been subsequently
53562306a36Sopenharmony_ci	 * decremented from -- we would see more decrements than we should.
53662306a36Sopenharmony_ci	 * MNT_WRITE_HOLD protects against this scenario, because
53762306a36Sopenharmony_ci	 * mnt_want_write first increments count, then smp_mb, then spins on
53862306a36Sopenharmony_ci	 * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
53962306a36Sopenharmony_ci	 * we're counting up here.
54062306a36Sopenharmony_ci	 */
54162306a36Sopenharmony_ci	if (mnt_get_writers(mnt) > 0)
54262306a36Sopenharmony_ci		return -EBUSY;
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_ci	return 0;
54562306a36Sopenharmony_ci}
54662306a36Sopenharmony_ci
/**
 * mnt_unhold_writers - stop preventing write access to the given mount
 * @mnt: mnt to stop preventing write access to
 *
 * Stop preventing write access to @mnt allowing callers to gain write access
 * to @mnt again.
 *
 * This function must be called after mnt_hold_writers() to clear
 * MNT_WRITE_HOLD again.  mnt_hold_writers() sets the flag even when it
 * returns -EBUSY, which is why mnt_make_readonly() calls this function
 * regardless of the result.
 *
 * Context: This function expects lock_mount_hash() to be held.
 */
static inline void mnt_unhold_writers(struct mount *mnt)
{
	/*
	 * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
	 * that become unheld will see MNT_READONLY.
	 */
	smp_wmb();
	mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
}
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_cistatic int mnt_make_readonly(struct mount *mnt)
57062306a36Sopenharmony_ci{
57162306a36Sopenharmony_ci	int ret;
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_ci	ret = mnt_hold_writers(mnt);
57462306a36Sopenharmony_ci	if (!ret)
57562306a36Sopenharmony_ci		mnt->mnt.mnt_flags |= MNT_READONLY;
57662306a36Sopenharmony_ci	mnt_unhold_writers(mnt);
57762306a36Sopenharmony_ci	return ret;
57862306a36Sopenharmony_ci}
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ciint sb_prepare_remount_readonly(struct super_block *sb)
58162306a36Sopenharmony_ci{
58262306a36Sopenharmony_ci	struct mount *mnt;
58362306a36Sopenharmony_ci	int err = 0;
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci	/* Racy optimization.  Recheck the counter under MNT_WRITE_HOLD */
58662306a36Sopenharmony_ci	if (atomic_long_read(&sb->s_remove_count))
58762306a36Sopenharmony_ci		return -EBUSY;
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	lock_mount_hash();
59062306a36Sopenharmony_ci	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
59162306a36Sopenharmony_ci		if (!(mnt->mnt.mnt_flags & MNT_READONLY)) {
59262306a36Sopenharmony_ci			err = mnt_hold_writers(mnt);
59362306a36Sopenharmony_ci			if (err)
59462306a36Sopenharmony_ci				break;
59562306a36Sopenharmony_ci		}
59662306a36Sopenharmony_ci	}
59762306a36Sopenharmony_ci	if (!err && atomic_long_read(&sb->s_remove_count))
59862306a36Sopenharmony_ci		err = -EBUSY;
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	if (!err)
60162306a36Sopenharmony_ci		sb_start_ro_state_change(sb);
60262306a36Sopenharmony_ci	list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) {
60362306a36Sopenharmony_ci		if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD)
60462306a36Sopenharmony_ci			mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
60562306a36Sopenharmony_ci	}
60662306a36Sopenharmony_ci	unlock_mount_hash();
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_ci	return err;
60962306a36Sopenharmony_ci}
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_cistatic void free_vfsmnt(struct mount *mnt)
61262306a36Sopenharmony_ci{
61362306a36Sopenharmony_ci	mnt_idmap_put(mnt_idmap(&mnt->mnt));
61462306a36Sopenharmony_ci	kfree_const(mnt->mnt_devname);
61562306a36Sopenharmony_ci#ifdef CONFIG_SMP
61662306a36Sopenharmony_ci	free_percpu(mnt->mnt_pcp);
61762306a36Sopenharmony_ci#endif
61862306a36Sopenharmony_ci	kmem_cache_free(mnt_cache, mnt);
61962306a36Sopenharmony_ci}
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_cistatic void delayed_free_vfsmnt(struct rcu_head *head)
62262306a36Sopenharmony_ci{
62362306a36Sopenharmony_ci	free_vfsmnt(container_of(head, struct mount, mnt_rcu));
62462306a36Sopenharmony_ci}
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ci/* call under rcu_read_lock */
62762306a36Sopenharmony_ciint __legitimize_mnt(struct vfsmount *bastard, unsigned seq)
62862306a36Sopenharmony_ci{
62962306a36Sopenharmony_ci	struct mount *mnt;
63062306a36Sopenharmony_ci	if (read_seqretry(&mount_lock, seq))
63162306a36Sopenharmony_ci		return 1;
63262306a36Sopenharmony_ci	if (bastard == NULL)
63362306a36Sopenharmony_ci		return 0;
63462306a36Sopenharmony_ci	mnt = real_mount(bastard);
63562306a36Sopenharmony_ci	mnt_add_count(mnt, 1);
63662306a36Sopenharmony_ci	smp_mb();			// see mntput_no_expire()
63762306a36Sopenharmony_ci	if (likely(!read_seqretry(&mount_lock, seq)))
63862306a36Sopenharmony_ci		return 0;
63962306a36Sopenharmony_ci	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
64062306a36Sopenharmony_ci		mnt_add_count(mnt, -1);
64162306a36Sopenharmony_ci		return 1;
64262306a36Sopenharmony_ci	}
64362306a36Sopenharmony_ci	lock_mount_hash();
64462306a36Sopenharmony_ci	if (unlikely(bastard->mnt_flags & MNT_DOOMED)) {
64562306a36Sopenharmony_ci		mnt_add_count(mnt, -1);
64662306a36Sopenharmony_ci		unlock_mount_hash();
64762306a36Sopenharmony_ci		return 1;
64862306a36Sopenharmony_ci	}
64962306a36Sopenharmony_ci	unlock_mount_hash();
65062306a36Sopenharmony_ci	/* caller will mntput() */
65162306a36Sopenharmony_ci	return -1;
65262306a36Sopenharmony_ci}
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci/* call under rcu_read_lock */
65562306a36Sopenharmony_cistatic bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
65662306a36Sopenharmony_ci{
65762306a36Sopenharmony_ci	int res = __legitimize_mnt(bastard, seq);
65862306a36Sopenharmony_ci	if (likely(!res))
65962306a36Sopenharmony_ci		return true;
66062306a36Sopenharmony_ci	if (unlikely(res < 0)) {
66162306a36Sopenharmony_ci		rcu_read_unlock();
66262306a36Sopenharmony_ci		mntput(bastard);
66362306a36Sopenharmony_ci		rcu_read_lock();
66462306a36Sopenharmony_ci	}
66562306a36Sopenharmony_ci	return false;
66662306a36Sopenharmony_ci}
66762306a36Sopenharmony_ci
66862306a36Sopenharmony_ci/**
66962306a36Sopenharmony_ci * __lookup_mnt - find first child mount
67062306a36Sopenharmony_ci * @mnt:	parent mount
67162306a36Sopenharmony_ci * @dentry:	mountpoint
67262306a36Sopenharmony_ci *
67362306a36Sopenharmony_ci * If @mnt has a child mount @c mounted @dentry find and return it.
67462306a36Sopenharmony_ci *
67562306a36Sopenharmony_ci * Note that the child mount @c need not be unique. There are cases
67662306a36Sopenharmony_ci * where shadow mounts are created. For example, during mount
67762306a36Sopenharmony_ci * propagation when a source mount @mnt whose root got overmounted by a
67862306a36Sopenharmony_ci * mount @o after path lookup but before @namespace_sem could be
67962306a36Sopenharmony_ci * acquired gets copied and propagated. So @mnt gets copied including
68062306a36Sopenharmony_ci * @o. When @mnt is propagated to a destination mount @d that already
68162306a36Sopenharmony_ci * has another mount @n mounted at the same mountpoint then the source
68262306a36Sopenharmony_ci * mount @mnt will be tucked beneath @n, i.e., @n will be mounted on
68362306a36Sopenharmony_ci * @mnt and @mnt mounted on @d. Now both @n and @o are mounted at @mnt
68462306a36Sopenharmony_ci * on @dentry.
68562306a36Sopenharmony_ci *
68662306a36Sopenharmony_ci * Return: The first child of @mnt mounted @dentry or NULL.
68762306a36Sopenharmony_ci */
68862306a36Sopenharmony_cistruct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
68962306a36Sopenharmony_ci{
69062306a36Sopenharmony_ci	struct hlist_head *head = m_hash(mnt, dentry);
69162306a36Sopenharmony_ci	struct mount *p;
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ci	hlist_for_each_entry_rcu(p, head, mnt_hash)
69462306a36Sopenharmony_ci		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
69562306a36Sopenharmony_ci			return p;
69662306a36Sopenharmony_ci	return NULL;
69762306a36Sopenharmony_ci}
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci/*
70062306a36Sopenharmony_ci * lookup_mnt - Return the first child mount mounted at path
70162306a36Sopenharmony_ci *
70262306a36Sopenharmony_ci * "First" means first mounted chronologically.  If you create the
70362306a36Sopenharmony_ci * following mounts:
70462306a36Sopenharmony_ci *
70562306a36Sopenharmony_ci * mount /dev/sda1 /mnt
70662306a36Sopenharmony_ci * mount /dev/sda2 /mnt
70762306a36Sopenharmony_ci * mount /dev/sda3 /mnt
70862306a36Sopenharmony_ci *
70962306a36Sopenharmony_ci * Then lookup_mnt() on the base /mnt dentry in the root mount will
71062306a36Sopenharmony_ci * return successively the root dentry and vfsmount of /dev/sda1, then
71162306a36Sopenharmony_ci * /dev/sda2, then /dev/sda3, then NULL.
71262306a36Sopenharmony_ci *
71362306a36Sopenharmony_ci * lookup_mnt takes a reference to the found vfsmount.
71462306a36Sopenharmony_ci */
71562306a36Sopenharmony_cistruct vfsmount *lookup_mnt(const struct path *path)
71662306a36Sopenharmony_ci{
71762306a36Sopenharmony_ci	struct mount *child_mnt;
71862306a36Sopenharmony_ci	struct vfsmount *m;
71962306a36Sopenharmony_ci	unsigned seq;
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	rcu_read_lock();
72262306a36Sopenharmony_ci	do {
72362306a36Sopenharmony_ci		seq = read_seqbegin(&mount_lock);
72462306a36Sopenharmony_ci		child_mnt = __lookup_mnt(path->mnt, path->dentry);
72562306a36Sopenharmony_ci		m = child_mnt ? &child_mnt->mnt : NULL;
72662306a36Sopenharmony_ci	} while (!legitimize_mnt(m, seq));
72762306a36Sopenharmony_ci	rcu_read_unlock();
72862306a36Sopenharmony_ci	return m;
72962306a36Sopenharmony_ci}
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_cistatic inline void lock_ns_list(struct mnt_namespace *ns)
73262306a36Sopenharmony_ci{
73362306a36Sopenharmony_ci	spin_lock(&ns->ns_lock);
73462306a36Sopenharmony_ci}
73562306a36Sopenharmony_ci
73662306a36Sopenharmony_cistatic inline void unlock_ns_list(struct mnt_namespace *ns)
73762306a36Sopenharmony_ci{
73862306a36Sopenharmony_ci	spin_unlock(&ns->ns_lock);
73962306a36Sopenharmony_ci}
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_cistatic inline bool mnt_is_cursor(struct mount *mnt)
74262306a36Sopenharmony_ci{
74362306a36Sopenharmony_ci	return mnt->mnt.mnt_flags & MNT_CURSOR;
74462306a36Sopenharmony_ci}
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci/*
74762306a36Sopenharmony_ci * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
74862306a36Sopenharmony_ci *                         current mount namespace.
74962306a36Sopenharmony_ci *
75062306a36Sopenharmony_ci * The common case is dentries are not mountpoints at all and that
75162306a36Sopenharmony_ci * test is handled inline.  For the slow case when we are actually
75262306a36Sopenharmony_ci * dealing with a mountpoint of some kind, walk through all of the
75362306a36Sopenharmony_ci * mounts in the current mount namespace and test to see if the dentry
75462306a36Sopenharmony_ci * is a mountpoint.
75562306a36Sopenharmony_ci *
75662306a36Sopenharmony_ci * The mount_hashtable is not usable in the context because we
75762306a36Sopenharmony_ci * need to identify all mounts that may be in the current mount
75862306a36Sopenharmony_ci * namespace not just a mount that happens to have some specified
75962306a36Sopenharmony_ci * parent mount.
76062306a36Sopenharmony_ci */
76162306a36Sopenharmony_cibool __is_local_mountpoint(struct dentry *dentry)
76262306a36Sopenharmony_ci{
76362306a36Sopenharmony_ci	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
76462306a36Sopenharmony_ci	struct mount *mnt;
76562306a36Sopenharmony_ci	bool is_covered = false;
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	down_read(&namespace_sem);
76862306a36Sopenharmony_ci	lock_ns_list(ns);
76962306a36Sopenharmony_ci	list_for_each_entry(mnt, &ns->list, mnt_list) {
77062306a36Sopenharmony_ci		if (mnt_is_cursor(mnt))
77162306a36Sopenharmony_ci			continue;
77262306a36Sopenharmony_ci		is_covered = (mnt->mnt_mountpoint == dentry);
77362306a36Sopenharmony_ci		if (is_covered)
77462306a36Sopenharmony_ci			break;
77562306a36Sopenharmony_ci	}
77662306a36Sopenharmony_ci	unlock_ns_list(ns);
77762306a36Sopenharmony_ci	up_read(&namespace_sem);
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci	return is_covered;
78062306a36Sopenharmony_ci}
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_cistatic struct mountpoint *lookup_mountpoint(struct dentry *dentry)
78362306a36Sopenharmony_ci{
78462306a36Sopenharmony_ci	struct hlist_head *chain = mp_hash(dentry);
78562306a36Sopenharmony_ci	struct mountpoint *mp;
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci	hlist_for_each_entry(mp, chain, m_hash) {
78862306a36Sopenharmony_ci		if (mp->m_dentry == dentry) {
78962306a36Sopenharmony_ci			mp->m_count++;
79062306a36Sopenharmony_ci			return mp;
79162306a36Sopenharmony_ci		}
79262306a36Sopenharmony_ci	}
79362306a36Sopenharmony_ci	return NULL;
79462306a36Sopenharmony_ci}
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_cistatic struct mountpoint *get_mountpoint(struct dentry *dentry)
79762306a36Sopenharmony_ci{
79862306a36Sopenharmony_ci	struct mountpoint *mp, *new = NULL;
79962306a36Sopenharmony_ci	int ret;
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci	if (d_mountpoint(dentry)) {
80262306a36Sopenharmony_ci		/* might be worth a WARN_ON() */
80362306a36Sopenharmony_ci		if (d_unlinked(dentry))
80462306a36Sopenharmony_ci			return ERR_PTR(-ENOENT);
80562306a36Sopenharmony_cimountpoint:
80662306a36Sopenharmony_ci		read_seqlock_excl(&mount_lock);
80762306a36Sopenharmony_ci		mp = lookup_mountpoint(dentry);
80862306a36Sopenharmony_ci		read_sequnlock_excl(&mount_lock);
80962306a36Sopenharmony_ci		if (mp)
81062306a36Sopenharmony_ci			goto done;
81162306a36Sopenharmony_ci	}
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci	if (!new)
81462306a36Sopenharmony_ci		new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
81562306a36Sopenharmony_ci	if (!new)
81662306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci	/* Exactly one processes may set d_mounted */
82062306a36Sopenharmony_ci	ret = d_set_mounted(dentry);
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	/* Someone else set d_mounted? */
82362306a36Sopenharmony_ci	if (ret == -EBUSY)
82462306a36Sopenharmony_ci		goto mountpoint;
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	/* The dentry is not available as a mountpoint? */
82762306a36Sopenharmony_ci	mp = ERR_PTR(ret);
82862306a36Sopenharmony_ci	if (ret)
82962306a36Sopenharmony_ci		goto done;
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci	/* Add the new mountpoint to the hash table */
83262306a36Sopenharmony_ci	read_seqlock_excl(&mount_lock);
83362306a36Sopenharmony_ci	new->m_dentry = dget(dentry);
83462306a36Sopenharmony_ci	new->m_count = 1;
83562306a36Sopenharmony_ci	hlist_add_head(&new->m_hash, mp_hash(dentry));
83662306a36Sopenharmony_ci	INIT_HLIST_HEAD(&new->m_list);
83762306a36Sopenharmony_ci	read_sequnlock_excl(&mount_lock);
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_ci	mp = new;
84062306a36Sopenharmony_ci	new = NULL;
84162306a36Sopenharmony_cidone:
84262306a36Sopenharmony_ci	kfree(new);
84362306a36Sopenharmony_ci	return mp;
84462306a36Sopenharmony_ci}
84562306a36Sopenharmony_ci
84662306a36Sopenharmony_ci/*
84762306a36Sopenharmony_ci * vfsmount lock must be held.  Additionally, the caller is responsible
84862306a36Sopenharmony_ci * for serializing calls for given disposal list.
84962306a36Sopenharmony_ci */
85062306a36Sopenharmony_cistatic void __put_mountpoint(struct mountpoint *mp, struct list_head *list)
85162306a36Sopenharmony_ci{
85262306a36Sopenharmony_ci	if (!--mp->m_count) {
85362306a36Sopenharmony_ci		struct dentry *dentry = mp->m_dentry;
85462306a36Sopenharmony_ci		BUG_ON(!hlist_empty(&mp->m_list));
85562306a36Sopenharmony_ci		spin_lock(&dentry->d_lock);
85662306a36Sopenharmony_ci		dentry->d_flags &= ~DCACHE_MOUNTED;
85762306a36Sopenharmony_ci		spin_unlock(&dentry->d_lock);
85862306a36Sopenharmony_ci		dput_to_list(dentry, list);
85962306a36Sopenharmony_ci		hlist_del(&mp->m_hash);
86062306a36Sopenharmony_ci		kfree(mp);
86162306a36Sopenharmony_ci	}
86262306a36Sopenharmony_ci}
86362306a36Sopenharmony_ci
86462306a36Sopenharmony_ci/* called with namespace_lock and vfsmount lock */
86562306a36Sopenharmony_cistatic void put_mountpoint(struct mountpoint *mp)
86662306a36Sopenharmony_ci{
86762306a36Sopenharmony_ci	__put_mountpoint(mp, &ex_mountpoints);
86862306a36Sopenharmony_ci}
86962306a36Sopenharmony_ci
87062306a36Sopenharmony_cistatic inline int check_mnt(struct mount *mnt)
87162306a36Sopenharmony_ci{
87262306a36Sopenharmony_ci	return mnt->mnt_ns == current->nsproxy->mnt_ns;
87362306a36Sopenharmony_ci}
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci/*
87662306a36Sopenharmony_ci * vfsmount lock must be held for write
87762306a36Sopenharmony_ci */
87862306a36Sopenharmony_cistatic void touch_mnt_namespace(struct mnt_namespace *ns)
87962306a36Sopenharmony_ci{
88062306a36Sopenharmony_ci	if (ns) {
88162306a36Sopenharmony_ci		ns->event = ++event;
88262306a36Sopenharmony_ci		wake_up_interruptible(&ns->poll);
88362306a36Sopenharmony_ci	}
88462306a36Sopenharmony_ci}
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci/*
88762306a36Sopenharmony_ci * vfsmount lock must be held for write
88862306a36Sopenharmony_ci */
88962306a36Sopenharmony_cistatic void __touch_mnt_namespace(struct mnt_namespace *ns)
89062306a36Sopenharmony_ci{
89162306a36Sopenharmony_ci	if (ns && ns->event != event) {
89262306a36Sopenharmony_ci		ns->event = event;
89362306a36Sopenharmony_ci		wake_up_interruptible(&ns->poll);
89462306a36Sopenharmony_ci	}
89562306a36Sopenharmony_ci}
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci/*
89862306a36Sopenharmony_ci * vfsmount lock must be held for write
89962306a36Sopenharmony_ci */
90062306a36Sopenharmony_cistatic struct mountpoint *unhash_mnt(struct mount *mnt)
90162306a36Sopenharmony_ci{
90262306a36Sopenharmony_ci	struct mountpoint *mp;
90362306a36Sopenharmony_ci	mnt->mnt_parent = mnt;
90462306a36Sopenharmony_ci	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
90562306a36Sopenharmony_ci	list_del_init(&mnt->mnt_child);
90662306a36Sopenharmony_ci	hlist_del_init_rcu(&mnt->mnt_hash);
90762306a36Sopenharmony_ci	hlist_del_init(&mnt->mnt_mp_list);
90862306a36Sopenharmony_ci	mp = mnt->mnt_mp;
90962306a36Sopenharmony_ci	mnt->mnt_mp = NULL;
91062306a36Sopenharmony_ci	return mp;
91162306a36Sopenharmony_ci}
91262306a36Sopenharmony_ci
/*
 * Unhash @mnt and drop the reference on the mountpoint it occupied.
 *
 * vfsmount lock must be held for write
 */
static void umount_mnt(struct mount *mnt)
{
	struct mountpoint *mp = unhash_mnt(mnt);

	put_mountpoint(mp);
}
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci/*
92262306a36Sopenharmony_ci * vfsmount lock must be held for write
92362306a36Sopenharmony_ci */
92462306a36Sopenharmony_civoid mnt_set_mountpoint(struct mount *mnt,
92562306a36Sopenharmony_ci			struct mountpoint *mp,
92662306a36Sopenharmony_ci			struct mount *child_mnt)
92762306a36Sopenharmony_ci{
92862306a36Sopenharmony_ci	mp->m_count++;
92962306a36Sopenharmony_ci	mnt_add_count(mnt, 1);	/* essentially, that's mntget */
93062306a36Sopenharmony_ci	child_mnt->mnt_mountpoint = mp->m_dentry;
93162306a36Sopenharmony_ci	child_mnt->mnt_parent = mnt;
93262306a36Sopenharmony_ci	child_mnt->mnt_mp = mp;
93362306a36Sopenharmony_ci	hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
93462306a36Sopenharmony_ci}
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci/**
93762306a36Sopenharmony_ci * mnt_set_mountpoint_beneath - mount a mount beneath another one
93862306a36Sopenharmony_ci *
93962306a36Sopenharmony_ci * @new_parent: the source mount
94062306a36Sopenharmony_ci * @top_mnt:    the mount beneath which @new_parent is mounted
94162306a36Sopenharmony_ci * @new_mp:     the new mountpoint of @top_mnt on @new_parent
94262306a36Sopenharmony_ci *
94362306a36Sopenharmony_ci * Remove @top_mnt from its current mountpoint @top_mnt->mnt_mp and
94462306a36Sopenharmony_ci * parent @top_mnt->mnt_parent and mount it on top of @new_parent at
94562306a36Sopenharmony_ci * @new_mp. And mount @new_parent on the old parent and old
94662306a36Sopenharmony_ci * mountpoint of @top_mnt.
94762306a36Sopenharmony_ci *
94862306a36Sopenharmony_ci * Context: This function expects namespace_lock() and lock_mount_hash()
94962306a36Sopenharmony_ci *          to have been acquired in that order.
95062306a36Sopenharmony_ci */
95162306a36Sopenharmony_cistatic void mnt_set_mountpoint_beneath(struct mount *new_parent,
95262306a36Sopenharmony_ci				       struct mount *top_mnt,
95362306a36Sopenharmony_ci				       struct mountpoint *new_mp)
95462306a36Sopenharmony_ci{
95562306a36Sopenharmony_ci	struct mount *old_top_parent = top_mnt->mnt_parent;
95662306a36Sopenharmony_ci	struct mountpoint *old_top_mp = top_mnt->mnt_mp;
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci	mnt_set_mountpoint(old_top_parent, old_top_mp, new_parent);
95962306a36Sopenharmony_ci	mnt_change_mountpoint(new_parent, new_mp, top_mnt);
96062306a36Sopenharmony_ci}
96162306a36Sopenharmony_ci
96262306a36Sopenharmony_ci
96362306a36Sopenharmony_cistatic void __attach_mnt(struct mount *mnt, struct mount *parent)
96462306a36Sopenharmony_ci{
96562306a36Sopenharmony_ci	hlist_add_head_rcu(&mnt->mnt_hash,
96662306a36Sopenharmony_ci			   m_hash(&parent->mnt, mnt->mnt_mountpoint));
96762306a36Sopenharmony_ci	list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
96862306a36Sopenharmony_ci}
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci/**
97162306a36Sopenharmony_ci * attach_mnt - mount a mount, attach to @mount_hashtable and parent's
97262306a36Sopenharmony_ci *              list of child mounts
97362306a36Sopenharmony_ci * @parent:  the parent
97462306a36Sopenharmony_ci * @mnt:     the new mount
97562306a36Sopenharmony_ci * @mp:      the new mountpoint
97662306a36Sopenharmony_ci * @beneath: whether to mount @mnt beneath or on top of @parent
97762306a36Sopenharmony_ci *
97862306a36Sopenharmony_ci * If @beneath is false, mount @mnt at @mp on @parent. Then attach @mnt
97962306a36Sopenharmony_ci * to @parent's child mount list and to @mount_hashtable.
98062306a36Sopenharmony_ci *
98162306a36Sopenharmony_ci * If @beneath is true, remove @mnt from its current parent and
98262306a36Sopenharmony_ci * mountpoint and mount it on @mp on @parent, and mount @parent on the
98362306a36Sopenharmony_ci * old parent and old mountpoint of @mnt. Finally, attach @parent to
98462306a36Sopenharmony_ci * @mnt_hashtable and @parent->mnt_parent->mnt_mounts.
98562306a36Sopenharmony_ci *
98662306a36Sopenharmony_ci * Note, when __attach_mnt() is called @mnt->mnt_parent already points
98762306a36Sopenharmony_ci * to the correct parent.
98862306a36Sopenharmony_ci *
98962306a36Sopenharmony_ci * Context: This function expects namespace_lock() and lock_mount_hash()
99062306a36Sopenharmony_ci *          to have been acquired in that order.
99162306a36Sopenharmony_ci */
99262306a36Sopenharmony_cistatic void attach_mnt(struct mount *mnt, struct mount *parent,
99362306a36Sopenharmony_ci		       struct mountpoint *mp, bool beneath)
99462306a36Sopenharmony_ci{
99562306a36Sopenharmony_ci	if (beneath)
99662306a36Sopenharmony_ci		mnt_set_mountpoint_beneath(mnt, parent, mp);
99762306a36Sopenharmony_ci	else
99862306a36Sopenharmony_ci		mnt_set_mountpoint(parent, mp, mnt);
99962306a36Sopenharmony_ci	/*
100062306a36Sopenharmony_ci	 * Note, @mnt->mnt_parent has to be used. If @mnt was mounted
100162306a36Sopenharmony_ci	 * beneath @parent then @mnt will need to be attached to
100262306a36Sopenharmony_ci	 * @parent's old parent, not @parent. IOW, @mnt->mnt_parent
100362306a36Sopenharmony_ci	 * isn't the same mount as @parent.
100462306a36Sopenharmony_ci	 */
100562306a36Sopenharmony_ci	__attach_mnt(mnt, mnt->mnt_parent);
100662306a36Sopenharmony_ci}
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_civoid mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
100962306a36Sopenharmony_ci{
101062306a36Sopenharmony_ci	struct mountpoint *old_mp = mnt->mnt_mp;
101162306a36Sopenharmony_ci	struct mount *old_parent = mnt->mnt_parent;
101262306a36Sopenharmony_ci
101362306a36Sopenharmony_ci	list_del_init(&mnt->mnt_child);
101462306a36Sopenharmony_ci	hlist_del_init(&mnt->mnt_mp_list);
101562306a36Sopenharmony_ci	hlist_del_init_rcu(&mnt->mnt_hash);
101662306a36Sopenharmony_ci
101762306a36Sopenharmony_ci	attach_mnt(mnt, parent, mp, false);
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci	put_mountpoint(old_mp);
102062306a36Sopenharmony_ci	mnt_add_count(old_parent, -1);
102162306a36Sopenharmony_ci}
102262306a36Sopenharmony_ci
102362306a36Sopenharmony_ci/*
102462306a36Sopenharmony_ci * vfsmount lock must be held for write
102562306a36Sopenharmony_ci */
102662306a36Sopenharmony_cistatic void commit_tree(struct mount *mnt)
102762306a36Sopenharmony_ci{
102862306a36Sopenharmony_ci	struct mount *parent = mnt->mnt_parent;
102962306a36Sopenharmony_ci	struct mount *m;
103062306a36Sopenharmony_ci	LIST_HEAD(head);
103162306a36Sopenharmony_ci	struct mnt_namespace *n = parent->mnt_ns;
103262306a36Sopenharmony_ci
103362306a36Sopenharmony_ci	BUG_ON(parent == mnt);
103462306a36Sopenharmony_ci
103562306a36Sopenharmony_ci	list_add_tail(&head, &mnt->mnt_list);
103662306a36Sopenharmony_ci	list_for_each_entry(m, &head, mnt_list)
103762306a36Sopenharmony_ci		m->mnt_ns = n;
103862306a36Sopenharmony_ci
103962306a36Sopenharmony_ci	list_splice(&head, n->list.prev);
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci	n->mounts += n->pending_mounts;
104262306a36Sopenharmony_ci	n->pending_mounts = 0;
104362306a36Sopenharmony_ci
104462306a36Sopenharmony_ci	__attach_mnt(mnt, parent);
104562306a36Sopenharmony_ci	touch_mnt_namespace(n);
104662306a36Sopenharmony_ci}
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_cistatic struct mount *next_mnt(struct mount *p, struct mount *root)
104962306a36Sopenharmony_ci{
105062306a36Sopenharmony_ci	struct list_head *next = p->mnt_mounts.next;
105162306a36Sopenharmony_ci	if (next == &p->mnt_mounts) {
105262306a36Sopenharmony_ci		while (1) {
105362306a36Sopenharmony_ci			if (p == root)
105462306a36Sopenharmony_ci				return NULL;
105562306a36Sopenharmony_ci			next = p->mnt_child.next;
105662306a36Sopenharmony_ci			if (next != &p->mnt_parent->mnt_mounts)
105762306a36Sopenharmony_ci				break;
105862306a36Sopenharmony_ci			p = p->mnt_parent;
105962306a36Sopenharmony_ci		}
106062306a36Sopenharmony_ci	}
106162306a36Sopenharmony_ci	return list_entry(next, struct mount, mnt_child);
106262306a36Sopenharmony_ci}
106362306a36Sopenharmony_ci
106462306a36Sopenharmony_cistatic struct mount *skip_mnt_tree(struct mount *p)
106562306a36Sopenharmony_ci{
106662306a36Sopenharmony_ci	struct list_head *prev = p->mnt_mounts.prev;
106762306a36Sopenharmony_ci	while (prev != &p->mnt_mounts) {
106862306a36Sopenharmony_ci		p = list_entry(prev, struct mount, mnt_child);
106962306a36Sopenharmony_ci		prev = p->mnt_mounts.prev;
107062306a36Sopenharmony_ci	}
107162306a36Sopenharmony_ci	return p;
107262306a36Sopenharmony_ci}
107362306a36Sopenharmony_ci
107462306a36Sopenharmony_ci/**
107562306a36Sopenharmony_ci * vfs_create_mount - Create a mount for a configured superblock
107662306a36Sopenharmony_ci * @fc: The configuration context with the superblock attached
107762306a36Sopenharmony_ci *
107862306a36Sopenharmony_ci * Create a mount to an already configured superblock.  If necessary, the
107962306a36Sopenharmony_ci * caller should invoke vfs_get_tree() before calling this.
108062306a36Sopenharmony_ci *
108162306a36Sopenharmony_ci * Note that this does not attach the mount to anything.
108262306a36Sopenharmony_ci */
108362306a36Sopenharmony_cistruct vfsmount *vfs_create_mount(struct fs_context *fc)
108462306a36Sopenharmony_ci{
108562306a36Sopenharmony_ci	struct mount *mnt;
108662306a36Sopenharmony_ci
108762306a36Sopenharmony_ci	if (!fc->root)
108862306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
108962306a36Sopenharmony_ci
109062306a36Sopenharmony_ci	mnt = alloc_vfsmnt(fc->source ?: "none");
109162306a36Sopenharmony_ci	if (!mnt)
109262306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
109362306a36Sopenharmony_ci
109462306a36Sopenharmony_ci	if (fc->sb_flags & SB_KERNMOUNT)
109562306a36Sopenharmony_ci		mnt->mnt.mnt_flags = MNT_INTERNAL;
109662306a36Sopenharmony_ci
109762306a36Sopenharmony_ci	atomic_inc(&fc->root->d_sb->s_active);
109862306a36Sopenharmony_ci	mnt->mnt.mnt_sb		= fc->root->d_sb;
109962306a36Sopenharmony_ci	mnt->mnt.mnt_root	= dget(fc->root);
110062306a36Sopenharmony_ci	mnt->mnt_mountpoint	= mnt->mnt.mnt_root;
110162306a36Sopenharmony_ci	mnt->mnt_parent		= mnt;
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci	lock_mount_hash();
110462306a36Sopenharmony_ci	list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
110562306a36Sopenharmony_ci	unlock_mount_hash();
110662306a36Sopenharmony_ci	return &mnt->mnt;
110762306a36Sopenharmony_ci}
110862306a36Sopenharmony_ciEXPORT_SYMBOL(vfs_create_mount);
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_cistruct vfsmount *fc_mount(struct fs_context *fc)
111162306a36Sopenharmony_ci{
111262306a36Sopenharmony_ci	int err = vfs_get_tree(fc);
111362306a36Sopenharmony_ci	if (!err) {
111462306a36Sopenharmony_ci		up_write(&fc->root->d_sb->s_umount);
111562306a36Sopenharmony_ci		return vfs_create_mount(fc);
111662306a36Sopenharmony_ci	}
111762306a36Sopenharmony_ci	return ERR_PTR(err);
111862306a36Sopenharmony_ci}
111962306a36Sopenharmony_ciEXPORT_SYMBOL(fc_mount);
112062306a36Sopenharmony_ci
112162306a36Sopenharmony_cistruct vfsmount *vfs_kern_mount(struct file_system_type *type,
112262306a36Sopenharmony_ci				int flags, const char *name,
112362306a36Sopenharmony_ci				void *data)
112462306a36Sopenharmony_ci{
112562306a36Sopenharmony_ci	struct fs_context *fc;
112662306a36Sopenharmony_ci	struct vfsmount *mnt;
112762306a36Sopenharmony_ci	int ret = 0;
112862306a36Sopenharmony_ci
112962306a36Sopenharmony_ci	if (!type)
113062306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
113162306a36Sopenharmony_ci
113262306a36Sopenharmony_ci	fc = fs_context_for_mount(type, flags);
113362306a36Sopenharmony_ci	if (IS_ERR(fc))
113462306a36Sopenharmony_ci		return ERR_CAST(fc);
113562306a36Sopenharmony_ci
113662306a36Sopenharmony_ci	if (name)
113762306a36Sopenharmony_ci		ret = vfs_parse_fs_string(fc, "source",
113862306a36Sopenharmony_ci					  name, strlen(name));
113962306a36Sopenharmony_ci	if (!ret)
114062306a36Sopenharmony_ci		ret = parse_monolithic_mount_data(fc, data);
114162306a36Sopenharmony_ci	if (!ret)
114262306a36Sopenharmony_ci		mnt = fc_mount(fc);
114362306a36Sopenharmony_ci	else
114462306a36Sopenharmony_ci		mnt = ERR_PTR(ret);
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_ci	put_fs_context(fc);
114762306a36Sopenharmony_ci	return mnt;
114862306a36Sopenharmony_ci}
114962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfs_kern_mount);
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_cistruct vfsmount *
115262306a36Sopenharmony_civfs_submount(const struct dentry *mountpoint, struct file_system_type *type,
115362306a36Sopenharmony_ci	     const char *name, void *data)
115462306a36Sopenharmony_ci{
115562306a36Sopenharmony_ci	/* Until it is worked out how to pass the user namespace
115662306a36Sopenharmony_ci	 * through from the parent mount to the submount don't support
115762306a36Sopenharmony_ci	 * unprivileged mounts with submounts.
115862306a36Sopenharmony_ci	 */
115962306a36Sopenharmony_ci	if (mountpoint->d_sb->s_user_ns != &init_user_ns)
116062306a36Sopenharmony_ci		return ERR_PTR(-EPERM);
116162306a36Sopenharmony_ci
116262306a36Sopenharmony_ci	return vfs_kern_mount(type, SB_SUBMOUNT, name, data);
116362306a36Sopenharmony_ci}
116462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfs_submount);
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_cistatic struct mount *clone_mnt(struct mount *old, struct dentry *root,
116762306a36Sopenharmony_ci					int flag)
116862306a36Sopenharmony_ci{
116962306a36Sopenharmony_ci	struct super_block *sb = old->mnt.mnt_sb;
117062306a36Sopenharmony_ci	struct mount *mnt;
117162306a36Sopenharmony_ci	int err;
117262306a36Sopenharmony_ci
117362306a36Sopenharmony_ci	mnt = alloc_vfsmnt(old->mnt_devname);
117462306a36Sopenharmony_ci	if (!mnt)
117562306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
117662306a36Sopenharmony_ci
117762306a36Sopenharmony_ci	if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE))
117862306a36Sopenharmony_ci		mnt->mnt_group_id = 0; /* not a peer of original */
117962306a36Sopenharmony_ci	else
118062306a36Sopenharmony_ci		mnt->mnt_group_id = old->mnt_group_id;
118162306a36Sopenharmony_ci
118262306a36Sopenharmony_ci	if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) {
118362306a36Sopenharmony_ci		err = mnt_alloc_group_id(mnt);
118462306a36Sopenharmony_ci		if (err)
118562306a36Sopenharmony_ci			goto out_free;
118662306a36Sopenharmony_ci	}
118762306a36Sopenharmony_ci
118862306a36Sopenharmony_ci	mnt->mnt.mnt_flags = old->mnt.mnt_flags;
118962306a36Sopenharmony_ci	mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
119062306a36Sopenharmony_ci
119162306a36Sopenharmony_ci	atomic_inc(&sb->s_active);
119262306a36Sopenharmony_ci	mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt));
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_ci	mnt->mnt.mnt_sb = sb;
119562306a36Sopenharmony_ci	mnt->mnt.mnt_root = dget(root);
119662306a36Sopenharmony_ci	mnt->mnt_mountpoint = mnt->mnt.mnt_root;
119762306a36Sopenharmony_ci	mnt->mnt_parent = mnt;
119862306a36Sopenharmony_ci	lock_mount_hash();
119962306a36Sopenharmony_ci	list_add_tail(&mnt->mnt_instance, &sb->s_mounts);
120062306a36Sopenharmony_ci	unlock_mount_hash();
120162306a36Sopenharmony_ci
120262306a36Sopenharmony_ci	if ((flag & CL_SLAVE) ||
120362306a36Sopenharmony_ci	    ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) {
120462306a36Sopenharmony_ci		list_add(&mnt->mnt_slave, &old->mnt_slave_list);
120562306a36Sopenharmony_ci		mnt->mnt_master = old;
120662306a36Sopenharmony_ci		CLEAR_MNT_SHARED(mnt);
120762306a36Sopenharmony_ci	} else if (!(flag & CL_PRIVATE)) {
120862306a36Sopenharmony_ci		if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old))
120962306a36Sopenharmony_ci			list_add(&mnt->mnt_share, &old->mnt_share);
121062306a36Sopenharmony_ci		if (IS_MNT_SLAVE(old))
121162306a36Sopenharmony_ci			list_add(&mnt->mnt_slave, &old->mnt_slave);
121262306a36Sopenharmony_ci		mnt->mnt_master = old->mnt_master;
121362306a36Sopenharmony_ci	} else {
121462306a36Sopenharmony_ci		CLEAR_MNT_SHARED(mnt);
121562306a36Sopenharmony_ci	}
121662306a36Sopenharmony_ci	if (flag & CL_MAKE_SHARED)
121762306a36Sopenharmony_ci		set_mnt_shared(mnt);
121862306a36Sopenharmony_ci
121962306a36Sopenharmony_ci	/* stick the duplicate mount on the same expiry list
122062306a36Sopenharmony_ci	 * as the original if that was on one */
122162306a36Sopenharmony_ci	if (flag & CL_EXPIRE) {
122262306a36Sopenharmony_ci		if (!list_empty(&old->mnt_expire))
122362306a36Sopenharmony_ci			list_add(&mnt->mnt_expire, &old->mnt_expire);
122462306a36Sopenharmony_ci	}
122562306a36Sopenharmony_ci
122662306a36Sopenharmony_ci	return mnt;
122762306a36Sopenharmony_ci
122862306a36Sopenharmony_ci out_free:
122962306a36Sopenharmony_ci	mnt_free_id(mnt);
123062306a36Sopenharmony_ci	free_vfsmnt(mnt);
123162306a36Sopenharmony_ci	return ERR_PTR(err);
123262306a36Sopenharmony_ci}
123362306a36Sopenharmony_ci
123462306a36Sopenharmony_cistatic void cleanup_mnt(struct mount *mnt)
123562306a36Sopenharmony_ci{
123662306a36Sopenharmony_ci	struct hlist_node *p;
123762306a36Sopenharmony_ci	struct mount *m;
123862306a36Sopenharmony_ci	/*
123962306a36Sopenharmony_ci	 * The warning here probably indicates that somebody messed
124062306a36Sopenharmony_ci	 * up a mnt_want/drop_write() pair.  If this happens, the
124162306a36Sopenharmony_ci	 * filesystem was probably unable to make r/w->r/o transitions.
124262306a36Sopenharmony_ci	 * The locking used to deal with mnt_count decrement provides barriers,
124362306a36Sopenharmony_ci	 * so mnt_get_writers() below is safe.
124462306a36Sopenharmony_ci	 */
124562306a36Sopenharmony_ci	WARN_ON(mnt_get_writers(mnt));
124662306a36Sopenharmony_ci	if (unlikely(mnt->mnt_pins.first))
124762306a36Sopenharmony_ci		mnt_pin_kill(mnt);
124862306a36Sopenharmony_ci	hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) {
124962306a36Sopenharmony_ci		hlist_del(&m->mnt_umount);
125062306a36Sopenharmony_ci		mntput(&m->mnt);
125162306a36Sopenharmony_ci	}
125262306a36Sopenharmony_ci	fsnotify_vfsmount_delete(&mnt->mnt);
125362306a36Sopenharmony_ci	dput(mnt->mnt.mnt_root);
125462306a36Sopenharmony_ci	deactivate_super(mnt->mnt.mnt_sb);
125562306a36Sopenharmony_ci	mnt_free_id(mnt);
125662306a36Sopenharmony_ci	call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
125762306a36Sopenharmony_ci}
125862306a36Sopenharmony_ci
125962306a36Sopenharmony_cistatic void __cleanup_mnt(struct rcu_head *head)
126062306a36Sopenharmony_ci{
126162306a36Sopenharmony_ci	cleanup_mnt(container_of(head, struct mount, mnt_rcu));
126262306a36Sopenharmony_ci}
126362306a36Sopenharmony_ci
126462306a36Sopenharmony_cistatic LLIST_HEAD(delayed_mntput_list);
126562306a36Sopenharmony_cistatic void delayed_mntput(struct work_struct *unused)
126662306a36Sopenharmony_ci{
126762306a36Sopenharmony_ci	struct llist_node *node = llist_del_all(&delayed_mntput_list);
126862306a36Sopenharmony_ci	struct mount *m, *t;
126962306a36Sopenharmony_ci
127062306a36Sopenharmony_ci	llist_for_each_entry_safe(m, t, node, mnt_llist)
127162306a36Sopenharmony_ci		cleanup_mnt(m);
127262306a36Sopenharmony_ci}
127362306a36Sopenharmony_cistatic DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_cistatic void mntput_no_expire(struct mount *mnt)
127662306a36Sopenharmony_ci{
127762306a36Sopenharmony_ci	LIST_HEAD(list);
127862306a36Sopenharmony_ci	int count;
127962306a36Sopenharmony_ci
128062306a36Sopenharmony_ci	rcu_read_lock();
128162306a36Sopenharmony_ci	if (likely(READ_ONCE(mnt->mnt_ns))) {
128262306a36Sopenharmony_ci		/*
128362306a36Sopenharmony_ci		 * Since we don't do lock_mount_hash() here,
128462306a36Sopenharmony_ci		 * ->mnt_ns can change under us.  However, if it's
128562306a36Sopenharmony_ci		 * non-NULL, then there's a reference that won't
128662306a36Sopenharmony_ci		 * be dropped until after an RCU delay done after
128762306a36Sopenharmony_ci		 * turning ->mnt_ns NULL.  So if we observe it
128862306a36Sopenharmony_ci		 * non-NULL under rcu_read_lock(), the reference
128962306a36Sopenharmony_ci		 * we are dropping is not the final one.
129062306a36Sopenharmony_ci		 */
129162306a36Sopenharmony_ci		mnt_add_count(mnt, -1);
129262306a36Sopenharmony_ci		rcu_read_unlock();
129362306a36Sopenharmony_ci		return;
129462306a36Sopenharmony_ci	}
129562306a36Sopenharmony_ci	lock_mount_hash();
129662306a36Sopenharmony_ci	/*
129762306a36Sopenharmony_ci	 * make sure that if __legitimize_mnt() has not seen us grab
129862306a36Sopenharmony_ci	 * mount_lock, we'll see their refcount increment here.
129962306a36Sopenharmony_ci	 */
130062306a36Sopenharmony_ci	smp_mb();
130162306a36Sopenharmony_ci	mnt_add_count(mnt, -1);
130262306a36Sopenharmony_ci	count = mnt_get_count(mnt);
130362306a36Sopenharmony_ci	if (count != 0) {
130462306a36Sopenharmony_ci		WARN_ON(count < 0);
130562306a36Sopenharmony_ci		rcu_read_unlock();
130662306a36Sopenharmony_ci		unlock_mount_hash();
130762306a36Sopenharmony_ci		return;
130862306a36Sopenharmony_ci	}
130962306a36Sopenharmony_ci	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
131062306a36Sopenharmony_ci		rcu_read_unlock();
131162306a36Sopenharmony_ci		unlock_mount_hash();
131262306a36Sopenharmony_ci		return;
131362306a36Sopenharmony_ci	}
131462306a36Sopenharmony_ci	mnt->mnt.mnt_flags |= MNT_DOOMED;
131562306a36Sopenharmony_ci	rcu_read_unlock();
131662306a36Sopenharmony_ci
131762306a36Sopenharmony_ci	list_del(&mnt->mnt_instance);
131862306a36Sopenharmony_ci
131962306a36Sopenharmony_ci	if (unlikely(!list_empty(&mnt->mnt_mounts))) {
132062306a36Sopenharmony_ci		struct mount *p, *tmp;
132162306a36Sopenharmony_ci		list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts,  mnt_child) {
132262306a36Sopenharmony_ci			__put_mountpoint(unhash_mnt(p), &list);
132362306a36Sopenharmony_ci			hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children);
132462306a36Sopenharmony_ci		}
132562306a36Sopenharmony_ci	}
132662306a36Sopenharmony_ci	unlock_mount_hash();
132762306a36Sopenharmony_ci	shrink_dentry_list(&list);
132862306a36Sopenharmony_ci
132962306a36Sopenharmony_ci	if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
133062306a36Sopenharmony_ci		struct task_struct *task = current;
133162306a36Sopenharmony_ci		if (likely(!(task->flags & PF_KTHREAD))) {
133262306a36Sopenharmony_ci			init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
133362306a36Sopenharmony_ci			if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME))
133462306a36Sopenharmony_ci				return;
133562306a36Sopenharmony_ci		}
133662306a36Sopenharmony_ci		if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
133762306a36Sopenharmony_ci			schedule_delayed_work(&delayed_mntput_work, 1);
133862306a36Sopenharmony_ci		return;
133962306a36Sopenharmony_ci	}
134062306a36Sopenharmony_ci	cleanup_mnt(mnt);
134162306a36Sopenharmony_ci}
134262306a36Sopenharmony_ci
134362306a36Sopenharmony_civoid mntput(struct vfsmount *mnt)
134462306a36Sopenharmony_ci{
134562306a36Sopenharmony_ci	if (mnt) {
134662306a36Sopenharmony_ci		struct mount *m = real_mount(mnt);
134762306a36Sopenharmony_ci		/* avoid cacheline pingpong, hope gcc doesn't get "smart" */
134862306a36Sopenharmony_ci		if (unlikely(m->mnt_expiry_mark))
134962306a36Sopenharmony_ci			m->mnt_expiry_mark = 0;
135062306a36Sopenharmony_ci		mntput_no_expire(m);
135162306a36Sopenharmony_ci	}
135262306a36Sopenharmony_ci}
135362306a36Sopenharmony_ciEXPORT_SYMBOL(mntput);
135462306a36Sopenharmony_ci
135562306a36Sopenharmony_cistruct vfsmount *mntget(struct vfsmount *mnt)
135662306a36Sopenharmony_ci{
135762306a36Sopenharmony_ci	if (mnt)
135862306a36Sopenharmony_ci		mnt_add_count(real_mount(mnt), 1);
135962306a36Sopenharmony_ci	return mnt;
136062306a36Sopenharmony_ci}
136162306a36Sopenharmony_ciEXPORT_SYMBOL(mntget);
136262306a36Sopenharmony_ci
136362306a36Sopenharmony_ci/*
136462306a36Sopenharmony_ci * Make a mount point inaccessible to new lookups.
136562306a36Sopenharmony_ci * Because there may still be current users, the caller MUST WAIT
136662306a36Sopenharmony_ci * for an RCU grace period before destroying the mount point.
136762306a36Sopenharmony_ci */
136862306a36Sopenharmony_civoid mnt_make_shortterm(struct vfsmount *mnt)
136962306a36Sopenharmony_ci{
137062306a36Sopenharmony_ci	if (mnt)
137162306a36Sopenharmony_ci		real_mount(mnt)->mnt_ns = NULL;
137262306a36Sopenharmony_ci}
137362306a36Sopenharmony_ci
137462306a36Sopenharmony_ci/**
137562306a36Sopenharmony_ci * path_is_mountpoint() - Check if path is a mount in the current namespace.
137662306a36Sopenharmony_ci * @path: path to check
137762306a36Sopenharmony_ci *
137862306a36Sopenharmony_ci *  d_mountpoint() can only be used reliably to establish if a dentry is
137962306a36Sopenharmony_ci *  not mounted in any namespace and that common case is handled inline.
138062306a36Sopenharmony_ci *  d_mountpoint() isn't aware of the possibility there may be multiple
138162306a36Sopenharmony_ci *  mounts using a given dentry in a different namespace. This function
138262306a36Sopenharmony_ci *  checks if the passed in path is a mountpoint rather than the dentry
138362306a36Sopenharmony_ci *  alone.
138462306a36Sopenharmony_ci */
138562306a36Sopenharmony_cibool path_is_mountpoint(const struct path *path)
138662306a36Sopenharmony_ci{
138762306a36Sopenharmony_ci	unsigned seq;
138862306a36Sopenharmony_ci	bool res;
138962306a36Sopenharmony_ci
139062306a36Sopenharmony_ci	if (!d_mountpoint(path->dentry))
139162306a36Sopenharmony_ci		return false;
139262306a36Sopenharmony_ci
139362306a36Sopenharmony_ci	rcu_read_lock();
139462306a36Sopenharmony_ci	do {
139562306a36Sopenharmony_ci		seq = read_seqbegin(&mount_lock);
139662306a36Sopenharmony_ci		res = __path_is_mountpoint(path);
139762306a36Sopenharmony_ci	} while (read_seqretry(&mount_lock, seq));
139862306a36Sopenharmony_ci	rcu_read_unlock();
139962306a36Sopenharmony_ci
140062306a36Sopenharmony_ci	return res;
140162306a36Sopenharmony_ci}
140262306a36Sopenharmony_ciEXPORT_SYMBOL(path_is_mountpoint);
140362306a36Sopenharmony_ci
140462306a36Sopenharmony_cistruct vfsmount *mnt_clone_internal(const struct path *path)
140562306a36Sopenharmony_ci{
140662306a36Sopenharmony_ci	struct mount *p;
140762306a36Sopenharmony_ci	p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE);
140862306a36Sopenharmony_ci	if (IS_ERR(p))
140962306a36Sopenharmony_ci		return ERR_CAST(p);
141062306a36Sopenharmony_ci	p->mnt.mnt_flags |= MNT_INTERNAL;
141162306a36Sopenharmony_ci	return &p->mnt;
141262306a36Sopenharmony_ci}
141362306a36Sopenharmony_ci
141462306a36Sopenharmony_ci#ifdef CONFIG_PROC_FS
141562306a36Sopenharmony_cistatic struct mount *mnt_list_next(struct mnt_namespace *ns,
141662306a36Sopenharmony_ci				   struct list_head *p)
141762306a36Sopenharmony_ci{
141862306a36Sopenharmony_ci	struct mount *mnt, *ret = NULL;
141962306a36Sopenharmony_ci
142062306a36Sopenharmony_ci	lock_ns_list(ns);
142162306a36Sopenharmony_ci	list_for_each_continue(p, &ns->list) {
142262306a36Sopenharmony_ci		mnt = list_entry(p, typeof(*mnt), mnt_list);
142362306a36Sopenharmony_ci		if (!mnt_is_cursor(mnt)) {
142462306a36Sopenharmony_ci			ret = mnt;
142562306a36Sopenharmony_ci			break;
142662306a36Sopenharmony_ci		}
142762306a36Sopenharmony_ci	}
142862306a36Sopenharmony_ci	unlock_ns_list(ns);
142962306a36Sopenharmony_ci
143062306a36Sopenharmony_ci	return ret;
143162306a36Sopenharmony_ci}
143262306a36Sopenharmony_ci
143362306a36Sopenharmony_ci/* iterator; we want it to have access to namespace_sem, thus here... */
143462306a36Sopenharmony_cistatic void *m_start(struct seq_file *m, loff_t *pos)
143562306a36Sopenharmony_ci{
143662306a36Sopenharmony_ci	struct proc_mounts *p = m->private;
143762306a36Sopenharmony_ci	struct list_head *prev;
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	down_read(&namespace_sem);
144062306a36Sopenharmony_ci	if (!*pos) {
144162306a36Sopenharmony_ci		prev = &p->ns->list;
144262306a36Sopenharmony_ci	} else {
144362306a36Sopenharmony_ci		prev = &p->cursor.mnt_list;
144462306a36Sopenharmony_ci
144562306a36Sopenharmony_ci		/* Read after we'd reached the end? */
144662306a36Sopenharmony_ci		if (list_empty(prev))
144762306a36Sopenharmony_ci			return NULL;
144862306a36Sopenharmony_ci	}
144962306a36Sopenharmony_ci
145062306a36Sopenharmony_ci	return mnt_list_next(p->ns, prev);
145162306a36Sopenharmony_ci}
145262306a36Sopenharmony_ci
145362306a36Sopenharmony_cistatic void *m_next(struct seq_file *m, void *v, loff_t *pos)
145462306a36Sopenharmony_ci{
145562306a36Sopenharmony_ci	struct proc_mounts *p = m->private;
145662306a36Sopenharmony_ci	struct mount *mnt = v;
145762306a36Sopenharmony_ci
145862306a36Sopenharmony_ci	++*pos;
145962306a36Sopenharmony_ci	return mnt_list_next(p->ns, &mnt->mnt_list);
146062306a36Sopenharmony_ci}
146162306a36Sopenharmony_ci
146262306a36Sopenharmony_cistatic void m_stop(struct seq_file *m, void *v)
146362306a36Sopenharmony_ci{
146462306a36Sopenharmony_ci	struct proc_mounts *p = m->private;
146562306a36Sopenharmony_ci	struct mount *mnt = v;
146662306a36Sopenharmony_ci
146762306a36Sopenharmony_ci	lock_ns_list(p->ns);
146862306a36Sopenharmony_ci	if (mnt)
146962306a36Sopenharmony_ci		list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list);
147062306a36Sopenharmony_ci	else
147162306a36Sopenharmony_ci		list_del_init(&p->cursor.mnt_list);
147262306a36Sopenharmony_ci	unlock_ns_list(p->ns);
147362306a36Sopenharmony_ci	up_read(&namespace_sem);
147462306a36Sopenharmony_ci}
147562306a36Sopenharmony_ci
147662306a36Sopenharmony_cistatic int m_show(struct seq_file *m, void *v)
147762306a36Sopenharmony_ci{
147862306a36Sopenharmony_ci	struct proc_mounts *p = m->private;
147962306a36Sopenharmony_ci	struct mount *r = v;
148062306a36Sopenharmony_ci	return p->show(m, &r->mnt);
148162306a36Sopenharmony_ci}
148262306a36Sopenharmony_ci
148362306a36Sopenharmony_ciconst struct seq_operations mounts_op = {
148462306a36Sopenharmony_ci	.start	= m_start,
148562306a36Sopenharmony_ci	.next	= m_next,
148662306a36Sopenharmony_ci	.stop	= m_stop,
148762306a36Sopenharmony_ci	.show	= m_show,
148862306a36Sopenharmony_ci};
148962306a36Sopenharmony_ci
149062306a36Sopenharmony_civoid mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor)
149162306a36Sopenharmony_ci{
149262306a36Sopenharmony_ci	down_read(&namespace_sem);
149362306a36Sopenharmony_ci	lock_ns_list(ns);
149462306a36Sopenharmony_ci	list_del(&cursor->mnt_list);
149562306a36Sopenharmony_ci	unlock_ns_list(ns);
149662306a36Sopenharmony_ci	up_read(&namespace_sem);
149762306a36Sopenharmony_ci}
149862306a36Sopenharmony_ci#endif  /* CONFIG_PROC_FS */
149962306a36Sopenharmony_ci
/**
 * may_umount_tree - check if a mount tree is busy
 * @m: root of mount tree
 *
 * This is called to check if a tree of mounts has any
 * open files, pwds, chroots or sub mounts that are
 * busy.
 *
 * Return: 0 if the summed reference count of the tree exceeds two
 * references per mount (i.e. it is busy), 1 otherwise.
 */
int may_umount_tree(struct vfsmount *m)
{
	struct mount *root = real_mount(m);
	struct mount *cur;
	int expected = 0;
	int counted = 0;

	BUG_ON(!m);

	/* write lock needed for mnt_get_count */
	lock_mount_hash();
	cur = root;
	while (cur) {
		counted += mnt_get_count(cur);
		expected += 2;
		cur = next_mnt(cur, root);
	}
	unlock_mount_hash();

	return counted > expected ? 0 : 1;
}

EXPORT_SYMBOL(may_umount_tree);
153162306a36Sopenharmony_ci
153262306a36Sopenharmony_ci/**
153362306a36Sopenharmony_ci * may_umount - check if a mount point is busy
153462306a36Sopenharmony_ci * @mnt: root of mount
153562306a36Sopenharmony_ci *
153662306a36Sopenharmony_ci * This is called to check if a mount point has any
153762306a36Sopenharmony_ci * open files, pwds, chroots or sub mounts. If the
153862306a36Sopenharmony_ci * mount has sub mounts this will return busy
153962306a36Sopenharmony_ci * regardless of whether the sub mounts are busy.
154062306a36Sopenharmony_ci *
154162306a36Sopenharmony_ci * Doesn't take quota and stuff into account. IOW, in some cases it will
154262306a36Sopenharmony_ci * give false negatives. The main reason why it's here is that we need
154362306a36Sopenharmony_ci * a non-destructive way to look for easily umountable filesystems.
154462306a36Sopenharmony_ci */
154562306a36Sopenharmony_ciint may_umount(struct vfsmount *mnt)
154662306a36Sopenharmony_ci{
154762306a36Sopenharmony_ci	int ret = 1;
154862306a36Sopenharmony_ci	down_read(&namespace_sem);
154962306a36Sopenharmony_ci	lock_mount_hash();
155062306a36Sopenharmony_ci	if (propagate_mount_busy(real_mount(mnt), 2))
155162306a36Sopenharmony_ci		ret = 0;
155262306a36Sopenharmony_ci	unlock_mount_hash();
155362306a36Sopenharmony_ci	up_read(&namespace_sem);
155462306a36Sopenharmony_ci	return ret;
155562306a36Sopenharmony_ci}
155662306a36Sopenharmony_ci
155762306a36Sopenharmony_ciEXPORT_SYMBOL(may_umount);
155862306a36Sopenharmony_ci
155962306a36Sopenharmony_cistatic void namespace_unlock(void)
156062306a36Sopenharmony_ci{
156162306a36Sopenharmony_ci	struct hlist_head head;
156262306a36Sopenharmony_ci	struct hlist_node *p;
156362306a36Sopenharmony_ci	struct mount *m;
156462306a36Sopenharmony_ci	LIST_HEAD(list);
156562306a36Sopenharmony_ci
156662306a36Sopenharmony_ci	hlist_move_list(&unmounted, &head);
156762306a36Sopenharmony_ci	list_splice_init(&ex_mountpoints, &list);
156862306a36Sopenharmony_ci
156962306a36Sopenharmony_ci	up_write(&namespace_sem);
157062306a36Sopenharmony_ci
157162306a36Sopenharmony_ci	shrink_dentry_list(&list);
157262306a36Sopenharmony_ci
157362306a36Sopenharmony_ci	if (likely(hlist_empty(&head)))
157462306a36Sopenharmony_ci		return;
157562306a36Sopenharmony_ci
157662306a36Sopenharmony_ci	synchronize_rcu_expedited();
157762306a36Sopenharmony_ci
157862306a36Sopenharmony_ci	hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
157962306a36Sopenharmony_ci		hlist_del(&m->mnt_umount);
158062306a36Sopenharmony_ci		mntput(&m->mnt);
158162306a36Sopenharmony_ci	}
158262306a36Sopenharmony_ci}
158362306a36Sopenharmony_ci
158462306a36Sopenharmony_cistatic inline void namespace_lock(void)
158562306a36Sopenharmony_ci{
158662306a36Sopenharmony_ci	down_write(&namespace_sem);
158762306a36Sopenharmony_ci}
158862306a36Sopenharmony_ci
/* Bit flags controlling umount_tree() and disconnect_mount(). */
enum umount_tree_flags {
	UMOUNT_SYNC		= 1 << 0,	/* mark mounts MNT_SYNC_UMOUNT; always disconnect */
	UMOUNT_PROPAGATE	= 1 << 1,	/* run the propagate_*() steps as well */
	UMOUNT_CONNECTED	= 1 << 2,	/* request that mounts remain connected */
};
159462306a36Sopenharmony_ci
159562306a36Sopenharmony_cistatic bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how)
159662306a36Sopenharmony_ci{
159762306a36Sopenharmony_ci	/* Leaving mounts connected is only valid for lazy umounts */
159862306a36Sopenharmony_ci	if (how & UMOUNT_SYNC)
159962306a36Sopenharmony_ci		return true;
160062306a36Sopenharmony_ci
160162306a36Sopenharmony_ci	/* A mount without a parent has nothing to be connected to */
160262306a36Sopenharmony_ci	if (!mnt_has_parent(mnt))
160362306a36Sopenharmony_ci		return true;
160462306a36Sopenharmony_ci
160562306a36Sopenharmony_ci	/* Because the reference counting rules change when mounts are
160662306a36Sopenharmony_ci	 * unmounted and connected, umounted mounts may not be
160762306a36Sopenharmony_ci	 * connected to mounted mounts.
160862306a36Sopenharmony_ci	 */
160962306a36Sopenharmony_ci	if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT))
161062306a36Sopenharmony_ci		return true;
161162306a36Sopenharmony_ci
161262306a36Sopenharmony_ci	/* Has it been requested that the mount remain connected? */
161362306a36Sopenharmony_ci	if (how & UMOUNT_CONNECTED)
161462306a36Sopenharmony_ci		return false;
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_ci	/* Is the mount locked such that it needs to remain connected? */
161762306a36Sopenharmony_ci	if (IS_MNT_LOCKED(mnt))
161862306a36Sopenharmony_ci		return false;
161962306a36Sopenharmony_ci
162062306a36Sopenharmony_ci	/* By default disconnect the mount */
162162306a36Sopenharmony_ci	return true;
162262306a36Sopenharmony_ci}
162362306a36Sopenharmony_ci
162462306a36Sopenharmony_ci/*
162562306a36Sopenharmony_ci * mount_lock must be held
162662306a36Sopenharmony_ci * namespace_sem must be held for write
162762306a36Sopenharmony_ci */
162862306a36Sopenharmony_cistatic void umount_tree(struct mount *mnt, enum umount_tree_flags how)
162962306a36Sopenharmony_ci{
163062306a36Sopenharmony_ci	LIST_HEAD(tmp_list);
163162306a36Sopenharmony_ci	struct mount *p;
163262306a36Sopenharmony_ci
163362306a36Sopenharmony_ci	if (how & UMOUNT_PROPAGATE)
163462306a36Sopenharmony_ci		propagate_mount_unlock(mnt);
163562306a36Sopenharmony_ci
163662306a36Sopenharmony_ci	/* Gather the mounts to umount */
163762306a36Sopenharmony_ci	for (p = mnt; p; p = next_mnt(p, mnt)) {
163862306a36Sopenharmony_ci		p->mnt.mnt_flags |= MNT_UMOUNT;
163962306a36Sopenharmony_ci		list_move(&p->mnt_list, &tmp_list);
164062306a36Sopenharmony_ci	}
164162306a36Sopenharmony_ci
164262306a36Sopenharmony_ci	/* Hide the mounts from mnt_mounts */
164362306a36Sopenharmony_ci	list_for_each_entry(p, &tmp_list, mnt_list) {
164462306a36Sopenharmony_ci		list_del_init(&p->mnt_child);
164562306a36Sopenharmony_ci	}
164662306a36Sopenharmony_ci
164762306a36Sopenharmony_ci	/* Add propogated mounts to the tmp_list */
164862306a36Sopenharmony_ci	if (how & UMOUNT_PROPAGATE)
164962306a36Sopenharmony_ci		propagate_umount(&tmp_list);
165062306a36Sopenharmony_ci
165162306a36Sopenharmony_ci	while (!list_empty(&tmp_list)) {
165262306a36Sopenharmony_ci		struct mnt_namespace *ns;
165362306a36Sopenharmony_ci		bool disconnect;
165462306a36Sopenharmony_ci		p = list_first_entry(&tmp_list, struct mount, mnt_list);
165562306a36Sopenharmony_ci		list_del_init(&p->mnt_expire);
165662306a36Sopenharmony_ci		list_del_init(&p->mnt_list);
165762306a36Sopenharmony_ci		ns = p->mnt_ns;
165862306a36Sopenharmony_ci		if (ns) {
165962306a36Sopenharmony_ci			ns->mounts--;
166062306a36Sopenharmony_ci			__touch_mnt_namespace(ns);
166162306a36Sopenharmony_ci		}
166262306a36Sopenharmony_ci		p->mnt_ns = NULL;
166362306a36Sopenharmony_ci		if (how & UMOUNT_SYNC)
166462306a36Sopenharmony_ci			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
166562306a36Sopenharmony_ci
166662306a36Sopenharmony_ci		disconnect = disconnect_mount(p, how);
166762306a36Sopenharmony_ci		if (mnt_has_parent(p)) {
166862306a36Sopenharmony_ci			mnt_add_count(p->mnt_parent, -1);
166962306a36Sopenharmony_ci			if (!disconnect) {
167062306a36Sopenharmony_ci				/* Don't forget about p */
167162306a36Sopenharmony_ci				list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
167262306a36Sopenharmony_ci			} else {
167362306a36Sopenharmony_ci				umount_mnt(p);
167462306a36Sopenharmony_ci			}
167562306a36Sopenharmony_ci		}
167662306a36Sopenharmony_ci		change_mnt_propagation(p, MS_PRIVATE);
167762306a36Sopenharmony_ci		if (disconnect)
167862306a36Sopenharmony_ci			hlist_add_head(&p->mnt_umount, &unmounted);
167962306a36Sopenharmony_ci	}
168062306a36Sopenharmony_ci}
168162306a36Sopenharmony_ci
168262306a36Sopenharmony_cistatic void shrink_submounts(struct mount *mnt);
168362306a36Sopenharmony_ci
168462306a36Sopenharmony_cistatic int do_umount_root(struct super_block *sb)
168562306a36Sopenharmony_ci{
168662306a36Sopenharmony_ci	int ret = 0;
168762306a36Sopenharmony_ci
168862306a36Sopenharmony_ci	down_write(&sb->s_umount);
168962306a36Sopenharmony_ci	if (!sb_rdonly(sb)) {
169062306a36Sopenharmony_ci		struct fs_context *fc;
169162306a36Sopenharmony_ci
169262306a36Sopenharmony_ci		fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY,
169362306a36Sopenharmony_ci						SB_RDONLY);
169462306a36Sopenharmony_ci		if (IS_ERR(fc)) {
169562306a36Sopenharmony_ci			ret = PTR_ERR(fc);
169662306a36Sopenharmony_ci		} else {
169762306a36Sopenharmony_ci			ret = parse_monolithic_mount_data(fc, NULL);
169862306a36Sopenharmony_ci			if (!ret)
169962306a36Sopenharmony_ci				ret = reconfigure_super(fc);
170062306a36Sopenharmony_ci			put_fs_context(fc);
170162306a36Sopenharmony_ci		}
170262306a36Sopenharmony_ci	}
170362306a36Sopenharmony_ci	up_write(&sb->s_umount);
170462306a36Sopenharmony_ci	return ret;
170562306a36Sopenharmony_ci}
170662306a36Sopenharmony_ci
170762306a36Sopenharmony_cistatic int do_umount(struct mount *mnt, int flags)
170862306a36Sopenharmony_ci{
170962306a36Sopenharmony_ci	struct super_block *sb = mnt->mnt.mnt_sb;
171062306a36Sopenharmony_ci	int retval;
171162306a36Sopenharmony_ci
171262306a36Sopenharmony_ci	retval = security_sb_umount(&mnt->mnt, flags);
171362306a36Sopenharmony_ci	if (retval)
171462306a36Sopenharmony_ci		return retval;
171562306a36Sopenharmony_ci
171662306a36Sopenharmony_ci	/*
171762306a36Sopenharmony_ci	 * Allow userspace to request a mountpoint be expired rather than
171862306a36Sopenharmony_ci	 * unmounting unconditionally. Unmount only happens if:
171962306a36Sopenharmony_ci	 *  (1) the mark is already set (the mark is cleared by mntput())
172062306a36Sopenharmony_ci	 *  (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount]
172162306a36Sopenharmony_ci	 */
172262306a36Sopenharmony_ci	if (flags & MNT_EXPIRE) {
172362306a36Sopenharmony_ci		if (&mnt->mnt == current->fs->root.mnt ||
172462306a36Sopenharmony_ci		    flags & (MNT_FORCE | MNT_DETACH))
172562306a36Sopenharmony_ci			return -EINVAL;
172662306a36Sopenharmony_ci
172762306a36Sopenharmony_ci		/*
172862306a36Sopenharmony_ci		 * probably don't strictly need the lock here if we examined
172962306a36Sopenharmony_ci		 * all race cases, but it's a slowpath.
173062306a36Sopenharmony_ci		 */
173162306a36Sopenharmony_ci		lock_mount_hash();
173262306a36Sopenharmony_ci		if (mnt_get_count(mnt) != 2) {
173362306a36Sopenharmony_ci			unlock_mount_hash();
173462306a36Sopenharmony_ci			return -EBUSY;
173562306a36Sopenharmony_ci		}
173662306a36Sopenharmony_ci		unlock_mount_hash();
173762306a36Sopenharmony_ci
173862306a36Sopenharmony_ci		if (!xchg(&mnt->mnt_expiry_mark, 1))
173962306a36Sopenharmony_ci			return -EAGAIN;
174062306a36Sopenharmony_ci	}
174162306a36Sopenharmony_ci
174262306a36Sopenharmony_ci	/*
174362306a36Sopenharmony_ci	 * If we may have to abort operations to get out of this
174462306a36Sopenharmony_ci	 * mount, and they will themselves hold resources we must
174562306a36Sopenharmony_ci	 * allow the fs to do things. In the Unix tradition of
174662306a36Sopenharmony_ci	 * 'Gee thats tricky lets do it in userspace' the umount_begin
174762306a36Sopenharmony_ci	 * might fail to complete on the first run through as other tasks
174862306a36Sopenharmony_ci	 * must return, and the like. Thats for the mount program to worry
174962306a36Sopenharmony_ci	 * about for the moment.
175062306a36Sopenharmony_ci	 */
175162306a36Sopenharmony_ci
175262306a36Sopenharmony_ci	if (flags & MNT_FORCE && sb->s_op->umount_begin) {
175362306a36Sopenharmony_ci		sb->s_op->umount_begin(sb);
175462306a36Sopenharmony_ci	}
175562306a36Sopenharmony_ci
175662306a36Sopenharmony_ci	/*
175762306a36Sopenharmony_ci	 * No sense to grab the lock for this test, but test itself looks
175862306a36Sopenharmony_ci	 * somewhat bogus. Suggestions for better replacement?
175962306a36Sopenharmony_ci	 * Ho-hum... In principle, we might treat that as umount + switch
176062306a36Sopenharmony_ci	 * to rootfs. GC would eventually take care of the old vfsmount.
176162306a36Sopenharmony_ci	 * Actually it makes sense, especially if rootfs would contain a
176262306a36Sopenharmony_ci	 * /reboot - static binary that would close all descriptors and
176362306a36Sopenharmony_ci	 * call reboot(9). Then init(8) could umount root and exec /reboot.
176462306a36Sopenharmony_ci	 */
176562306a36Sopenharmony_ci	if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) {
176662306a36Sopenharmony_ci		/*
176762306a36Sopenharmony_ci		 * Special case for "unmounting" root ...
176862306a36Sopenharmony_ci		 * we just try to remount it readonly.
176962306a36Sopenharmony_ci		 */
177062306a36Sopenharmony_ci		if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN))
177162306a36Sopenharmony_ci			return -EPERM;
177262306a36Sopenharmony_ci		return do_umount_root(sb);
177362306a36Sopenharmony_ci	}
177462306a36Sopenharmony_ci
177562306a36Sopenharmony_ci	namespace_lock();
177662306a36Sopenharmony_ci	lock_mount_hash();
177762306a36Sopenharmony_ci
177862306a36Sopenharmony_ci	/* Recheck MNT_LOCKED with the locks held */
177962306a36Sopenharmony_ci	retval = -EINVAL;
178062306a36Sopenharmony_ci	if (mnt->mnt.mnt_flags & MNT_LOCKED)
178162306a36Sopenharmony_ci		goto out;
178262306a36Sopenharmony_ci
178362306a36Sopenharmony_ci	event++;
178462306a36Sopenharmony_ci	if (flags & MNT_DETACH) {
178562306a36Sopenharmony_ci		if (!list_empty(&mnt->mnt_list))
178662306a36Sopenharmony_ci			umount_tree(mnt, UMOUNT_PROPAGATE);
178762306a36Sopenharmony_ci		retval = 0;
178862306a36Sopenharmony_ci	} else {
178962306a36Sopenharmony_ci		shrink_submounts(mnt);
179062306a36Sopenharmony_ci		retval = -EBUSY;
179162306a36Sopenharmony_ci		if (!propagate_mount_busy(mnt, 2)) {
179262306a36Sopenharmony_ci			if (!list_empty(&mnt->mnt_list))
179362306a36Sopenharmony_ci				umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
179462306a36Sopenharmony_ci			retval = 0;
179562306a36Sopenharmony_ci		}
179662306a36Sopenharmony_ci	}
179762306a36Sopenharmony_ciout:
179862306a36Sopenharmony_ci	unlock_mount_hash();
179962306a36Sopenharmony_ci	namespace_unlock();
180062306a36Sopenharmony_ci	return retval;
180162306a36Sopenharmony_ci}
180262306a36Sopenharmony_ci
180362306a36Sopenharmony_ci/*
180462306a36Sopenharmony_ci * __detach_mounts - lazily unmount all mounts on the specified dentry
180562306a36Sopenharmony_ci *
180662306a36Sopenharmony_ci * During unlink, rmdir, and d_drop it is possible to loose the path
180762306a36Sopenharmony_ci * to an existing mountpoint, and wind up leaking the mount.
180862306a36Sopenharmony_ci * detach_mounts allows lazily unmounting those mounts instead of
180962306a36Sopenharmony_ci * leaking them.
181062306a36Sopenharmony_ci *
181162306a36Sopenharmony_ci * The caller may hold dentry->d_inode->i_mutex.
181262306a36Sopenharmony_ci */
181362306a36Sopenharmony_civoid __detach_mounts(struct dentry *dentry)
181462306a36Sopenharmony_ci{
181562306a36Sopenharmony_ci	struct mountpoint *mp;
181662306a36Sopenharmony_ci	struct mount *mnt;
181762306a36Sopenharmony_ci
181862306a36Sopenharmony_ci	namespace_lock();
181962306a36Sopenharmony_ci	lock_mount_hash();
182062306a36Sopenharmony_ci	mp = lookup_mountpoint(dentry);
182162306a36Sopenharmony_ci	if (!mp)
182262306a36Sopenharmony_ci		goto out_unlock;
182362306a36Sopenharmony_ci
182462306a36Sopenharmony_ci	event++;
182562306a36Sopenharmony_ci	while (!hlist_empty(&mp->m_list)) {
182662306a36Sopenharmony_ci		mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
182762306a36Sopenharmony_ci		if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
182862306a36Sopenharmony_ci			umount_mnt(mnt);
182962306a36Sopenharmony_ci			hlist_add_head(&mnt->mnt_umount, &unmounted);
183062306a36Sopenharmony_ci		}
183162306a36Sopenharmony_ci		else umount_tree(mnt, UMOUNT_CONNECTED);
183262306a36Sopenharmony_ci	}
183362306a36Sopenharmony_ci	put_mountpoint(mp);
183462306a36Sopenharmony_ciout_unlock:
183562306a36Sopenharmony_ci	unlock_mount_hash();
183662306a36Sopenharmony_ci	namespace_unlock();
183762306a36Sopenharmony_ci}
183862306a36Sopenharmony_ci
183962306a36Sopenharmony_ci/*
184062306a36Sopenharmony_ci * Is the caller allowed to modify his namespace?
184162306a36Sopenharmony_ci */
184262306a36Sopenharmony_cibool may_mount(void)
184362306a36Sopenharmony_ci{
184462306a36Sopenharmony_ci	return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
184562306a36Sopenharmony_ci}
184662306a36Sopenharmony_ci
184762306a36Sopenharmony_ci/**
184862306a36Sopenharmony_ci * path_mounted - check whether path is mounted
184962306a36Sopenharmony_ci * @path: path to check
185062306a36Sopenharmony_ci *
185162306a36Sopenharmony_ci * Determine whether @path refers to the root of a mount.
185262306a36Sopenharmony_ci *
185362306a36Sopenharmony_ci * Return: true if @path is the root of a mount, false if not.
185462306a36Sopenharmony_ci */
185562306a36Sopenharmony_cistatic inline bool path_mounted(const struct path *path)
185662306a36Sopenharmony_ci{
185762306a36Sopenharmony_ci	return path->mnt->mnt_root == path->dentry;
185862306a36Sopenharmony_ci}
185962306a36Sopenharmony_ci
/*
 * Log, at most once, that the "mand" mount option is deprecated and is
 * ignored by this kernel.
 */
static void warn_mandlock(void)
{
	pr_warn_once("=======================================================\n"
		     "WARNING: The mand mount option has been deprecated and\n"
		     "         is ignored by this kernel. Remove the mand\n"
		     "         option from the mount to silence this warning.\n"
		     "=======================================================\n");
}
186862306a36Sopenharmony_ci
186962306a36Sopenharmony_cistatic int can_umount(const struct path *path, int flags)
187062306a36Sopenharmony_ci{
187162306a36Sopenharmony_ci	struct mount *mnt = real_mount(path->mnt);
187262306a36Sopenharmony_ci
187362306a36Sopenharmony_ci	if (!may_mount())
187462306a36Sopenharmony_ci		return -EPERM;
187562306a36Sopenharmony_ci	if (!path_mounted(path))
187662306a36Sopenharmony_ci		return -EINVAL;
187762306a36Sopenharmony_ci	if (!check_mnt(mnt))
187862306a36Sopenharmony_ci		return -EINVAL;
187962306a36Sopenharmony_ci	if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
188062306a36Sopenharmony_ci		return -EINVAL;
188162306a36Sopenharmony_ci	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
188262306a36Sopenharmony_ci		return -EPERM;
188362306a36Sopenharmony_ci	return 0;
188462306a36Sopenharmony_ci}
188562306a36Sopenharmony_ci
188662306a36Sopenharmony_ci// caller is responsible for flags being sane
188762306a36Sopenharmony_ciint path_umount(struct path *path, int flags)
188862306a36Sopenharmony_ci{
188962306a36Sopenharmony_ci	struct mount *mnt = real_mount(path->mnt);
189062306a36Sopenharmony_ci	int ret;
189162306a36Sopenharmony_ci
189262306a36Sopenharmony_ci	ret = can_umount(path, flags);
189362306a36Sopenharmony_ci	if (!ret)
189462306a36Sopenharmony_ci		ret = do_umount(mnt, flags);
189562306a36Sopenharmony_ci
189662306a36Sopenharmony_ci	/* we mustn't call path_put() as that would clear mnt_expiry_mark */
189762306a36Sopenharmony_ci	dput(path->dentry);
189862306a36Sopenharmony_ci	mntput_no_expire(mnt);
189962306a36Sopenharmony_ci	return ret;
190062306a36Sopenharmony_ci}
190162306a36Sopenharmony_ci
190262306a36Sopenharmony_cistatic int ksys_umount(char __user *name, int flags)
190362306a36Sopenharmony_ci{
190462306a36Sopenharmony_ci	int lookup_flags = LOOKUP_MOUNTPOINT;
190562306a36Sopenharmony_ci	struct path path;
190662306a36Sopenharmony_ci	int ret;
190762306a36Sopenharmony_ci
190862306a36Sopenharmony_ci	// basic validity checks done first
190962306a36Sopenharmony_ci	if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW))
191062306a36Sopenharmony_ci		return -EINVAL;
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci	if (!(flags & UMOUNT_NOFOLLOW))
191362306a36Sopenharmony_ci		lookup_flags |= LOOKUP_FOLLOW;
191462306a36Sopenharmony_ci	ret = user_path_at(AT_FDCWD, name, lookup_flags, &path);
191562306a36Sopenharmony_ci	if (ret)
191662306a36Sopenharmony_ci		return ret;
191762306a36Sopenharmony_ci	return path_umount(&path, flags);
191862306a36Sopenharmony_ci}
191962306a36Sopenharmony_ci
/*
 * umount system call: @name is the user-space path, @flags the unmount
 * flags.  All of the work, including flag validation, is done by
 * ksys_umount().
 */
SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
{
	return ksys_umount(name, flags);
}
192462306a36Sopenharmony_ci
192562306a36Sopenharmony_ci#ifdef __ARCH_WANT_SYS_OLDUMOUNT
192662306a36Sopenharmony_ci
/*
 *	The 2.0 compatible umount. No flags.
 *
 *	Only built when __ARCH_WANT_SYS_OLDUMOUNT is defined; forwards to
 *	ksys_umount() with flags fixed at 0.
 */
SYSCALL_DEFINE1(oldumount, char __user *, name)
{
	return ksys_umount(name, 0);
}
193462306a36Sopenharmony_ci
193562306a36Sopenharmony_ci#endif
193662306a36Sopenharmony_ci
193762306a36Sopenharmony_cistatic bool is_mnt_ns_file(struct dentry *dentry)
193862306a36Sopenharmony_ci{
193962306a36Sopenharmony_ci	/* Is this a proxy for a mount namespace? */
194062306a36Sopenharmony_ci	return dentry->d_op == &ns_dentry_operations &&
194162306a36Sopenharmony_ci	       dentry->d_fsdata == &mntns_operations;
194262306a36Sopenharmony_ci}
194362306a36Sopenharmony_ci
/*
 * Convert a pointer to the ns_common embedded in a mnt_namespace (its
 * 'ns' member) back into a pointer to the containing mnt_namespace.
 */
static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
	return container_of(ns, struct mnt_namespace, ns);
}
194862306a36Sopenharmony_ci
/*
 * Return the ns_common embedded in @mnt (its 'ns' member); the inverse
 * of to_mnt_ns().
 */
struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
{
	return &mnt->ns;
}
195362306a36Sopenharmony_ci
195462306a36Sopenharmony_cistatic bool mnt_ns_loop(struct dentry *dentry)
195562306a36Sopenharmony_ci{
195662306a36Sopenharmony_ci	/* Could bind mounting the mount namespace inode cause a
195762306a36Sopenharmony_ci	 * mount namespace loop?
195862306a36Sopenharmony_ci	 */
195962306a36Sopenharmony_ci	struct mnt_namespace *mnt_ns;
196062306a36Sopenharmony_ci	if (!is_mnt_ns_file(dentry))
196162306a36Sopenharmony_ci		return false;
196262306a36Sopenharmony_ci
196362306a36Sopenharmony_ci	mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode));
196462306a36Sopenharmony_ci	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
196562306a36Sopenharmony_ci}
196662306a36Sopenharmony_ci
/**
 * copy_tree - clone a mount and the mounts attached below a dentry
 * @mnt:    root mount of the tree to copy
 * @dentry: dentry in @mnt that becomes the root of the copy
 * @flag:   CL_* flags; passed to clone_mnt() for every mount that is cloned
 *
 * Children of @mnt whose mountpoint is not under @dentry are left out.
 * Unless CL_COPY_UNBINDABLE is set, an unbindable @mnt is refused and
 * unbindable descendants are skipped together with their subtrees, except
 * that a descendant which is both unbindable and MNT_LOCKED fails the
 * whole copy.  Unless CL_COPY_MNT_NS_FILE is set, a mount namespace file
 * is refused as @dentry and skipped (with its subtree) as a descendant.
 *
 * Return: the root of the new tree, or an ERR_PTR(): -EINVAL when the
 * root is refused, -EPERM for an unbindable and locked descendant, or the
 * error from clone_mnt().  On failure after the root was cloned, the
 * partial copy is torn down with umount_tree(res, UMOUNT_SYNC).
 */
struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
					int flag)
{
	struct mount *res, *p, *q, *r, *parent;

	if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt))
		return ERR_PTR(-EINVAL);

	if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry))
		return ERR_PTR(-EINVAL);

	/* Clone the root; res stays pointing at it, q moves with the walk */
	res = q = clone_mnt(mnt, dentry, flag);
	if (IS_ERR(q))
		return q;

	q->mnt_mountpoint = mnt->mnt_mountpoint;

	/*
	 * p walks the source tree and q the copy, in lock step: q is always
	 * the clone of p.
	 */
	p = mnt;
	list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) {
		struct mount *s;
		/* Only children mounted under @dentry belong to the copy */
		if (!is_subdir(r->mnt_mountpoint, dentry))
			continue;

		for (s = r; s; s = next_mnt(s, r)) {
			if (!(flag & CL_COPY_UNBINDABLE) &&
			    IS_MNT_UNBINDABLE(s)) {
				if (s->mnt.mnt_flags & MNT_LOCKED) {
					/* Both unbindable and locked. */
					q = ERR_PTR(-EPERM);
					goto out;
				} else {
					s = skip_mnt_tree(s);
					continue;
				}
			}
			if (!(flag & CL_COPY_MNT_NS_FILE) &&
			    is_mnt_ns_file(s->mnt.mnt_root)) {
				s = skip_mnt_tree(s);
				continue;
			}
			/*
			 * Climb both trees until p is s's parent; q is then
			 * the clone that the copy of s must be attached to.
			 */
			while (p != s->mnt_parent) {
				p = p->mnt_parent;
				q = q->mnt_parent;
			}
			p = s;
			parent = q;
			q = clone_mnt(p, p->mnt.mnt_root, flag);
			if (IS_ERR(q))
				goto out;
			/* Link the clone into the copy under the mount hash lock */
			lock_mount_hash();
			list_add_tail(&q->mnt_list, &res->mnt_list);
			attach_mnt(q, parent, p->mnt_mp, false);
			unlock_mount_hash();
		}
	}
	return res;
out:
	/* q holds the ERR_PTR to return; discard what was copied so far */
	if (res) {
		lock_mount_hash();
		umount_tree(res, UMOUNT_SYNC);
		unlock_mount_hash();
	}
	return q;
}
203162306a36Sopenharmony_ci
203262306a36Sopenharmony_ci/* Caller should check returned pointer for errors */
203362306a36Sopenharmony_ci
203462306a36Sopenharmony_cistruct vfsmount *collect_mounts(const struct path *path)
203562306a36Sopenharmony_ci{
203662306a36Sopenharmony_ci	struct mount *tree;
203762306a36Sopenharmony_ci	namespace_lock();
203862306a36Sopenharmony_ci	if (!check_mnt(real_mount(path->mnt)))
203962306a36Sopenharmony_ci		tree = ERR_PTR(-EINVAL);
204062306a36Sopenharmony_ci	else
204162306a36Sopenharmony_ci		tree = copy_tree(real_mount(path->mnt), path->dentry,
204262306a36Sopenharmony_ci				 CL_COPY_ALL | CL_PRIVATE);
204362306a36Sopenharmony_ci	namespace_unlock();
204462306a36Sopenharmony_ci	if (IS_ERR(tree))
204562306a36Sopenharmony_ci		return ERR_CAST(tree);
204662306a36Sopenharmony_ci	return &tree->mnt;
204762306a36Sopenharmony_ci}
204862306a36Sopenharmony_ci
204962306a36Sopenharmony_cistatic void free_mnt_ns(struct mnt_namespace *);
205062306a36Sopenharmony_cistatic struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
205162306a36Sopenharmony_ci
205262306a36Sopenharmony_civoid dissolve_on_fput(struct vfsmount *mnt)
205362306a36Sopenharmony_ci{
205462306a36Sopenharmony_ci	struct mnt_namespace *ns;
205562306a36Sopenharmony_ci	namespace_lock();
205662306a36Sopenharmony_ci	lock_mount_hash();
205762306a36Sopenharmony_ci	ns = real_mount(mnt)->mnt_ns;
205862306a36Sopenharmony_ci	if (ns) {
205962306a36Sopenharmony_ci		if (is_anon_ns(ns))
206062306a36Sopenharmony_ci			umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
206162306a36Sopenharmony_ci		else
206262306a36Sopenharmony_ci			ns = NULL;
206362306a36Sopenharmony_ci	}
206462306a36Sopenharmony_ci	unlock_mount_hash();
206562306a36Sopenharmony_ci	namespace_unlock();
206662306a36Sopenharmony_ci	if (ns)
206762306a36Sopenharmony_ci		free_mnt_ns(ns);
206862306a36Sopenharmony_ci}
206962306a36Sopenharmony_ci
/*
 * Unmount the tree rooted at @mnt with no umount_tree() flags, holding
 * namespace_lock() and the mount hash lock.
 * NOTE(review): presumably the counterpart of collect_mounts() - confirm.
 */
void drop_collected_mounts(struct vfsmount *mnt)
{
	struct mount *tree = real_mount(mnt);

	namespace_lock();
	lock_mount_hash();

	umount_tree(tree, 0);

	unlock_mount_hash();
	namespace_unlock();
}
207862306a36Sopenharmony_ci
207962306a36Sopenharmony_cistatic bool has_locked_children(struct mount *mnt, struct dentry *dentry)
208062306a36Sopenharmony_ci{
208162306a36Sopenharmony_ci	struct mount *child;
208262306a36Sopenharmony_ci
208362306a36Sopenharmony_ci	list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
208462306a36Sopenharmony_ci		if (!is_subdir(child->mnt_mountpoint, dentry))
208562306a36Sopenharmony_ci			continue;
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_ci		if (child->mnt.mnt_flags & MNT_LOCKED)
208862306a36Sopenharmony_ci			return true;
208962306a36Sopenharmony_ci	}
209062306a36Sopenharmony_ci	return false;
209162306a36Sopenharmony_ci}
209262306a36Sopenharmony_ci
209362306a36Sopenharmony_ci/**
209462306a36Sopenharmony_ci * clone_private_mount - create a private clone of a path
209562306a36Sopenharmony_ci * @path: path to clone
209662306a36Sopenharmony_ci *
209762306a36Sopenharmony_ci * This creates a new vfsmount, which will be the clone of @path.  The new mount
209862306a36Sopenharmony_ci * will not be attached anywhere in the namespace and will be private (i.e.
209962306a36Sopenharmony_ci * changes to the originating mount won't be propagated into this).
210062306a36Sopenharmony_ci *
210162306a36Sopenharmony_ci * Release with mntput().
210262306a36Sopenharmony_ci */
210362306a36Sopenharmony_cistruct vfsmount *clone_private_mount(const struct path *path)
210462306a36Sopenharmony_ci{
210562306a36Sopenharmony_ci	struct mount *old_mnt = real_mount(path->mnt);
210662306a36Sopenharmony_ci	struct mount *new_mnt;
210762306a36Sopenharmony_ci
210862306a36Sopenharmony_ci	down_read(&namespace_sem);
210962306a36Sopenharmony_ci	if (IS_MNT_UNBINDABLE(old_mnt))
211062306a36Sopenharmony_ci		goto invalid;
211162306a36Sopenharmony_ci
211262306a36Sopenharmony_ci	if (!check_mnt(old_mnt))
211362306a36Sopenharmony_ci		goto invalid;
211462306a36Sopenharmony_ci
211562306a36Sopenharmony_ci	if (has_locked_children(old_mnt, path->dentry))
211662306a36Sopenharmony_ci		goto invalid;
211762306a36Sopenharmony_ci
211862306a36Sopenharmony_ci	new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE);
211962306a36Sopenharmony_ci	up_read(&namespace_sem);
212062306a36Sopenharmony_ci
212162306a36Sopenharmony_ci	if (IS_ERR(new_mnt))
212262306a36Sopenharmony_ci		return ERR_CAST(new_mnt);
212362306a36Sopenharmony_ci
212462306a36Sopenharmony_ci	/* Longterm mount to be removed by kern_unmount*() */
212562306a36Sopenharmony_ci	new_mnt->mnt_ns = MNT_NS_INTERNAL;
212662306a36Sopenharmony_ci
212762306a36Sopenharmony_ci	return &new_mnt->mnt;
212862306a36Sopenharmony_ci
212962306a36Sopenharmony_ciinvalid:
213062306a36Sopenharmony_ci	up_read(&namespace_sem);
213162306a36Sopenharmony_ci	return ERR_PTR(-EINVAL);
213262306a36Sopenharmony_ci}
213362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(clone_private_mount);
213462306a36Sopenharmony_ci
213562306a36Sopenharmony_ciint iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
213662306a36Sopenharmony_ci		   struct vfsmount *root)
213762306a36Sopenharmony_ci{
213862306a36Sopenharmony_ci	struct mount *mnt;
213962306a36Sopenharmony_ci	int res = f(root, arg);
214062306a36Sopenharmony_ci	if (res)
214162306a36Sopenharmony_ci		return res;
214262306a36Sopenharmony_ci	list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) {
214362306a36Sopenharmony_ci		res = f(&mnt->mnt, arg);
214462306a36Sopenharmony_ci		if (res)
214562306a36Sopenharmony_ci			return res;
214662306a36Sopenharmony_ci	}
214762306a36Sopenharmony_ci	return 0;
214862306a36Sopenharmony_ci}
214962306a36Sopenharmony_ci
215062306a36Sopenharmony_cistatic void lock_mnt_tree(struct mount *mnt)
215162306a36Sopenharmony_ci{
215262306a36Sopenharmony_ci	struct mount *p;
215362306a36Sopenharmony_ci
215462306a36Sopenharmony_ci	for (p = mnt; p; p = next_mnt(p, mnt)) {
215562306a36Sopenharmony_ci		int flags = p->mnt.mnt_flags;
215662306a36Sopenharmony_ci		/* Don't allow unprivileged users to change mount flags */
215762306a36Sopenharmony_ci		flags |= MNT_LOCK_ATIME;
215862306a36Sopenharmony_ci
215962306a36Sopenharmony_ci		if (flags & MNT_READONLY)
216062306a36Sopenharmony_ci			flags |= MNT_LOCK_READONLY;
216162306a36Sopenharmony_ci
216262306a36Sopenharmony_ci		if (flags & MNT_NODEV)
216362306a36Sopenharmony_ci			flags |= MNT_LOCK_NODEV;
216462306a36Sopenharmony_ci
216562306a36Sopenharmony_ci		if (flags & MNT_NOSUID)
216662306a36Sopenharmony_ci			flags |= MNT_LOCK_NOSUID;
216762306a36Sopenharmony_ci
216862306a36Sopenharmony_ci		if (flags & MNT_NOEXEC)
216962306a36Sopenharmony_ci			flags |= MNT_LOCK_NOEXEC;
217062306a36Sopenharmony_ci		/* Don't allow unprivileged users to reveal what is under a mount */
217162306a36Sopenharmony_ci		if (list_empty(&p->mnt_expire))
217262306a36Sopenharmony_ci			flags |= MNT_LOCKED;
217362306a36Sopenharmony_ci		p->mnt.mnt_flags = flags;
217462306a36Sopenharmony_ci	}
217562306a36Sopenharmony_ci}
217662306a36Sopenharmony_ci
217762306a36Sopenharmony_cistatic void cleanup_group_ids(struct mount *mnt, struct mount *end)
217862306a36Sopenharmony_ci{
217962306a36Sopenharmony_ci	struct mount *p;
218062306a36Sopenharmony_ci
218162306a36Sopenharmony_ci	for (p = mnt; p != end; p = next_mnt(p, mnt)) {
218262306a36Sopenharmony_ci		if (p->mnt_group_id && !IS_MNT_SHARED(p))
218362306a36Sopenharmony_ci			mnt_release_group_id(p);
218462306a36Sopenharmony_ci	}
218562306a36Sopenharmony_ci}
218662306a36Sopenharmony_ci
218762306a36Sopenharmony_cistatic int invent_group_ids(struct mount *mnt, bool recurse)
218862306a36Sopenharmony_ci{
218962306a36Sopenharmony_ci	struct mount *p;
219062306a36Sopenharmony_ci
219162306a36Sopenharmony_ci	for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) {
219262306a36Sopenharmony_ci		if (!p->mnt_group_id && !IS_MNT_SHARED(p)) {
219362306a36Sopenharmony_ci			int err = mnt_alloc_group_id(p);
219462306a36Sopenharmony_ci			if (err) {
219562306a36Sopenharmony_ci				cleanup_group_ids(mnt, p);
219662306a36Sopenharmony_ci				return err;
219762306a36Sopenharmony_ci			}
219862306a36Sopenharmony_ci		}
219962306a36Sopenharmony_ci	}
220062306a36Sopenharmony_ci
220162306a36Sopenharmony_ci	return 0;
220262306a36Sopenharmony_ci}
220362306a36Sopenharmony_ci
220462306a36Sopenharmony_ciint count_mounts(struct mnt_namespace *ns, struct mount *mnt)
220562306a36Sopenharmony_ci{
220662306a36Sopenharmony_ci	unsigned int max = READ_ONCE(sysctl_mount_max);
220762306a36Sopenharmony_ci	unsigned int mounts = 0;
220862306a36Sopenharmony_ci	struct mount *p;
220962306a36Sopenharmony_ci
221062306a36Sopenharmony_ci	if (ns->mounts >= max)
221162306a36Sopenharmony_ci		return -ENOSPC;
221262306a36Sopenharmony_ci	max -= ns->mounts;
221362306a36Sopenharmony_ci	if (ns->pending_mounts >= max)
221462306a36Sopenharmony_ci		return -ENOSPC;
221562306a36Sopenharmony_ci	max -= ns->pending_mounts;
221662306a36Sopenharmony_ci
221762306a36Sopenharmony_ci	for (p = mnt; p; p = next_mnt(p, mnt))
221862306a36Sopenharmony_ci		mounts++;
221962306a36Sopenharmony_ci
222062306a36Sopenharmony_ci	if (mounts > max)
222162306a36Sopenharmony_ci		return -ENOSPC;
222262306a36Sopenharmony_ci
222362306a36Sopenharmony_ci	ns->pending_mounts += mounts;
222462306a36Sopenharmony_ci	return 0;
222562306a36Sopenharmony_ci}
222662306a36Sopenharmony_ci
/* Flags modifying how attach_recursive_mnt() attaches a source mount tree */
enum mnt_tree_flags_t {
	/* the source tree is being moved rather than newly attached */
	MNT_TREE_MOVE = BIT(0),
	/* mount the source beneath the top mount instead of on top of it */
	MNT_TREE_BENEATH = BIT(1),
};
223162306a36Sopenharmony_ci
223262306a36Sopenharmony_ci/**
223362306a36Sopenharmony_ci * attach_recursive_mnt - attach a source mount tree
223462306a36Sopenharmony_ci * @source_mnt: mount tree to be attached
223562306a36Sopenharmony_ci * @top_mnt:    mount that @source_mnt will be mounted on or mounted beneath
223662306a36Sopenharmony_ci * @dest_mp:    the mountpoint @source_mnt will be mounted at
223762306a36Sopenharmony_ci * @flags:      modify how @source_mnt is supposed to be attached
223862306a36Sopenharmony_ci *
 *  NOTE: the tables below explain the semantics when a source mount
 *  of a given type is attached to a destination mount of a given type.
224162306a36Sopenharmony_ci * ---------------------------------------------------------------------------
224262306a36Sopenharmony_ci * |         BIND MOUNT OPERATION                                            |
224362306a36Sopenharmony_ci * |**************************************************************************
224462306a36Sopenharmony_ci * | source-->| shared        |       private  |       slave    | unbindable |
224562306a36Sopenharmony_ci * | dest     |               |                |                |            |
224662306a36Sopenharmony_ci * |   |      |               |                |                |            |
224762306a36Sopenharmony_ci * |   v      |               |                |                |            |
224862306a36Sopenharmony_ci * |**************************************************************************
224962306a36Sopenharmony_ci * |  shared  | shared (++)   |     shared (+) |     shared(+++)|  invalid   |
225062306a36Sopenharmony_ci * |          |               |                |                |            |
225162306a36Sopenharmony_ci * |non-shared| shared (+)    |      private   |      slave (*) |  invalid   |
225262306a36Sopenharmony_ci * ***************************************************************************
225362306a36Sopenharmony_ci * A bind operation clones the source mount and mounts the clone on the
225462306a36Sopenharmony_ci * destination mount.
225562306a36Sopenharmony_ci *
225662306a36Sopenharmony_ci * (++)  the cloned mount is propagated to all the mounts in the propagation
225762306a36Sopenharmony_ci * 	 tree of the destination mount and the cloned mount is added to
225862306a36Sopenharmony_ci * 	 the peer group of the source mount.
225962306a36Sopenharmony_ci * (+)   the cloned mount is created under the destination mount and is marked
226062306a36Sopenharmony_ci *       as shared. The cloned mount is added to the peer group of the source
226162306a36Sopenharmony_ci *       mount.
226262306a36Sopenharmony_ci * (+++) the mount is propagated to all the mounts in the propagation tree
226362306a36Sopenharmony_ci *       of the destination mount and the cloned mount is made slave
226462306a36Sopenharmony_ci *       of the same master as that of the source mount. The cloned mount
226562306a36Sopenharmony_ci *       is marked as 'shared and slave'.
226662306a36Sopenharmony_ci * (*)   the cloned mount is made a slave of the same master as that of the
226762306a36Sopenharmony_ci * 	 source mount.
226862306a36Sopenharmony_ci *
226962306a36Sopenharmony_ci * ---------------------------------------------------------------------------
227062306a36Sopenharmony_ci * |         		MOVE MOUNT OPERATION                                 |
227162306a36Sopenharmony_ci * |**************************************************************************
227262306a36Sopenharmony_ci * | source-->| shared        |       private  |       slave    | unbindable |
227362306a36Sopenharmony_ci * | dest     |               |                |                |            |
227462306a36Sopenharmony_ci * |   |      |               |                |                |            |
227562306a36Sopenharmony_ci * |   v      |               |                |                |            |
227662306a36Sopenharmony_ci * |**************************************************************************
227762306a36Sopenharmony_ci * |  shared  | shared (+)    |     shared (+) |    shared(+++) |  invalid   |
227862306a36Sopenharmony_ci * |          |               |                |                |            |
227962306a36Sopenharmony_ci * |non-shared| shared (+*)   |      private   |    slave (*)   | unbindable |
228062306a36Sopenharmony_ci * ***************************************************************************
228162306a36Sopenharmony_ci *
228262306a36Sopenharmony_ci * (+)  the mount is moved to the destination. And is then propagated to
228362306a36Sopenharmony_ci * 	all the mounts in the propagation tree of the destination mount.
228462306a36Sopenharmony_ci * (+*)  the mount is moved to the destination.
228562306a36Sopenharmony_ci * (+++)  the mount is moved to the destination and is then propagated to
228662306a36Sopenharmony_ci * 	all the mounts belonging to the destination mount's propagation tree.
228762306a36Sopenharmony_ci * 	the mount is marked as 'shared and slave'.
228862306a36Sopenharmony_ci * (*)	the mount continues to be a slave at the new location.
228962306a36Sopenharmony_ci *
 * If the source mount is a tree, the operations explained above are
 * applied to each mount in the tree.
229262306a36Sopenharmony_ci * Must be called without spinlocks held, since this function can sleep
229362306a36Sopenharmony_ci * in allocations.
229462306a36Sopenharmony_ci *
229562306a36Sopenharmony_ci * Context: The function expects namespace_lock() to be held.
229662306a36Sopenharmony_ci * Return: If @source_mnt was successfully attached 0 is returned.
229762306a36Sopenharmony_ci *         Otherwise a negative error code is returned.
229862306a36Sopenharmony_ci */
static int attach_recursive_mnt(struct mount *source_mnt,
				struct mount *top_mnt,
				struct mountpoint *dest_mp,
				enum mnt_tree_flags_t flags)
{
	/* user_ns of the caller's mount namespace; compared per copy below */
	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
	/* handed to propagate_mnt(); entries are linked through mnt_hash */
	HLIST_HEAD(tree_list);
	struct mnt_namespace *ns = top_mnt->mnt_ns;
	struct mountpoint *smp;
	struct mount *child, *dest_mnt, *p;
	struct hlist_node *n;
	int err = 0;
	bool moving = flags & MNT_TREE_MOVE, beneath = flags & MNT_TREE_BENEATH;

	/*
	 * Preallocate a mountpoint in case the new mounts need to be
	 * mounted beneath mounts on the same mountpoint.
	 */
	smp = get_mountpoint(source_mnt->mnt.mnt_root);
	if (IS_ERR(smp))
		return PTR_ERR(smp);

	/* Is there space to add these mounts to the mount namespace? */
	/* Skipped for a move; on success count_mounts() bumps ns->pending_mounts */
	if (!moving) {
		err = count_mounts(ns, source_mnt);
		if (err)
			goto out;
	}

	/* Mounting beneath @top_mnt makes @top_mnt's parent the destination */
	if (beneath)
		dest_mnt = top_mnt->mnt_parent;
	else
		dest_mnt = top_mnt;

	if (IS_MNT_SHARED(dest_mnt)) {
		err = invent_group_ids(source_mnt, true);
		if (err)
			goto out;
		err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
	}
	/*
	 * The lock is taken before err is examined: the out_cleanup_ids path
	 * calls umount_tree() and then unlock_mount_hash().
	 */
	lock_mount_hash();
	if (err)
		goto out_cleanup_ids;

	if (IS_MNT_SHARED(dest_mnt)) {
		for (p = source_mnt; p; p = next_mnt(p, source_mnt))
			set_mnt_shared(p);
	}

	if (moving) {
		/* beneath: attach at the mountpoint preallocated above */
		if (beneath)
			dest_mp = smp;
		unhash_mnt(source_mnt);
		attach_mnt(source_mnt, top_mnt, dest_mp, beneath);
		touch_mnt_namespace(source_mnt->mnt_ns);
	} else {
		if (source_mnt->mnt_ns) {
			/* move from anon - the caller will destroy */
			list_del_init(&source_mnt->mnt_ns->list);
		}
		if (beneath)
			mnt_set_mountpoint_beneath(source_mnt, top_mnt, smp);
		else
			mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
		commit_tree(source_mnt);
	}

	/* Commit every entry that propagate_mnt() queued on tree_list */
	hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
		struct mount *q;
		hlist_del_init(&child->mnt_hash);
		/*
		 * q is whatever is already mounted at the copy's mountpoint
		 * in its parent, if anything.
		 * NOTE(review): presumably mnt_change_mountpoint() re-homes q
		 * on top of the copy at smp - confirm against its definition.
		 */
		q = __lookup_mnt(&child->mnt_parent->mnt,
				 child->mnt_mountpoint);
		if (q)
			mnt_change_mountpoint(child, smp, q);
		/* Notice when we are propagating across user namespaces */
		if (child->mnt_parent->mnt_ns->user_ns != user_ns)
			lock_mnt_tree(child);
		/*
		 * Cleared on the root of the copy only, and after
		 * lock_mnt_tree() may have set it on the whole tree.
		 * NOTE(review): presumably intentional so the top of a
		 * propagated tree is not locked to its parent - confirm.
		 */
		child->mnt.mnt_flags &= ~MNT_LOCKED;
		commit_tree(child);
	}
	put_mountpoint(smp);
	unlock_mount_hash();

	return 0;

 out_cleanup_ids:
	/*
	 * Reached with the mount hash lock held.  Tear down what
	 * propagate_mnt() left on tree_list; the loop relies on
	 * umount_tree() taking the entry off the list.
	 */
	while (!hlist_empty(&tree_list)) {
		child = hlist_entry(tree_list.first, struct mount, mnt_hash);
		child->mnt_parent->mnt_ns->pending_mounts = 0;
		umount_tree(child, UMOUNT_SYNC);
	}
	unlock_mount_hash();
	cleanup_group_ids(source_mnt, NULL);
 out:
	/* Forget any reservation recorded by count_mounts() */
	ns->pending_mounts = 0;

	/* The mount hash lock is not held here; take mount_lock for the put */
	read_seqlock_excl(&mount_lock);
	put_mountpoint(smp);
	read_sequnlock_excl(&mount_lock);

	return err;
}
240162306a36Sopenharmony_ci
240262306a36Sopenharmony_ci/**
240362306a36Sopenharmony_ci * do_lock_mount - lock mount and mountpoint
240462306a36Sopenharmony_ci * @path:    target path
240562306a36Sopenharmony_ci * @beneath: whether the intention is to mount beneath @path
240662306a36Sopenharmony_ci *
240762306a36Sopenharmony_ci * Follow the mount stack on @path until the top mount @mnt is found. If
240862306a36Sopenharmony_ci * the initial @path->{mnt,dentry} is a mountpoint lookup the first
240962306a36Sopenharmony_ci * mount stacked on top of it. Then simply follow @{mnt,mnt->mnt_root}
241062306a36Sopenharmony_ci * until nothing is stacked on top of it anymore.
241162306a36Sopenharmony_ci *
241262306a36Sopenharmony_ci * Acquire the inode_lock() on the top mount's ->mnt_root to protect
241362306a36Sopenharmony_ci * against concurrent removal of the new mountpoint from another mount
241462306a36Sopenharmony_ci * namespace.
241562306a36Sopenharmony_ci *
 * If @beneath is requested, the inode_lock() on @mnt's mountpoint
 * @mp on @mnt->mnt_parent must be acquired. This protects against a
 * concurrent unlink of @mp->m_dentry from another mount namespace
241962306a36Sopenharmony_ci * where @mnt doesn't have a child mount mounted @mp. A concurrent
242062306a36Sopenharmony_ci * removal of @mnt->mnt_root doesn't matter as nothing will be mounted
242162306a36Sopenharmony_ci * on top of it for @beneath.
242262306a36Sopenharmony_ci *
242362306a36Sopenharmony_ci * In addition, @beneath needs to make sure that @mnt hasn't been
242462306a36Sopenharmony_ci * unmounted or moved from its current mountpoint in between dropping
242562306a36Sopenharmony_ci * @mount_lock and acquiring @namespace_sem. For the !@beneath case @mnt
242662306a36Sopenharmony_ci * being unmounted would be detected later by e.g., calling
242762306a36Sopenharmony_ci * check_mnt(mnt) in the function it's called from. For the @beneath
242862306a36Sopenharmony_ci * case however, it's useful to detect it directly in do_lock_mount().
242962306a36Sopenharmony_ci * If @mnt hasn't been unmounted then @mnt->mnt_mountpoint still points
243062306a36Sopenharmony_ci * to @mnt->mnt_mp->m_dentry. But if @mnt has been unmounted it will
243162306a36Sopenharmony_ci * point to @mnt->mnt_root and @mnt->mnt_mp will be NULL.
243262306a36Sopenharmony_ci *
243362306a36Sopenharmony_ci * Return: Either the target mountpoint on the top mount or the top
243462306a36Sopenharmony_ci *         mount's mountpoint.
243562306a36Sopenharmony_ci */
static struct mountpoint *do_lock_mount(struct path *path, bool beneath)
{
	struct vfsmount *mnt = path->mnt;
	struct dentry *dentry;
	/* Result returned by every early "goto out" below. */
	struct mountpoint *mp = ERR_PTR(-ENOENT);

	for (;;) {
		/* Only assigned and only read when @beneath is set. */
		struct mount *m;

		if (beneath) {
			m = real_mount(mnt);
			/*
			 * Take a reference on the current mountpoint dentry
			 * while holding mount_lock. Every exit path for the
			 * @beneath case drops it again with dput().
			 */
			read_seqlock_excl(&mount_lock);
			dentry = dget(m->mnt_mountpoint);
			read_sequnlock_excl(&mount_lock);
		} else {
			/* No extra reference is taken in this case. */
			dentry = path->dentry;
		}

		inode_lock(dentry->d_inode);
		if (unlikely(cant_mount(dentry))) {
			inode_unlock(dentry->d_inode);
			goto out;
		}

		namespace_lock();

		/*
		 * Recheck with the namespace lock held: @mnt must still be
		 * mounted and must still sit on the dentry locked above.
		 */
		if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) {
			namespace_unlock();
			inode_unlock(dentry->d_inode);
			goto out;
		}

		mnt = lookup_mnt(path);
		/* Nothing found on @path: leave the loop with both locks held. */
		if (likely(!mnt))
			break;

		/*
		 * A mount was found on @path. Drop both locks (and the
		 * @beneath reference), move @path to the root of that mount
		 * and retry from there.
		 */
		namespace_unlock();
		inode_unlock(dentry->d_inode);
		if (beneath)
			dput(dentry);
		path_put(path);
		path->mnt = mnt;
		path->dentry = dget(mnt->mnt_root);
	}

	mp = get_mountpoint(dentry);
	if (IS_ERR(mp)) {
		/* On failure no lock is handed back to the caller. */
		namespace_unlock();
		inode_unlock(dentry->d_inode);
	}

out:
	/* Drop the reference taken at the top of the loop for @beneath. */
	if (beneath)
		dput(dentry);

	return mp;
}
249362306a36Sopenharmony_ci
249462306a36Sopenharmony_cistatic inline struct mountpoint *lock_mount(struct path *path)
249562306a36Sopenharmony_ci{
249662306a36Sopenharmony_ci	return do_lock_mount(path, false);
249762306a36Sopenharmony_ci}
249862306a36Sopenharmony_ci
249962306a36Sopenharmony_cistatic void unlock_mount(struct mountpoint *where)
250062306a36Sopenharmony_ci{
250162306a36Sopenharmony_ci	struct dentry *dentry = where->m_dentry;
250262306a36Sopenharmony_ci
250362306a36Sopenharmony_ci	read_seqlock_excl(&mount_lock);
250462306a36Sopenharmony_ci	put_mountpoint(where);
250562306a36Sopenharmony_ci	read_sequnlock_excl(&mount_lock);
250662306a36Sopenharmony_ci
250762306a36Sopenharmony_ci	namespace_unlock();
250862306a36Sopenharmony_ci	inode_unlock(dentry->d_inode);
250962306a36Sopenharmony_ci}
251062306a36Sopenharmony_ci
251162306a36Sopenharmony_cistatic int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp)
251262306a36Sopenharmony_ci{
251362306a36Sopenharmony_ci	if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER)
251462306a36Sopenharmony_ci		return -EINVAL;
251562306a36Sopenharmony_ci
251662306a36Sopenharmony_ci	if (d_is_dir(mp->m_dentry) !=
251762306a36Sopenharmony_ci	      d_is_dir(mnt->mnt.mnt_root))
251862306a36Sopenharmony_ci		return -ENOTDIR;
251962306a36Sopenharmony_ci
252062306a36Sopenharmony_ci	return attach_recursive_mnt(mnt, p, mp, 0);
252162306a36Sopenharmony_ci}
252262306a36Sopenharmony_ci
252362306a36Sopenharmony_ci/*
252462306a36Sopenharmony_ci * Sanity check the flags to change_mnt_propagation.
252562306a36Sopenharmony_ci */
252662306a36Sopenharmony_ci
252762306a36Sopenharmony_cistatic int flags_to_propagation_type(int ms_flags)
252862306a36Sopenharmony_ci{
252962306a36Sopenharmony_ci	int type = ms_flags & ~(MS_REC | MS_SILENT);
253062306a36Sopenharmony_ci
253162306a36Sopenharmony_ci	/* Fail if any non-propagation flags are set */
253262306a36Sopenharmony_ci	if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
253362306a36Sopenharmony_ci		return 0;
253462306a36Sopenharmony_ci	/* Only one propagation flag should be set */
253562306a36Sopenharmony_ci	if (!is_power_of_2(type))
253662306a36Sopenharmony_ci		return 0;
253762306a36Sopenharmony_ci	return type;
253862306a36Sopenharmony_ci}
253962306a36Sopenharmony_ci
254062306a36Sopenharmony_ci/*
254162306a36Sopenharmony_ci * recursively change the type of the mountpoint.
254262306a36Sopenharmony_ci */
254362306a36Sopenharmony_cistatic int do_change_type(struct path *path, int ms_flags)
254462306a36Sopenharmony_ci{
254562306a36Sopenharmony_ci	struct mount *m;
254662306a36Sopenharmony_ci	struct mount *mnt = real_mount(path->mnt);
254762306a36Sopenharmony_ci	int recurse = ms_flags & MS_REC;
254862306a36Sopenharmony_ci	int type;
254962306a36Sopenharmony_ci	int err = 0;
255062306a36Sopenharmony_ci
255162306a36Sopenharmony_ci	if (!path_mounted(path))
255262306a36Sopenharmony_ci		return -EINVAL;
255362306a36Sopenharmony_ci
255462306a36Sopenharmony_ci	type = flags_to_propagation_type(ms_flags);
255562306a36Sopenharmony_ci	if (!type)
255662306a36Sopenharmony_ci		return -EINVAL;
255762306a36Sopenharmony_ci
255862306a36Sopenharmony_ci	namespace_lock();
255962306a36Sopenharmony_ci	if (type == MS_SHARED) {
256062306a36Sopenharmony_ci		err = invent_group_ids(mnt, recurse);
256162306a36Sopenharmony_ci		if (err)
256262306a36Sopenharmony_ci			goto out_unlock;
256362306a36Sopenharmony_ci	}
256462306a36Sopenharmony_ci
256562306a36Sopenharmony_ci	lock_mount_hash();
256662306a36Sopenharmony_ci	for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
256762306a36Sopenharmony_ci		change_mnt_propagation(m, type);
256862306a36Sopenharmony_ci	unlock_mount_hash();
256962306a36Sopenharmony_ci
257062306a36Sopenharmony_ci out_unlock:
257162306a36Sopenharmony_ci	namespace_unlock();
257262306a36Sopenharmony_ci	return err;
257362306a36Sopenharmony_ci}
257462306a36Sopenharmony_ci
257562306a36Sopenharmony_cistatic struct mount *__do_loopback(struct path *old_path, int recurse)
257662306a36Sopenharmony_ci{
257762306a36Sopenharmony_ci	struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
257862306a36Sopenharmony_ci
257962306a36Sopenharmony_ci	if (IS_MNT_UNBINDABLE(old))
258062306a36Sopenharmony_ci		return mnt;
258162306a36Sopenharmony_ci
258262306a36Sopenharmony_ci	if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
258362306a36Sopenharmony_ci		return mnt;
258462306a36Sopenharmony_ci
258562306a36Sopenharmony_ci	if (!recurse && has_locked_children(old, old_path->dentry))
258662306a36Sopenharmony_ci		return mnt;
258762306a36Sopenharmony_ci
258862306a36Sopenharmony_ci	if (recurse)
258962306a36Sopenharmony_ci		mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
259062306a36Sopenharmony_ci	else
259162306a36Sopenharmony_ci		mnt = clone_mnt(old, old_path->dentry, 0);
259262306a36Sopenharmony_ci
259362306a36Sopenharmony_ci	if (!IS_ERR(mnt))
259462306a36Sopenharmony_ci		mnt->mnt.mnt_flags &= ~MNT_LOCKED;
259562306a36Sopenharmony_ci
259662306a36Sopenharmony_ci	return mnt;
259762306a36Sopenharmony_ci}
259862306a36Sopenharmony_ci
259962306a36Sopenharmony_ci/*
260062306a36Sopenharmony_ci * do loopback mount.
260162306a36Sopenharmony_ci */
260262306a36Sopenharmony_cistatic int do_loopback(struct path *path, const char *old_name,
260362306a36Sopenharmony_ci				int recurse)
260462306a36Sopenharmony_ci{
260562306a36Sopenharmony_ci	struct path old_path;
260662306a36Sopenharmony_ci	struct mount *mnt = NULL, *parent;
260762306a36Sopenharmony_ci	struct mountpoint *mp;
260862306a36Sopenharmony_ci	int err;
260962306a36Sopenharmony_ci	if (!old_name || !*old_name)
261062306a36Sopenharmony_ci		return -EINVAL;
261162306a36Sopenharmony_ci	err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path);
261262306a36Sopenharmony_ci	if (err)
261362306a36Sopenharmony_ci		return err;
261462306a36Sopenharmony_ci
261562306a36Sopenharmony_ci	err = -EINVAL;
261662306a36Sopenharmony_ci	if (mnt_ns_loop(old_path.dentry))
261762306a36Sopenharmony_ci		goto out;
261862306a36Sopenharmony_ci
261962306a36Sopenharmony_ci	mp = lock_mount(path);
262062306a36Sopenharmony_ci	if (IS_ERR(mp)) {
262162306a36Sopenharmony_ci		err = PTR_ERR(mp);
262262306a36Sopenharmony_ci		goto out;
262362306a36Sopenharmony_ci	}
262462306a36Sopenharmony_ci
262562306a36Sopenharmony_ci	parent = real_mount(path->mnt);
262662306a36Sopenharmony_ci	if (!check_mnt(parent))
262762306a36Sopenharmony_ci		goto out2;
262862306a36Sopenharmony_ci
262962306a36Sopenharmony_ci	mnt = __do_loopback(&old_path, recurse);
263062306a36Sopenharmony_ci	if (IS_ERR(mnt)) {
263162306a36Sopenharmony_ci		err = PTR_ERR(mnt);
263262306a36Sopenharmony_ci		goto out2;
263362306a36Sopenharmony_ci	}
263462306a36Sopenharmony_ci
263562306a36Sopenharmony_ci	err = graft_tree(mnt, parent, mp);
263662306a36Sopenharmony_ci	if (err) {
263762306a36Sopenharmony_ci		lock_mount_hash();
263862306a36Sopenharmony_ci		umount_tree(mnt, UMOUNT_SYNC);
263962306a36Sopenharmony_ci		unlock_mount_hash();
264062306a36Sopenharmony_ci	}
264162306a36Sopenharmony_ciout2:
264262306a36Sopenharmony_ci	unlock_mount(mp);
264362306a36Sopenharmony_ciout:
264462306a36Sopenharmony_ci	path_put(&old_path);
264562306a36Sopenharmony_ci	return err;
264662306a36Sopenharmony_ci}
264762306a36Sopenharmony_ci
264862306a36Sopenharmony_cistatic struct file *open_detached_copy(struct path *path, bool recursive)
264962306a36Sopenharmony_ci{
265062306a36Sopenharmony_ci	struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
265162306a36Sopenharmony_ci	struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
265262306a36Sopenharmony_ci	struct mount *mnt, *p;
265362306a36Sopenharmony_ci	struct file *file;
265462306a36Sopenharmony_ci
265562306a36Sopenharmony_ci	if (IS_ERR(ns))
265662306a36Sopenharmony_ci		return ERR_CAST(ns);
265762306a36Sopenharmony_ci
265862306a36Sopenharmony_ci	namespace_lock();
265962306a36Sopenharmony_ci	mnt = __do_loopback(path, recursive);
266062306a36Sopenharmony_ci	if (IS_ERR(mnt)) {
266162306a36Sopenharmony_ci		namespace_unlock();
266262306a36Sopenharmony_ci		free_mnt_ns(ns);
266362306a36Sopenharmony_ci		return ERR_CAST(mnt);
266462306a36Sopenharmony_ci	}
266562306a36Sopenharmony_ci
266662306a36Sopenharmony_ci	lock_mount_hash();
266762306a36Sopenharmony_ci	for (p = mnt; p; p = next_mnt(p, mnt)) {
266862306a36Sopenharmony_ci		p->mnt_ns = ns;
266962306a36Sopenharmony_ci		ns->mounts++;
267062306a36Sopenharmony_ci	}
267162306a36Sopenharmony_ci	ns->root = mnt;
267262306a36Sopenharmony_ci	list_add_tail(&ns->list, &mnt->mnt_list);
267362306a36Sopenharmony_ci	mntget(&mnt->mnt);
267462306a36Sopenharmony_ci	unlock_mount_hash();
267562306a36Sopenharmony_ci	namespace_unlock();
267662306a36Sopenharmony_ci
267762306a36Sopenharmony_ci	mntput(path->mnt);
267862306a36Sopenharmony_ci	path->mnt = &mnt->mnt;
267962306a36Sopenharmony_ci	file = dentry_open(path, O_PATH, current_cred());
268062306a36Sopenharmony_ci	if (IS_ERR(file))
268162306a36Sopenharmony_ci		dissolve_on_fput(path->mnt);
268262306a36Sopenharmony_ci	else
268362306a36Sopenharmony_ci		file->f_mode |= FMODE_NEED_UNMOUNT;
268462306a36Sopenharmony_ci	return file;
268562306a36Sopenharmony_ci}
268662306a36Sopenharmony_ci
268762306a36Sopenharmony_ciSYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
268862306a36Sopenharmony_ci{
268962306a36Sopenharmony_ci	struct file *file;
269062306a36Sopenharmony_ci	struct path path;
269162306a36Sopenharmony_ci	int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
269262306a36Sopenharmony_ci	bool detached = flags & OPEN_TREE_CLONE;
269362306a36Sopenharmony_ci	int error;
269462306a36Sopenharmony_ci	int fd;
269562306a36Sopenharmony_ci
269662306a36Sopenharmony_ci	BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
269762306a36Sopenharmony_ci
269862306a36Sopenharmony_ci	if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
269962306a36Sopenharmony_ci		      AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
270062306a36Sopenharmony_ci		      OPEN_TREE_CLOEXEC))
270162306a36Sopenharmony_ci		return -EINVAL;
270262306a36Sopenharmony_ci
270362306a36Sopenharmony_ci	if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
270462306a36Sopenharmony_ci		return -EINVAL;
270562306a36Sopenharmony_ci
270662306a36Sopenharmony_ci	if (flags & AT_NO_AUTOMOUNT)
270762306a36Sopenharmony_ci		lookup_flags &= ~LOOKUP_AUTOMOUNT;
270862306a36Sopenharmony_ci	if (flags & AT_SYMLINK_NOFOLLOW)
270962306a36Sopenharmony_ci		lookup_flags &= ~LOOKUP_FOLLOW;
271062306a36Sopenharmony_ci	if (flags & AT_EMPTY_PATH)
271162306a36Sopenharmony_ci		lookup_flags |= LOOKUP_EMPTY;
271262306a36Sopenharmony_ci
271362306a36Sopenharmony_ci	if (detached && !may_mount())
271462306a36Sopenharmony_ci		return -EPERM;
271562306a36Sopenharmony_ci
271662306a36Sopenharmony_ci	fd = get_unused_fd_flags(flags & O_CLOEXEC);
271762306a36Sopenharmony_ci	if (fd < 0)
271862306a36Sopenharmony_ci		return fd;
271962306a36Sopenharmony_ci
272062306a36Sopenharmony_ci	error = user_path_at(dfd, filename, lookup_flags, &path);
272162306a36Sopenharmony_ci	if (unlikely(error)) {
272262306a36Sopenharmony_ci		file = ERR_PTR(error);
272362306a36Sopenharmony_ci	} else {
272462306a36Sopenharmony_ci		if (detached)
272562306a36Sopenharmony_ci			file = open_detached_copy(&path, flags & AT_RECURSIVE);
272662306a36Sopenharmony_ci		else
272762306a36Sopenharmony_ci			file = dentry_open(&path, O_PATH, current_cred());
272862306a36Sopenharmony_ci		path_put(&path);
272962306a36Sopenharmony_ci	}
273062306a36Sopenharmony_ci	if (IS_ERR(file)) {
273162306a36Sopenharmony_ci		put_unused_fd(fd);
273262306a36Sopenharmony_ci		return PTR_ERR(file);
273362306a36Sopenharmony_ci	}
273462306a36Sopenharmony_ci	fd_install(fd, file);
273562306a36Sopenharmony_ci	return fd;
273662306a36Sopenharmony_ci}
273762306a36Sopenharmony_ci
273862306a36Sopenharmony_ci/*
273962306a36Sopenharmony_ci * Don't allow locked mount flags to be cleared.
274062306a36Sopenharmony_ci *
274162306a36Sopenharmony_ci * No locks need to be held here while testing the various MNT_LOCK
274262306a36Sopenharmony_ci * flags because those flags can never be cleared once they are set.
274362306a36Sopenharmony_ci */
274462306a36Sopenharmony_cistatic bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags)
274562306a36Sopenharmony_ci{
274662306a36Sopenharmony_ci	unsigned int fl = mnt->mnt.mnt_flags;
274762306a36Sopenharmony_ci
274862306a36Sopenharmony_ci	if ((fl & MNT_LOCK_READONLY) &&
274962306a36Sopenharmony_ci	    !(mnt_flags & MNT_READONLY))
275062306a36Sopenharmony_ci		return false;
275162306a36Sopenharmony_ci
275262306a36Sopenharmony_ci	if ((fl & MNT_LOCK_NODEV) &&
275362306a36Sopenharmony_ci	    !(mnt_flags & MNT_NODEV))
275462306a36Sopenharmony_ci		return false;
275562306a36Sopenharmony_ci
275662306a36Sopenharmony_ci	if ((fl & MNT_LOCK_NOSUID) &&
275762306a36Sopenharmony_ci	    !(mnt_flags & MNT_NOSUID))
275862306a36Sopenharmony_ci		return false;
275962306a36Sopenharmony_ci
276062306a36Sopenharmony_ci	if ((fl & MNT_LOCK_NOEXEC) &&
276162306a36Sopenharmony_ci	    !(mnt_flags & MNT_NOEXEC))
276262306a36Sopenharmony_ci		return false;
276362306a36Sopenharmony_ci
276462306a36Sopenharmony_ci	if ((fl & MNT_LOCK_ATIME) &&
276562306a36Sopenharmony_ci	    ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK)))
276662306a36Sopenharmony_ci		return false;
276762306a36Sopenharmony_ci
276862306a36Sopenharmony_ci	return true;
276962306a36Sopenharmony_ci}
277062306a36Sopenharmony_ci
277162306a36Sopenharmony_cistatic int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
277262306a36Sopenharmony_ci{
277362306a36Sopenharmony_ci	bool readonly_request = (mnt_flags & MNT_READONLY);
277462306a36Sopenharmony_ci
277562306a36Sopenharmony_ci	if (readonly_request == __mnt_is_readonly(&mnt->mnt))
277662306a36Sopenharmony_ci		return 0;
277762306a36Sopenharmony_ci
277862306a36Sopenharmony_ci	if (readonly_request)
277962306a36Sopenharmony_ci		return mnt_make_readonly(mnt);
278062306a36Sopenharmony_ci
278162306a36Sopenharmony_ci	mnt->mnt.mnt_flags &= ~MNT_READONLY;
278262306a36Sopenharmony_ci	return 0;
278362306a36Sopenharmony_ci}
278462306a36Sopenharmony_ci
278562306a36Sopenharmony_cistatic void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
278662306a36Sopenharmony_ci{
278762306a36Sopenharmony_ci	mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
278862306a36Sopenharmony_ci	mnt->mnt.mnt_flags = mnt_flags;
278962306a36Sopenharmony_ci	touch_mnt_namespace(mnt->mnt_ns);
279062306a36Sopenharmony_ci}
279162306a36Sopenharmony_ci
279262306a36Sopenharmony_cistatic void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
279362306a36Sopenharmony_ci{
279462306a36Sopenharmony_ci	struct super_block *sb = mnt->mnt_sb;
279562306a36Sopenharmony_ci
279662306a36Sopenharmony_ci	if (!__mnt_is_readonly(mnt) &&
279762306a36Sopenharmony_ci	   (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) &&
279862306a36Sopenharmony_ci	   (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
279962306a36Sopenharmony_ci		char *buf = (char *)__get_free_page(GFP_KERNEL);
280062306a36Sopenharmony_ci		char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
280162306a36Sopenharmony_ci
280262306a36Sopenharmony_ci		pr_warn("%s filesystem being %s at %s supports timestamps until %ptTd (0x%llx)\n",
280362306a36Sopenharmony_ci			sb->s_type->name,
280462306a36Sopenharmony_ci			is_mounted(mnt) ? "remounted" : "mounted",
280562306a36Sopenharmony_ci			mntpath, &sb->s_time_max,
280662306a36Sopenharmony_ci			(unsigned long long)sb->s_time_max);
280762306a36Sopenharmony_ci
280862306a36Sopenharmony_ci		free_page((unsigned long)buf);
280962306a36Sopenharmony_ci		sb->s_iflags |= SB_I_TS_EXPIRY_WARNED;
281062306a36Sopenharmony_ci	}
281162306a36Sopenharmony_ci}
281262306a36Sopenharmony_ci
281362306a36Sopenharmony_ci/*
281462306a36Sopenharmony_ci * Handle reconfiguration of the mountpoint only without alteration of the
281562306a36Sopenharmony_ci * superblock it refers to.  This is triggered by specifying MS_REMOUNT|MS_BIND
281662306a36Sopenharmony_ci * to mount(2).
281762306a36Sopenharmony_ci */
281862306a36Sopenharmony_cistatic int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
281962306a36Sopenharmony_ci{
282062306a36Sopenharmony_ci	struct super_block *sb = path->mnt->mnt_sb;
282162306a36Sopenharmony_ci	struct mount *mnt = real_mount(path->mnt);
282262306a36Sopenharmony_ci	int ret;
282362306a36Sopenharmony_ci
282462306a36Sopenharmony_ci	if (!check_mnt(mnt))
282562306a36Sopenharmony_ci		return -EINVAL;
282662306a36Sopenharmony_ci
282762306a36Sopenharmony_ci	if (!path_mounted(path))
282862306a36Sopenharmony_ci		return -EINVAL;
282962306a36Sopenharmony_ci
283062306a36Sopenharmony_ci	if (!can_change_locked_flags(mnt, mnt_flags))
283162306a36Sopenharmony_ci		return -EPERM;
283262306a36Sopenharmony_ci
283362306a36Sopenharmony_ci	/*
283462306a36Sopenharmony_ci	 * We're only checking whether the superblock is read-only not
283562306a36Sopenharmony_ci	 * changing it, so only take down_read(&sb->s_umount).
283662306a36Sopenharmony_ci	 */
283762306a36Sopenharmony_ci	down_read(&sb->s_umount);
283862306a36Sopenharmony_ci	lock_mount_hash();
283962306a36Sopenharmony_ci	ret = change_mount_ro_state(mnt, mnt_flags);
284062306a36Sopenharmony_ci	if (ret == 0)
284162306a36Sopenharmony_ci		set_mount_attributes(mnt, mnt_flags);
284262306a36Sopenharmony_ci	unlock_mount_hash();
284362306a36Sopenharmony_ci	up_read(&sb->s_umount);
284462306a36Sopenharmony_ci
284562306a36Sopenharmony_ci	mnt_warn_timestamp_expiry(path, &mnt->mnt);
284662306a36Sopenharmony_ci
284762306a36Sopenharmony_ci	return ret;
284862306a36Sopenharmony_ci}
284962306a36Sopenharmony_ci
285062306a36Sopenharmony_ci/*
285162306a36Sopenharmony_ci * change filesystem flags. dir should be a physical root of filesystem.
285262306a36Sopenharmony_ci * If you've mounted a non-root directory somewhere and want to do remount
285362306a36Sopenharmony_ci * on it - tough luck.
285462306a36Sopenharmony_ci */
285562306a36Sopenharmony_cistatic int do_remount(struct path *path, int ms_flags, int sb_flags,
285662306a36Sopenharmony_ci		      int mnt_flags, void *data)
285762306a36Sopenharmony_ci{
285862306a36Sopenharmony_ci	int err;
285962306a36Sopenharmony_ci	struct super_block *sb = path->mnt->mnt_sb;
286062306a36Sopenharmony_ci	struct mount *mnt = real_mount(path->mnt);
286162306a36Sopenharmony_ci	struct fs_context *fc;
286262306a36Sopenharmony_ci
286362306a36Sopenharmony_ci	if (!check_mnt(mnt))
286462306a36Sopenharmony_ci		return -EINVAL;
286562306a36Sopenharmony_ci
286662306a36Sopenharmony_ci	if (!path_mounted(path))
286762306a36Sopenharmony_ci		return -EINVAL;
286862306a36Sopenharmony_ci
286962306a36Sopenharmony_ci	if (!can_change_locked_flags(mnt, mnt_flags))
287062306a36Sopenharmony_ci		return -EPERM;
287162306a36Sopenharmony_ci
287262306a36Sopenharmony_ci	fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK);
287362306a36Sopenharmony_ci	if (IS_ERR(fc))
287462306a36Sopenharmony_ci		return PTR_ERR(fc);
287562306a36Sopenharmony_ci
287662306a36Sopenharmony_ci	/*
287762306a36Sopenharmony_ci	 * Indicate to the filesystem that the remount request is coming
287862306a36Sopenharmony_ci	 * from the legacy mount system call.
287962306a36Sopenharmony_ci	 */
288062306a36Sopenharmony_ci	fc->oldapi = true;
288162306a36Sopenharmony_ci
288262306a36Sopenharmony_ci	err = parse_monolithic_mount_data(fc, data);
288362306a36Sopenharmony_ci	if (!err) {
288462306a36Sopenharmony_ci		down_write(&sb->s_umount);
288562306a36Sopenharmony_ci		err = -EPERM;
288662306a36Sopenharmony_ci		if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
288762306a36Sopenharmony_ci			err = reconfigure_super(fc);
288862306a36Sopenharmony_ci			if (!err) {
288962306a36Sopenharmony_ci				lock_mount_hash();
289062306a36Sopenharmony_ci				set_mount_attributes(mnt, mnt_flags);
289162306a36Sopenharmony_ci				unlock_mount_hash();
289262306a36Sopenharmony_ci			}
289362306a36Sopenharmony_ci		}
289462306a36Sopenharmony_ci		up_write(&sb->s_umount);
289562306a36Sopenharmony_ci	}
289662306a36Sopenharmony_ci
289762306a36Sopenharmony_ci	mnt_warn_timestamp_expiry(path, &mnt->mnt);
289862306a36Sopenharmony_ci
289962306a36Sopenharmony_ci	put_fs_context(fc);
290062306a36Sopenharmony_ci	return err;
290162306a36Sopenharmony_ci}
290262306a36Sopenharmony_ci
/* Return 1 if any mount in the tree rooted at @mnt is unbindable, else 0. */
static inline int tree_contains_unbindable(struct mount *mnt)
{
	struct mount *cur = mnt;

	/* Walk until the tree is exhausted or an unbindable mount shows up. */
	while (cur && !IS_MNT_UNBINDABLE(cur))
		cur = next_mnt(cur, mnt);

	return cur ? 1 : 0;
}
291262306a36Sopenharmony_ci
291362306a36Sopenharmony_ci/*
291462306a36Sopenharmony_ci * Check that there aren't references to earlier/same mount namespaces in the
291562306a36Sopenharmony_ci * specified subtree.  Such references can act as pins for mount namespaces
291662306a36Sopenharmony_ci * that aren't checked by the mount-cycle checking code, thereby allowing
291762306a36Sopenharmony_ci * cycles to be made.
291862306a36Sopenharmony_ci */
291962306a36Sopenharmony_cistatic bool check_for_nsfs_mounts(struct mount *subtree)
292062306a36Sopenharmony_ci{
292162306a36Sopenharmony_ci	struct mount *p;
292262306a36Sopenharmony_ci	bool ret = false;
292362306a36Sopenharmony_ci
292462306a36Sopenharmony_ci	lock_mount_hash();
292562306a36Sopenharmony_ci	for (p = subtree; p; p = next_mnt(p, subtree))
292662306a36Sopenharmony_ci		if (mnt_ns_loop(p->mnt.mnt_root))
292762306a36Sopenharmony_ci			goto out;
292862306a36Sopenharmony_ci
292962306a36Sopenharmony_ci	ret = true;
293062306a36Sopenharmony_ciout:
293162306a36Sopenharmony_ci	unlock_mount_hash();
293262306a36Sopenharmony_ci	return ret;
293362306a36Sopenharmony_ci}
293462306a36Sopenharmony_ci
293562306a36Sopenharmony_cistatic int do_set_group(struct path *from_path, struct path *to_path)
293662306a36Sopenharmony_ci{
293762306a36Sopenharmony_ci	struct mount *from, *to;
293862306a36Sopenharmony_ci	int err;
293962306a36Sopenharmony_ci
294062306a36Sopenharmony_ci	from = real_mount(from_path->mnt);
294162306a36Sopenharmony_ci	to = real_mount(to_path->mnt);
294262306a36Sopenharmony_ci
294362306a36Sopenharmony_ci	namespace_lock();
294462306a36Sopenharmony_ci
294562306a36Sopenharmony_ci	err = -EINVAL;
294662306a36Sopenharmony_ci	/* To and From must be mounted */
294762306a36Sopenharmony_ci	if (!is_mounted(&from->mnt))
294862306a36Sopenharmony_ci		goto out;
294962306a36Sopenharmony_ci	if (!is_mounted(&to->mnt))
295062306a36Sopenharmony_ci		goto out;
295162306a36Sopenharmony_ci
295262306a36Sopenharmony_ci	err = -EPERM;
295362306a36Sopenharmony_ci	/* We should be allowed to modify mount namespaces of both mounts */
295462306a36Sopenharmony_ci	if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
295562306a36Sopenharmony_ci		goto out;
295662306a36Sopenharmony_ci	if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
295762306a36Sopenharmony_ci		goto out;
295862306a36Sopenharmony_ci
295962306a36Sopenharmony_ci	err = -EINVAL;
296062306a36Sopenharmony_ci	/* To and From paths should be mount roots */
296162306a36Sopenharmony_ci	if (!path_mounted(from_path))
296262306a36Sopenharmony_ci		goto out;
296362306a36Sopenharmony_ci	if (!path_mounted(to_path))
296462306a36Sopenharmony_ci		goto out;
296562306a36Sopenharmony_ci
296662306a36Sopenharmony_ci	/* Setting sharing groups is only allowed across same superblock */
296762306a36Sopenharmony_ci	if (from->mnt.mnt_sb != to->mnt.mnt_sb)
296862306a36Sopenharmony_ci		goto out;
296962306a36Sopenharmony_ci
297062306a36Sopenharmony_ci	/* From mount root should be wider than To mount root */
297162306a36Sopenharmony_ci	if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
297262306a36Sopenharmony_ci		goto out;
297362306a36Sopenharmony_ci
297462306a36Sopenharmony_ci	/* From mount should not have locked children in place of To's root */
297562306a36Sopenharmony_ci	if (has_locked_children(from, to->mnt.mnt_root))
297662306a36Sopenharmony_ci		goto out;
297762306a36Sopenharmony_ci
297862306a36Sopenharmony_ci	/* Setting sharing groups is only allowed on private mounts */
297962306a36Sopenharmony_ci	if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
298062306a36Sopenharmony_ci		goto out;
298162306a36Sopenharmony_ci
298262306a36Sopenharmony_ci	/* From should not be private */
298362306a36Sopenharmony_ci	if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
298462306a36Sopenharmony_ci		goto out;
298562306a36Sopenharmony_ci
298662306a36Sopenharmony_ci	if (IS_MNT_SLAVE(from)) {
298762306a36Sopenharmony_ci		struct mount *m = from->mnt_master;
298862306a36Sopenharmony_ci
298962306a36Sopenharmony_ci		list_add(&to->mnt_slave, &m->mnt_slave_list);
299062306a36Sopenharmony_ci		to->mnt_master = m;
299162306a36Sopenharmony_ci	}
299262306a36Sopenharmony_ci
299362306a36Sopenharmony_ci	if (IS_MNT_SHARED(from)) {
299462306a36Sopenharmony_ci		to->mnt_group_id = from->mnt_group_id;
299562306a36Sopenharmony_ci		list_add(&to->mnt_share, &from->mnt_share);
299662306a36Sopenharmony_ci		lock_mount_hash();
299762306a36Sopenharmony_ci		set_mnt_shared(to);
299862306a36Sopenharmony_ci		unlock_mount_hash();
299962306a36Sopenharmony_ci	}
300062306a36Sopenharmony_ci
300162306a36Sopenharmony_ci	err = 0;
300262306a36Sopenharmony_ciout:
300362306a36Sopenharmony_ci	namespace_unlock();
300462306a36Sopenharmony_ci	return err;
300562306a36Sopenharmony_ci}
300662306a36Sopenharmony_ci
300762306a36Sopenharmony_ci/**
300862306a36Sopenharmony_ci * path_overmounted - check if path is overmounted
300962306a36Sopenharmony_ci * @path: path to check
301062306a36Sopenharmony_ci *
301162306a36Sopenharmony_ci * Check if path is overmounted, i.e., if there's a mount on top of
301262306a36Sopenharmony_ci * @path->mnt with @path->dentry as mountpoint.
301362306a36Sopenharmony_ci *
301462306a36Sopenharmony_ci * Context: This function expects namespace_lock() to be held.
301562306a36Sopenharmony_ci * Return: If path is overmounted true is returned, false if not.
301662306a36Sopenharmony_ci */
301762306a36Sopenharmony_cistatic inline bool path_overmounted(const struct path *path)
301862306a36Sopenharmony_ci{
301962306a36Sopenharmony_ci	rcu_read_lock();
302062306a36Sopenharmony_ci	if (unlikely(__lookup_mnt(path->mnt, path->dentry))) {
302162306a36Sopenharmony_ci		rcu_read_unlock();
302262306a36Sopenharmony_ci		return true;
302362306a36Sopenharmony_ci	}
302462306a36Sopenharmony_ci	rcu_read_unlock();
302562306a36Sopenharmony_ci	return false;
302662306a36Sopenharmony_ci}
302762306a36Sopenharmony_ci
302862306a36Sopenharmony_ci/**
302962306a36Sopenharmony_ci * can_move_mount_beneath - check that we can mount beneath the top mount
303062306a36Sopenharmony_ci * @from: mount to mount beneath
303162306a36Sopenharmony_ci * @to:   mount under which to mount
303262306a36Sopenharmony_ci *
303362306a36Sopenharmony_ci * - Make sure that @to->dentry is actually the root of a mount under
303462306a36Sopenharmony_ci *   which we can mount another mount.
303562306a36Sopenharmony_ci * - Make sure that nothing can be mounted beneath the caller's current
303662306a36Sopenharmony_ci *   root or the rootfs of the namespace.
303762306a36Sopenharmony_ci * - Make sure that the caller can unmount the topmost mount ensuring
303862306a36Sopenharmony_ci *   that the caller could reveal the underlying mountpoint.
303962306a36Sopenharmony_ci * - Ensure that nothing has been mounted on top of @from before we
304062306a36Sopenharmony_ci *   grabbed @namespace_sem to avoid creating pointless shadow mounts.
304162306a36Sopenharmony_ci * - Prevent mounting beneath a mount if the propagation relationship
304262306a36Sopenharmony_ci *   between the source mount, parent mount, and top mount would lead to
304362306a36Sopenharmony_ci *   nonsensical mount trees.
304462306a36Sopenharmony_ci *
304562306a36Sopenharmony_ci * Context: This function expects namespace_lock() to be held.
304662306a36Sopenharmony_ci * Return: On success 0, and on error a negative error code is returned.
304762306a36Sopenharmony_ci */
304862306a36Sopenharmony_cistatic int can_move_mount_beneath(const struct path *from,
304962306a36Sopenharmony_ci				  const struct path *to,
305062306a36Sopenharmony_ci				  const struct mountpoint *mp)
305162306a36Sopenharmony_ci{
305262306a36Sopenharmony_ci	struct mount *mnt_from = real_mount(from->mnt),
305362306a36Sopenharmony_ci		     *mnt_to = real_mount(to->mnt),
305462306a36Sopenharmony_ci		     *parent_mnt_to = mnt_to->mnt_parent;
305562306a36Sopenharmony_ci
305662306a36Sopenharmony_ci	if (!mnt_has_parent(mnt_to))
305762306a36Sopenharmony_ci		return -EINVAL;
305862306a36Sopenharmony_ci
305962306a36Sopenharmony_ci	if (!path_mounted(to))
306062306a36Sopenharmony_ci		return -EINVAL;
306162306a36Sopenharmony_ci
306262306a36Sopenharmony_ci	if (IS_MNT_LOCKED(mnt_to))
306362306a36Sopenharmony_ci		return -EINVAL;
306462306a36Sopenharmony_ci
306562306a36Sopenharmony_ci	/* Avoid creating shadow mounts during mount propagation. */
306662306a36Sopenharmony_ci	if (path_overmounted(from))
306762306a36Sopenharmony_ci		return -EINVAL;
306862306a36Sopenharmony_ci
306962306a36Sopenharmony_ci	/*
307062306a36Sopenharmony_ci	 * Mounting beneath the rootfs only makes sense when the
307162306a36Sopenharmony_ci	 * semantics of pivot_root(".", ".") are used.
307262306a36Sopenharmony_ci	 */
307362306a36Sopenharmony_ci	if (&mnt_to->mnt == current->fs->root.mnt)
307462306a36Sopenharmony_ci		return -EINVAL;
307562306a36Sopenharmony_ci	if (parent_mnt_to == current->nsproxy->mnt_ns->root)
307662306a36Sopenharmony_ci		return -EINVAL;
307762306a36Sopenharmony_ci
307862306a36Sopenharmony_ci	for (struct mount *p = mnt_from; mnt_has_parent(p); p = p->mnt_parent)
307962306a36Sopenharmony_ci		if (p == mnt_to)
308062306a36Sopenharmony_ci			return -EINVAL;
308162306a36Sopenharmony_ci
308262306a36Sopenharmony_ci	/*
308362306a36Sopenharmony_ci	 * If the parent mount propagates to the child mount this would
308462306a36Sopenharmony_ci	 * mean mounting @mnt_from on @mnt_to->mnt_parent and then
308562306a36Sopenharmony_ci	 * propagating a copy @c of @mnt_from on top of @mnt_to. This
308662306a36Sopenharmony_ci	 * defeats the whole purpose of mounting beneath another mount.
308762306a36Sopenharmony_ci	 */
308862306a36Sopenharmony_ci	if (propagation_would_overmount(parent_mnt_to, mnt_to, mp))
308962306a36Sopenharmony_ci		return -EINVAL;
309062306a36Sopenharmony_ci
309162306a36Sopenharmony_ci	/*
309262306a36Sopenharmony_ci	 * If @mnt_to->mnt_parent propagates to @mnt_from this would
309362306a36Sopenharmony_ci	 * mean propagating a copy @c of @mnt_from on top of @mnt_from.
309462306a36Sopenharmony_ci	 * Afterwards @mnt_from would be mounted on top of
309562306a36Sopenharmony_ci	 * @mnt_to->mnt_parent and @mnt_to would be unmounted from
309662306a36Sopenharmony_ci	 * @mnt->mnt_parent and remounted on @mnt_from. But since @c is
309762306a36Sopenharmony_ci	 * already mounted on @mnt_from, @mnt_to would ultimately be
309862306a36Sopenharmony_ci	 * remounted on top of @c. Afterwards, @mnt_from would be
309962306a36Sopenharmony_ci	 * covered by a copy @c of @mnt_from and @c would be covered by
310062306a36Sopenharmony_ci	 * @mnt_from itself. This defeats the whole purpose of mounting
310162306a36Sopenharmony_ci	 * @mnt_from beneath @mnt_to.
310262306a36Sopenharmony_ci	 */
310362306a36Sopenharmony_ci	if (propagation_would_overmount(parent_mnt_to, mnt_from, mp))
310462306a36Sopenharmony_ci		return -EINVAL;
310562306a36Sopenharmony_ci
310662306a36Sopenharmony_ci	return 0;
310762306a36Sopenharmony_ci}
310862306a36Sopenharmony_ci
310962306a36Sopenharmony_cistatic int do_move_mount(struct path *old_path, struct path *new_path,
311062306a36Sopenharmony_ci			 bool beneath)
311162306a36Sopenharmony_ci{
311262306a36Sopenharmony_ci	struct mnt_namespace *ns;
311362306a36Sopenharmony_ci	struct mount *p;
311462306a36Sopenharmony_ci	struct mount *old;
311562306a36Sopenharmony_ci	struct mount *parent;
311662306a36Sopenharmony_ci	struct mountpoint *mp, *old_mp;
311762306a36Sopenharmony_ci	int err;
311862306a36Sopenharmony_ci	bool attached;
311962306a36Sopenharmony_ci	enum mnt_tree_flags_t flags = 0;
312062306a36Sopenharmony_ci
312162306a36Sopenharmony_ci	mp = do_lock_mount(new_path, beneath);
312262306a36Sopenharmony_ci	if (IS_ERR(mp))
312362306a36Sopenharmony_ci		return PTR_ERR(mp);
312462306a36Sopenharmony_ci
312562306a36Sopenharmony_ci	old = real_mount(old_path->mnt);
312662306a36Sopenharmony_ci	p = real_mount(new_path->mnt);
312762306a36Sopenharmony_ci	parent = old->mnt_parent;
312862306a36Sopenharmony_ci	attached = mnt_has_parent(old);
312962306a36Sopenharmony_ci	if (attached)
313062306a36Sopenharmony_ci		flags |= MNT_TREE_MOVE;
313162306a36Sopenharmony_ci	old_mp = old->mnt_mp;
313262306a36Sopenharmony_ci	ns = old->mnt_ns;
313362306a36Sopenharmony_ci
313462306a36Sopenharmony_ci	err = -EINVAL;
313562306a36Sopenharmony_ci	/* The mountpoint must be in our namespace. */
313662306a36Sopenharmony_ci	if (!check_mnt(p))
313762306a36Sopenharmony_ci		goto out;
313862306a36Sopenharmony_ci
313962306a36Sopenharmony_ci	/* The thing moved must be mounted... */
314062306a36Sopenharmony_ci	if (!is_mounted(&old->mnt))
314162306a36Sopenharmony_ci		goto out;
314262306a36Sopenharmony_ci
314362306a36Sopenharmony_ci	/* ... and either ours or the root of anon namespace */
314462306a36Sopenharmony_ci	if (!(attached ? check_mnt(old) : is_anon_ns(ns)))
314562306a36Sopenharmony_ci		goto out;
314662306a36Sopenharmony_ci
314762306a36Sopenharmony_ci	if (old->mnt.mnt_flags & MNT_LOCKED)
314862306a36Sopenharmony_ci		goto out;
314962306a36Sopenharmony_ci
315062306a36Sopenharmony_ci	if (!path_mounted(old_path))
315162306a36Sopenharmony_ci		goto out;
315262306a36Sopenharmony_ci
315362306a36Sopenharmony_ci	if (d_is_dir(new_path->dentry) !=
315462306a36Sopenharmony_ci	    d_is_dir(old_path->dentry))
315562306a36Sopenharmony_ci		goto out;
315662306a36Sopenharmony_ci	/*
315762306a36Sopenharmony_ci	 * Don't move a mount residing in a shared parent.
315862306a36Sopenharmony_ci	 */
315962306a36Sopenharmony_ci	if (attached && IS_MNT_SHARED(parent))
316062306a36Sopenharmony_ci		goto out;
316162306a36Sopenharmony_ci
316262306a36Sopenharmony_ci	if (beneath) {
316362306a36Sopenharmony_ci		err = can_move_mount_beneath(old_path, new_path, mp);
316462306a36Sopenharmony_ci		if (err)
316562306a36Sopenharmony_ci			goto out;
316662306a36Sopenharmony_ci
316762306a36Sopenharmony_ci		err = -EINVAL;
316862306a36Sopenharmony_ci		p = p->mnt_parent;
316962306a36Sopenharmony_ci		flags |= MNT_TREE_BENEATH;
317062306a36Sopenharmony_ci	}
317162306a36Sopenharmony_ci
317262306a36Sopenharmony_ci	/*
317362306a36Sopenharmony_ci	 * Don't move a mount tree containing unbindable mounts to a destination
317462306a36Sopenharmony_ci	 * mount which is shared.
317562306a36Sopenharmony_ci	 */
317662306a36Sopenharmony_ci	if (IS_MNT_SHARED(p) && tree_contains_unbindable(old))
317762306a36Sopenharmony_ci		goto out;
317862306a36Sopenharmony_ci	err = -ELOOP;
317962306a36Sopenharmony_ci	if (!check_for_nsfs_mounts(old))
318062306a36Sopenharmony_ci		goto out;
318162306a36Sopenharmony_ci	for (; mnt_has_parent(p); p = p->mnt_parent)
318262306a36Sopenharmony_ci		if (p == old)
318362306a36Sopenharmony_ci			goto out;
318462306a36Sopenharmony_ci
318562306a36Sopenharmony_ci	err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, flags);
318662306a36Sopenharmony_ci	if (err)
318762306a36Sopenharmony_ci		goto out;
318862306a36Sopenharmony_ci
318962306a36Sopenharmony_ci	/* if the mount is moved, it should no longer be expire
319062306a36Sopenharmony_ci	 * automatically */
319162306a36Sopenharmony_ci	list_del_init(&old->mnt_expire);
319262306a36Sopenharmony_ci	if (attached)
319362306a36Sopenharmony_ci		put_mountpoint(old_mp);
319462306a36Sopenharmony_ciout:
319562306a36Sopenharmony_ci	unlock_mount(mp);
319662306a36Sopenharmony_ci	if (!err) {
319762306a36Sopenharmony_ci		if (attached)
319862306a36Sopenharmony_ci			mntput_no_expire(parent);
319962306a36Sopenharmony_ci		else
320062306a36Sopenharmony_ci			free_mnt_ns(ns);
320162306a36Sopenharmony_ci	}
320262306a36Sopenharmony_ci	return err;
320362306a36Sopenharmony_ci}
320462306a36Sopenharmony_ci
320562306a36Sopenharmony_cistatic int do_move_mount_old(struct path *path, const char *old_name)
320662306a36Sopenharmony_ci{
320762306a36Sopenharmony_ci	struct path old_path;
320862306a36Sopenharmony_ci	int err;
320962306a36Sopenharmony_ci
321062306a36Sopenharmony_ci	if (!old_name || !*old_name)
321162306a36Sopenharmony_ci		return -EINVAL;
321262306a36Sopenharmony_ci
321362306a36Sopenharmony_ci	err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
321462306a36Sopenharmony_ci	if (err)
321562306a36Sopenharmony_ci		return err;
321662306a36Sopenharmony_ci
321762306a36Sopenharmony_ci	err = do_move_mount(&old_path, path, false);
321862306a36Sopenharmony_ci	path_put(&old_path);
321962306a36Sopenharmony_ci	return err;
322062306a36Sopenharmony_ci}
322162306a36Sopenharmony_ci
322262306a36Sopenharmony_ci/*
322362306a36Sopenharmony_ci * add a mount into a namespace's mount tree
322462306a36Sopenharmony_ci */
322562306a36Sopenharmony_cistatic int do_add_mount(struct mount *newmnt, struct mountpoint *mp,
322662306a36Sopenharmony_ci			const struct path *path, int mnt_flags)
322762306a36Sopenharmony_ci{
322862306a36Sopenharmony_ci	struct mount *parent = real_mount(path->mnt);
322962306a36Sopenharmony_ci
323062306a36Sopenharmony_ci	mnt_flags &= ~MNT_INTERNAL_FLAGS;
323162306a36Sopenharmony_ci
323262306a36Sopenharmony_ci	if (unlikely(!check_mnt(parent))) {
323362306a36Sopenharmony_ci		/* that's acceptable only for automounts done in private ns */
323462306a36Sopenharmony_ci		if (!(mnt_flags & MNT_SHRINKABLE))
323562306a36Sopenharmony_ci			return -EINVAL;
323662306a36Sopenharmony_ci		/* ... and for those we'd better have mountpoint still alive */
323762306a36Sopenharmony_ci		if (!parent->mnt_ns)
323862306a36Sopenharmony_ci			return -EINVAL;
323962306a36Sopenharmony_ci	}
324062306a36Sopenharmony_ci
324162306a36Sopenharmony_ci	/* Refuse the same filesystem on the same mount point */
324262306a36Sopenharmony_ci	if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && path_mounted(path))
324362306a36Sopenharmony_ci		return -EBUSY;
324462306a36Sopenharmony_ci
324562306a36Sopenharmony_ci	if (d_is_symlink(newmnt->mnt.mnt_root))
324662306a36Sopenharmony_ci		return -EINVAL;
324762306a36Sopenharmony_ci
324862306a36Sopenharmony_ci	newmnt->mnt.mnt_flags = mnt_flags;
324962306a36Sopenharmony_ci	return graft_tree(newmnt, parent, mp);
325062306a36Sopenharmony_ci}
325162306a36Sopenharmony_ci
325262306a36Sopenharmony_cistatic bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags);
325362306a36Sopenharmony_ci
325462306a36Sopenharmony_ci/*
325562306a36Sopenharmony_ci * Create a new mount using a superblock configuration and request it
325662306a36Sopenharmony_ci * be added to the namespace tree.
325762306a36Sopenharmony_ci */
325862306a36Sopenharmony_cistatic int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint,
325962306a36Sopenharmony_ci			   unsigned int mnt_flags)
326062306a36Sopenharmony_ci{
326162306a36Sopenharmony_ci	struct vfsmount *mnt;
326262306a36Sopenharmony_ci	struct mountpoint *mp;
326362306a36Sopenharmony_ci	struct super_block *sb = fc->root->d_sb;
326462306a36Sopenharmony_ci	int error;
326562306a36Sopenharmony_ci
326662306a36Sopenharmony_ci	error = security_sb_kern_mount(sb);
326762306a36Sopenharmony_ci	if (!error && mount_too_revealing(sb, &mnt_flags))
326862306a36Sopenharmony_ci		error = -EPERM;
326962306a36Sopenharmony_ci
327062306a36Sopenharmony_ci	if (unlikely(error)) {
327162306a36Sopenharmony_ci		fc_drop_locked(fc);
327262306a36Sopenharmony_ci		return error;
327362306a36Sopenharmony_ci	}
327462306a36Sopenharmony_ci
327562306a36Sopenharmony_ci	up_write(&sb->s_umount);
327662306a36Sopenharmony_ci
327762306a36Sopenharmony_ci	mnt = vfs_create_mount(fc);
327862306a36Sopenharmony_ci	if (IS_ERR(mnt))
327962306a36Sopenharmony_ci		return PTR_ERR(mnt);
328062306a36Sopenharmony_ci
328162306a36Sopenharmony_ci	mnt_warn_timestamp_expiry(mountpoint, mnt);
328262306a36Sopenharmony_ci
328362306a36Sopenharmony_ci	mp = lock_mount(mountpoint);
328462306a36Sopenharmony_ci	if (IS_ERR(mp)) {
328562306a36Sopenharmony_ci		mntput(mnt);
328662306a36Sopenharmony_ci		return PTR_ERR(mp);
328762306a36Sopenharmony_ci	}
328862306a36Sopenharmony_ci	error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags);
328962306a36Sopenharmony_ci	unlock_mount(mp);
329062306a36Sopenharmony_ci	if (error < 0)
329162306a36Sopenharmony_ci		mntput(mnt);
329262306a36Sopenharmony_ci	return error;
329362306a36Sopenharmony_ci}
329462306a36Sopenharmony_ci
329562306a36Sopenharmony_ci/*
329662306a36Sopenharmony_ci * create a new mount for userspace and request it to be added into the
329762306a36Sopenharmony_ci * namespace's tree
329862306a36Sopenharmony_ci */
329962306a36Sopenharmony_cistatic int do_new_mount(struct path *path, const char *fstype, int sb_flags,
330062306a36Sopenharmony_ci			int mnt_flags, const char *name, void *data)
330162306a36Sopenharmony_ci{
330262306a36Sopenharmony_ci	struct file_system_type *type;
330362306a36Sopenharmony_ci	struct fs_context *fc;
330462306a36Sopenharmony_ci	const char *subtype = NULL;
330562306a36Sopenharmony_ci	int err = 0;
330662306a36Sopenharmony_ci
330762306a36Sopenharmony_ci	if (!fstype)
330862306a36Sopenharmony_ci		return -EINVAL;
330962306a36Sopenharmony_ci
331062306a36Sopenharmony_ci	type = get_fs_type(fstype);
331162306a36Sopenharmony_ci	if (!type)
331262306a36Sopenharmony_ci		return -ENODEV;
331362306a36Sopenharmony_ci
331462306a36Sopenharmony_ci	if (type->fs_flags & FS_HAS_SUBTYPE) {
331562306a36Sopenharmony_ci		subtype = strchr(fstype, '.');
331662306a36Sopenharmony_ci		if (subtype) {
331762306a36Sopenharmony_ci			subtype++;
331862306a36Sopenharmony_ci			if (!*subtype) {
331962306a36Sopenharmony_ci				put_filesystem(type);
332062306a36Sopenharmony_ci				return -EINVAL;
332162306a36Sopenharmony_ci			}
332262306a36Sopenharmony_ci		}
332362306a36Sopenharmony_ci	}
332462306a36Sopenharmony_ci
332562306a36Sopenharmony_ci	fc = fs_context_for_mount(type, sb_flags);
332662306a36Sopenharmony_ci	put_filesystem(type);
332762306a36Sopenharmony_ci	if (IS_ERR(fc))
332862306a36Sopenharmony_ci		return PTR_ERR(fc);
332962306a36Sopenharmony_ci
333062306a36Sopenharmony_ci	/*
333162306a36Sopenharmony_ci	 * Indicate to the filesystem that the mount request is coming
333262306a36Sopenharmony_ci	 * from the legacy mount system call.
333362306a36Sopenharmony_ci	 */
333462306a36Sopenharmony_ci	fc->oldapi = true;
333562306a36Sopenharmony_ci
333662306a36Sopenharmony_ci	if (subtype)
333762306a36Sopenharmony_ci		err = vfs_parse_fs_string(fc, "subtype",
333862306a36Sopenharmony_ci					  subtype, strlen(subtype));
333962306a36Sopenharmony_ci	if (!err && name)
334062306a36Sopenharmony_ci		err = vfs_parse_fs_string(fc, "source", name, strlen(name));
334162306a36Sopenharmony_ci	if (!err)
334262306a36Sopenharmony_ci		err = parse_monolithic_mount_data(fc, data);
334362306a36Sopenharmony_ci	if (!err && !mount_capable(fc))
334462306a36Sopenharmony_ci		err = -EPERM;
334562306a36Sopenharmony_ci	if (!err)
334662306a36Sopenharmony_ci		err = vfs_get_tree(fc);
334762306a36Sopenharmony_ci	if (!err)
334862306a36Sopenharmony_ci		err = do_new_mount_fc(fc, path, mnt_flags);
334962306a36Sopenharmony_ci
335062306a36Sopenharmony_ci	put_fs_context(fc);
335162306a36Sopenharmony_ci	return err;
335262306a36Sopenharmony_ci}
335362306a36Sopenharmony_ci
335462306a36Sopenharmony_ciint finish_automount(struct vfsmount *m, const struct path *path)
335562306a36Sopenharmony_ci{
335662306a36Sopenharmony_ci	struct dentry *dentry = path->dentry;
335762306a36Sopenharmony_ci	struct mountpoint *mp;
335862306a36Sopenharmony_ci	struct mount *mnt;
335962306a36Sopenharmony_ci	int err;
336062306a36Sopenharmony_ci
336162306a36Sopenharmony_ci	if (!m)
336262306a36Sopenharmony_ci		return 0;
336362306a36Sopenharmony_ci	if (IS_ERR(m))
336462306a36Sopenharmony_ci		return PTR_ERR(m);
336562306a36Sopenharmony_ci
336662306a36Sopenharmony_ci	mnt = real_mount(m);
336762306a36Sopenharmony_ci	/* The new mount record should have at least 2 refs to prevent it being
336862306a36Sopenharmony_ci	 * expired before we get a chance to add it
336962306a36Sopenharmony_ci	 */
337062306a36Sopenharmony_ci	BUG_ON(mnt_get_count(mnt) < 2);
337162306a36Sopenharmony_ci
337262306a36Sopenharmony_ci	if (m->mnt_sb == path->mnt->mnt_sb &&
337362306a36Sopenharmony_ci	    m->mnt_root == dentry) {
337462306a36Sopenharmony_ci		err = -ELOOP;
337562306a36Sopenharmony_ci		goto discard;
337662306a36Sopenharmony_ci	}
337762306a36Sopenharmony_ci
337862306a36Sopenharmony_ci	/*
337962306a36Sopenharmony_ci	 * we don't want to use lock_mount() - in this case finding something
338062306a36Sopenharmony_ci	 * that overmounts our mountpoint to be means "quitely drop what we've
338162306a36Sopenharmony_ci	 * got", not "try to mount it on top".
338262306a36Sopenharmony_ci	 */
338362306a36Sopenharmony_ci	inode_lock(dentry->d_inode);
338462306a36Sopenharmony_ci	namespace_lock();
338562306a36Sopenharmony_ci	if (unlikely(cant_mount(dentry))) {
338662306a36Sopenharmony_ci		err = -ENOENT;
338762306a36Sopenharmony_ci		goto discard_locked;
338862306a36Sopenharmony_ci	}
338962306a36Sopenharmony_ci	if (path_overmounted(path)) {
339062306a36Sopenharmony_ci		err = 0;
339162306a36Sopenharmony_ci		goto discard_locked;
339262306a36Sopenharmony_ci	}
339362306a36Sopenharmony_ci	mp = get_mountpoint(dentry);
339462306a36Sopenharmony_ci	if (IS_ERR(mp)) {
339562306a36Sopenharmony_ci		err = PTR_ERR(mp);
339662306a36Sopenharmony_ci		goto discard_locked;
339762306a36Sopenharmony_ci	}
339862306a36Sopenharmony_ci
339962306a36Sopenharmony_ci	err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE);
340062306a36Sopenharmony_ci	unlock_mount(mp);
340162306a36Sopenharmony_ci	if (unlikely(err))
340262306a36Sopenharmony_ci		goto discard;
340362306a36Sopenharmony_ci	mntput(m);
340462306a36Sopenharmony_ci	return 0;
340562306a36Sopenharmony_ci
340662306a36Sopenharmony_cidiscard_locked:
340762306a36Sopenharmony_ci	namespace_unlock();
340862306a36Sopenharmony_ci	inode_unlock(dentry->d_inode);
340962306a36Sopenharmony_cidiscard:
341062306a36Sopenharmony_ci	/* remove m from any expiration list it may be on */
341162306a36Sopenharmony_ci	if (!list_empty(&mnt->mnt_expire)) {
341262306a36Sopenharmony_ci		namespace_lock();
341362306a36Sopenharmony_ci		list_del_init(&mnt->mnt_expire);
341462306a36Sopenharmony_ci		namespace_unlock();
341562306a36Sopenharmony_ci	}
341662306a36Sopenharmony_ci	mntput(m);
341762306a36Sopenharmony_ci	mntput(m);
341862306a36Sopenharmony_ci	return err;
341962306a36Sopenharmony_ci}
342062306a36Sopenharmony_ci
342162306a36Sopenharmony_ci/**
342262306a36Sopenharmony_ci * mnt_set_expiry - Put a mount on an expiration list
342362306a36Sopenharmony_ci * @mnt: The mount to list.
342462306a36Sopenharmony_ci * @expiry_list: The list to add the mount to.
342562306a36Sopenharmony_ci */
342662306a36Sopenharmony_civoid mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list)
342762306a36Sopenharmony_ci{
342862306a36Sopenharmony_ci	namespace_lock();
342962306a36Sopenharmony_ci
343062306a36Sopenharmony_ci	list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list);
343162306a36Sopenharmony_ci
343262306a36Sopenharmony_ci	namespace_unlock();
343362306a36Sopenharmony_ci}
343462306a36Sopenharmony_ciEXPORT_SYMBOL(mnt_set_expiry);
343562306a36Sopenharmony_ci
343662306a36Sopenharmony_ci/*
343762306a36Sopenharmony_ci * process a list of expirable mountpoints with the intent of discarding any
343862306a36Sopenharmony_ci * mountpoints that aren't in use and haven't been touched since last we came
343962306a36Sopenharmony_ci * here
344062306a36Sopenharmony_ci */
344162306a36Sopenharmony_civoid mark_mounts_for_expiry(struct list_head *mounts)
344262306a36Sopenharmony_ci{
344362306a36Sopenharmony_ci	struct mount *mnt, *next;
344462306a36Sopenharmony_ci	LIST_HEAD(graveyard);
344562306a36Sopenharmony_ci
344662306a36Sopenharmony_ci	if (list_empty(mounts))
344762306a36Sopenharmony_ci		return;
344862306a36Sopenharmony_ci
344962306a36Sopenharmony_ci	namespace_lock();
345062306a36Sopenharmony_ci	lock_mount_hash();
345162306a36Sopenharmony_ci
345262306a36Sopenharmony_ci	/* extract from the expiration list every vfsmount that matches the
345362306a36Sopenharmony_ci	 * following criteria:
345462306a36Sopenharmony_ci	 * - only referenced by its parent vfsmount
345562306a36Sopenharmony_ci	 * - still marked for expiry (marked on the last call here; marks are
345662306a36Sopenharmony_ci	 *   cleared by mntput())
345762306a36Sopenharmony_ci	 */
345862306a36Sopenharmony_ci	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
345962306a36Sopenharmony_ci		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
346062306a36Sopenharmony_ci			propagate_mount_busy(mnt, 1))
346162306a36Sopenharmony_ci			continue;
346262306a36Sopenharmony_ci		list_move(&mnt->mnt_expire, &graveyard);
346362306a36Sopenharmony_ci	}
346462306a36Sopenharmony_ci	while (!list_empty(&graveyard)) {
346562306a36Sopenharmony_ci		mnt = list_first_entry(&graveyard, struct mount, mnt_expire);
346662306a36Sopenharmony_ci		touch_mnt_namespace(mnt->mnt_ns);
346762306a36Sopenharmony_ci		umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC);
346862306a36Sopenharmony_ci	}
346962306a36Sopenharmony_ci	unlock_mount_hash();
347062306a36Sopenharmony_ci	namespace_unlock();
347162306a36Sopenharmony_ci}
347262306a36Sopenharmony_ci
347362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(mark_mounts_for_expiry);
347462306a36Sopenharmony_ci
347562306a36Sopenharmony_ci/*
347662306a36Sopenharmony_ci * Ripoff of 'select_parent()'
347762306a36Sopenharmony_ci *
347862306a36Sopenharmony_ci * search the list of submounts for a given mountpoint, and move any
347962306a36Sopenharmony_ci * shrinkable submounts to the 'graveyard' list.
348062306a36Sopenharmony_ci */
348162306a36Sopenharmony_cistatic int select_submounts(struct mount *parent, struct list_head *graveyard)
348262306a36Sopenharmony_ci{
348362306a36Sopenharmony_ci	struct mount *this_parent = parent;
348462306a36Sopenharmony_ci	struct list_head *next;
348562306a36Sopenharmony_ci	int found = 0;
348662306a36Sopenharmony_ci
348762306a36Sopenharmony_cirepeat:
348862306a36Sopenharmony_ci	next = this_parent->mnt_mounts.next;
348962306a36Sopenharmony_ciresume:
349062306a36Sopenharmony_ci	while (next != &this_parent->mnt_mounts) {
349162306a36Sopenharmony_ci		struct list_head *tmp = next;
349262306a36Sopenharmony_ci		struct mount *mnt = list_entry(tmp, struct mount, mnt_child);
349362306a36Sopenharmony_ci
349462306a36Sopenharmony_ci		next = tmp->next;
349562306a36Sopenharmony_ci		if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE))
349662306a36Sopenharmony_ci			continue;
349762306a36Sopenharmony_ci		/*
349862306a36Sopenharmony_ci		 * Descend a level if the d_mounts list is non-empty.
349962306a36Sopenharmony_ci		 */
350062306a36Sopenharmony_ci		if (!list_empty(&mnt->mnt_mounts)) {
350162306a36Sopenharmony_ci			this_parent = mnt;
350262306a36Sopenharmony_ci			goto repeat;
350362306a36Sopenharmony_ci		}
350462306a36Sopenharmony_ci
350562306a36Sopenharmony_ci		if (!propagate_mount_busy(mnt, 1)) {
350662306a36Sopenharmony_ci			list_move_tail(&mnt->mnt_expire, graveyard);
350762306a36Sopenharmony_ci			found++;
350862306a36Sopenharmony_ci		}
350962306a36Sopenharmony_ci	}
351062306a36Sopenharmony_ci	/*
351162306a36Sopenharmony_ci	 * All done at this level ... ascend and resume the search
351262306a36Sopenharmony_ci	 */
351362306a36Sopenharmony_ci	if (this_parent != parent) {
351462306a36Sopenharmony_ci		next = this_parent->mnt_child.next;
351562306a36Sopenharmony_ci		this_parent = this_parent->mnt_parent;
351662306a36Sopenharmony_ci		goto resume;
351762306a36Sopenharmony_ci	}
351862306a36Sopenharmony_ci	return found;
351962306a36Sopenharmony_ci}
352062306a36Sopenharmony_ci
352162306a36Sopenharmony_ci/*
352262306a36Sopenharmony_ci * process a list of expirable mountpoints with the intent of discarding any
352362306a36Sopenharmony_ci * submounts of a specific parent mountpoint
352462306a36Sopenharmony_ci *
352562306a36Sopenharmony_ci * mount_lock must be held for write
352662306a36Sopenharmony_ci */
352762306a36Sopenharmony_cistatic void shrink_submounts(struct mount *mnt)
352862306a36Sopenharmony_ci{
352962306a36Sopenharmony_ci	LIST_HEAD(graveyard);
353062306a36Sopenharmony_ci	struct mount *m;
353162306a36Sopenharmony_ci
353262306a36Sopenharmony_ci	/* extract submounts of 'mountpoint' from the expiration list */
353362306a36Sopenharmony_ci	while (select_submounts(mnt, &graveyard)) {
353462306a36Sopenharmony_ci		while (!list_empty(&graveyard)) {
353562306a36Sopenharmony_ci			m = list_first_entry(&graveyard, struct mount,
353662306a36Sopenharmony_ci						mnt_expire);
353762306a36Sopenharmony_ci			touch_mnt_namespace(m->mnt_ns);
353862306a36Sopenharmony_ci			umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC);
353962306a36Sopenharmony_ci		}
354062306a36Sopenharmony_ci	}
354162306a36Sopenharmony_ci}
354262306a36Sopenharmony_ci
354362306a36Sopenharmony_cistatic void *copy_mount_options(const void __user * data)
354462306a36Sopenharmony_ci{
354562306a36Sopenharmony_ci	char *copy;
354662306a36Sopenharmony_ci	unsigned left, offset;
354762306a36Sopenharmony_ci
354862306a36Sopenharmony_ci	if (!data)
354962306a36Sopenharmony_ci		return NULL;
355062306a36Sopenharmony_ci
355162306a36Sopenharmony_ci	copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
355262306a36Sopenharmony_ci	if (!copy)
355362306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
355462306a36Sopenharmony_ci
355562306a36Sopenharmony_ci	left = copy_from_user(copy, data, PAGE_SIZE);
355662306a36Sopenharmony_ci
355762306a36Sopenharmony_ci	/*
355862306a36Sopenharmony_ci	 * Not all architectures have an exact copy_from_user(). Resort to
355962306a36Sopenharmony_ci	 * byte at a time.
356062306a36Sopenharmony_ci	 */
356162306a36Sopenharmony_ci	offset = PAGE_SIZE - left;
356262306a36Sopenharmony_ci	while (left) {
356362306a36Sopenharmony_ci		char c;
356462306a36Sopenharmony_ci		if (get_user(c, (const char __user *)data + offset))
356562306a36Sopenharmony_ci			break;
356662306a36Sopenharmony_ci		copy[offset] = c;
356762306a36Sopenharmony_ci		left--;
356862306a36Sopenharmony_ci		offset++;
356962306a36Sopenharmony_ci	}
357062306a36Sopenharmony_ci
357162306a36Sopenharmony_ci	if (left == PAGE_SIZE) {
357262306a36Sopenharmony_ci		kfree(copy);
357362306a36Sopenharmony_ci		return ERR_PTR(-EFAULT);
357462306a36Sopenharmony_ci	}
357562306a36Sopenharmony_ci
357662306a36Sopenharmony_ci	return copy;
357762306a36Sopenharmony_ci}
357862306a36Sopenharmony_ci
357962306a36Sopenharmony_cistatic char *copy_mount_string(const void __user *data)
358062306a36Sopenharmony_ci{
358162306a36Sopenharmony_ci	return data ? strndup_user(data, PATH_MAX) : NULL;
358262306a36Sopenharmony_ci}
358362306a36Sopenharmony_ci
358462306a36Sopenharmony_ci/*
358562306a36Sopenharmony_ci * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
358662306a36Sopenharmony_ci * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
358762306a36Sopenharmony_ci *
358862306a36Sopenharmony_ci * data is a (void *) that can point to any structure up to
358962306a36Sopenharmony_ci * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
359062306a36Sopenharmony_ci * information (or be NULL).
359162306a36Sopenharmony_ci *
359262306a36Sopenharmony_ci * Pre-0.97 versions of mount() didn't have a flags word.
359362306a36Sopenharmony_ci * When the flags word was introduced its top half was required
359462306a36Sopenharmony_ci * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
359562306a36Sopenharmony_ci * Therefore, if this magic number is present, it carries no information
359662306a36Sopenharmony_ci * and must be discarded.
359762306a36Sopenharmony_ci */
359862306a36Sopenharmony_ciint path_mount(const char *dev_name, struct path *path,
359962306a36Sopenharmony_ci		const char *type_page, unsigned long flags, void *data_page)
360062306a36Sopenharmony_ci{
360162306a36Sopenharmony_ci	unsigned int mnt_flags = 0, sb_flags;
360262306a36Sopenharmony_ci	int ret;
360362306a36Sopenharmony_ci
360462306a36Sopenharmony_ci	/* Discard magic */
360562306a36Sopenharmony_ci	if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
360662306a36Sopenharmony_ci		flags &= ~MS_MGC_MSK;
360762306a36Sopenharmony_ci
360862306a36Sopenharmony_ci	/* Basic sanity checks */
360962306a36Sopenharmony_ci	if (data_page)
361062306a36Sopenharmony_ci		((char *)data_page)[PAGE_SIZE - 1] = 0;
361162306a36Sopenharmony_ci
361262306a36Sopenharmony_ci	if (flags & MS_NOUSER)
361362306a36Sopenharmony_ci		return -EINVAL;
361462306a36Sopenharmony_ci
361562306a36Sopenharmony_ci	ret = security_sb_mount(dev_name, path, type_page, flags, data_page);
361662306a36Sopenharmony_ci	if (ret)
361762306a36Sopenharmony_ci		return ret;
361862306a36Sopenharmony_ci	if (!may_mount())
361962306a36Sopenharmony_ci		return -EPERM;
362062306a36Sopenharmony_ci	if (flags & SB_MANDLOCK)
362162306a36Sopenharmony_ci		warn_mandlock();
362262306a36Sopenharmony_ci
362362306a36Sopenharmony_ci	/* Default to relatime unless overriden */
362462306a36Sopenharmony_ci	if (!(flags & MS_NOATIME))
362562306a36Sopenharmony_ci		mnt_flags |= MNT_RELATIME;
362662306a36Sopenharmony_ci
362762306a36Sopenharmony_ci	/* Separate the per-mountpoint flags */
362862306a36Sopenharmony_ci	if (flags & MS_NOSUID)
362962306a36Sopenharmony_ci		mnt_flags |= MNT_NOSUID;
363062306a36Sopenharmony_ci	if (flags & MS_NODEV)
363162306a36Sopenharmony_ci		mnt_flags |= MNT_NODEV;
363262306a36Sopenharmony_ci	if (flags & MS_NOEXEC)
363362306a36Sopenharmony_ci		mnt_flags |= MNT_NOEXEC;
363462306a36Sopenharmony_ci	if (flags & MS_NOATIME)
363562306a36Sopenharmony_ci		mnt_flags |= MNT_NOATIME;
363662306a36Sopenharmony_ci	if (flags & MS_NODIRATIME)
363762306a36Sopenharmony_ci		mnt_flags |= MNT_NODIRATIME;
363862306a36Sopenharmony_ci	if (flags & MS_STRICTATIME)
363962306a36Sopenharmony_ci		mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
364062306a36Sopenharmony_ci	if (flags & MS_RDONLY)
364162306a36Sopenharmony_ci		mnt_flags |= MNT_READONLY;
364262306a36Sopenharmony_ci	if (flags & MS_NOSYMFOLLOW)
364362306a36Sopenharmony_ci		mnt_flags |= MNT_NOSYMFOLLOW;
364462306a36Sopenharmony_ci
364562306a36Sopenharmony_ci	/* The default atime for remount is preservation */
364662306a36Sopenharmony_ci	if ((flags & MS_REMOUNT) &&
364762306a36Sopenharmony_ci	    ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
364862306a36Sopenharmony_ci		       MS_STRICTATIME)) == 0)) {
364962306a36Sopenharmony_ci		mnt_flags &= ~MNT_ATIME_MASK;
365062306a36Sopenharmony_ci		mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK;
365162306a36Sopenharmony_ci	}
365262306a36Sopenharmony_ci
365362306a36Sopenharmony_ci	sb_flags = flags & (SB_RDONLY |
365462306a36Sopenharmony_ci			    SB_SYNCHRONOUS |
365562306a36Sopenharmony_ci			    SB_MANDLOCK |
365662306a36Sopenharmony_ci			    SB_DIRSYNC |
365762306a36Sopenharmony_ci			    SB_SILENT |
365862306a36Sopenharmony_ci			    SB_POSIXACL |
365962306a36Sopenharmony_ci			    SB_LAZYTIME |
366062306a36Sopenharmony_ci			    SB_I_VERSION);
366162306a36Sopenharmony_ci
366262306a36Sopenharmony_ci	if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
366362306a36Sopenharmony_ci		return do_reconfigure_mnt(path, mnt_flags);
366462306a36Sopenharmony_ci	if (flags & MS_REMOUNT)
366562306a36Sopenharmony_ci		return do_remount(path, flags, sb_flags, mnt_flags, data_page);
366662306a36Sopenharmony_ci	if (flags & MS_BIND)
366762306a36Sopenharmony_ci		return do_loopback(path, dev_name, flags & MS_REC);
366862306a36Sopenharmony_ci	if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
366962306a36Sopenharmony_ci		return do_change_type(path, flags);
367062306a36Sopenharmony_ci	if (flags & MS_MOVE)
367162306a36Sopenharmony_ci		return do_move_mount_old(path, dev_name);
367262306a36Sopenharmony_ci
367362306a36Sopenharmony_ci	return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name,
367462306a36Sopenharmony_ci			    data_page);
367562306a36Sopenharmony_ci}
367662306a36Sopenharmony_ci
367762306a36Sopenharmony_cilong do_mount(const char *dev_name, const char __user *dir_name,
367862306a36Sopenharmony_ci		const char *type_page, unsigned long flags, void *data_page)
367962306a36Sopenharmony_ci{
368062306a36Sopenharmony_ci	struct path path;
368162306a36Sopenharmony_ci	int ret;
368262306a36Sopenharmony_ci
368362306a36Sopenharmony_ci	ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path);
368462306a36Sopenharmony_ci	if (ret)
368562306a36Sopenharmony_ci		return ret;
368662306a36Sopenharmony_ci	ret = path_mount(dev_name, &path, type_page, flags, data_page);
368762306a36Sopenharmony_ci	path_put(&path);
368862306a36Sopenharmony_ci	return ret;
368962306a36Sopenharmony_ci}
369062306a36Sopenharmony_ci
369162306a36Sopenharmony_cistatic struct ucounts *inc_mnt_namespaces(struct user_namespace *ns)
369262306a36Sopenharmony_ci{
369362306a36Sopenharmony_ci	return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES);
369462306a36Sopenharmony_ci}
369562306a36Sopenharmony_ci
369662306a36Sopenharmony_cistatic void dec_mnt_namespaces(struct ucounts *ucounts)
369762306a36Sopenharmony_ci{
369862306a36Sopenharmony_ci	dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES);
369962306a36Sopenharmony_ci}
370062306a36Sopenharmony_ci
370162306a36Sopenharmony_cistatic void free_mnt_ns(struct mnt_namespace *ns)
370262306a36Sopenharmony_ci{
370362306a36Sopenharmony_ci	if (!is_anon_ns(ns))
370462306a36Sopenharmony_ci		ns_free_inum(&ns->ns);
370562306a36Sopenharmony_ci	dec_mnt_namespaces(ns->ucounts);
370662306a36Sopenharmony_ci	put_user_ns(ns->user_ns);
370762306a36Sopenharmony_ci	kfree(ns);
370862306a36Sopenharmony_ci}
370962306a36Sopenharmony_ci
/*
 * Assign a sequence number so we can detect when we attempt to bind
 * mount a reference to an older mount namespace into the current
 * mount namespace, preventing reference counting loops.  Even if the
 * counter were bumped at 10GHz, a 64bit number would take roughly 58
 * years to wrap (2^64 / 10^10 per second ~= 1.8 * 10^9 seconds), and it
 * is only incremented once per non-anonymous namespace allocation in
 * alloc_mnt_ns(), so we can ignore the possibility.
 */
static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
371862306a36Sopenharmony_ci
371962306a36Sopenharmony_cistatic struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
372062306a36Sopenharmony_ci{
372162306a36Sopenharmony_ci	struct mnt_namespace *new_ns;
372262306a36Sopenharmony_ci	struct ucounts *ucounts;
372362306a36Sopenharmony_ci	int ret;
372462306a36Sopenharmony_ci
372562306a36Sopenharmony_ci	ucounts = inc_mnt_namespaces(user_ns);
372662306a36Sopenharmony_ci	if (!ucounts)
372762306a36Sopenharmony_ci		return ERR_PTR(-ENOSPC);
372862306a36Sopenharmony_ci
372962306a36Sopenharmony_ci	new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL_ACCOUNT);
373062306a36Sopenharmony_ci	if (!new_ns) {
373162306a36Sopenharmony_ci		dec_mnt_namespaces(ucounts);
373262306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
373362306a36Sopenharmony_ci	}
373462306a36Sopenharmony_ci	if (!anon) {
373562306a36Sopenharmony_ci		ret = ns_alloc_inum(&new_ns->ns);
373662306a36Sopenharmony_ci		if (ret) {
373762306a36Sopenharmony_ci			kfree(new_ns);
373862306a36Sopenharmony_ci			dec_mnt_namespaces(ucounts);
373962306a36Sopenharmony_ci			return ERR_PTR(ret);
374062306a36Sopenharmony_ci		}
374162306a36Sopenharmony_ci	}
374262306a36Sopenharmony_ci	new_ns->ns.ops = &mntns_operations;
374362306a36Sopenharmony_ci	if (!anon)
374462306a36Sopenharmony_ci		new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
374562306a36Sopenharmony_ci	refcount_set(&new_ns->ns.count, 1);
374662306a36Sopenharmony_ci	INIT_LIST_HEAD(&new_ns->list);
374762306a36Sopenharmony_ci	init_waitqueue_head(&new_ns->poll);
374862306a36Sopenharmony_ci	spin_lock_init(&new_ns->ns_lock);
374962306a36Sopenharmony_ci	new_ns->user_ns = get_user_ns(user_ns);
375062306a36Sopenharmony_ci	new_ns->ucounts = ucounts;
375162306a36Sopenharmony_ci	return new_ns;
375262306a36Sopenharmony_ci}
375362306a36Sopenharmony_ci
375462306a36Sopenharmony_ci__latent_entropy
375562306a36Sopenharmony_cistruct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
375662306a36Sopenharmony_ci		struct user_namespace *user_ns, struct fs_struct *new_fs)
375762306a36Sopenharmony_ci{
375862306a36Sopenharmony_ci	struct mnt_namespace *new_ns;
375962306a36Sopenharmony_ci	struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
376062306a36Sopenharmony_ci	struct mount *p, *q;
376162306a36Sopenharmony_ci	struct mount *old;
376262306a36Sopenharmony_ci	struct mount *new;
376362306a36Sopenharmony_ci	int copy_flags;
376462306a36Sopenharmony_ci
376562306a36Sopenharmony_ci	BUG_ON(!ns);
376662306a36Sopenharmony_ci
376762306a36Sopenharmony_ci	if (likely(!(flags & CLONE_NEWNS))) {
376862306a36Sopenharmony_ci		get_mnt_ns(ns);
376962306a36Sopenharmony_ci		return ns;
377062306a36Sopenharmony_ci	}
377162306a36Sopenharmony_ci
377262306a36Sopenharmony_ci	old = ns->root;
377362306a36Sopenharmony_ci
377462306a36Sopenharmony_ci	new_ns = alloc_mnt_ns(user_ns, false);
377562306a36Sopenharmony_ci	if (IS_ERR(new_ns))
377662306a36Sopenharmony_ci		return new_ns;
377762306a36Sopenharmony_ci
377862306a36Sopenharmony_ci	namespace_lock();
377962306a36Sopenharmony_ci	/* First pass: copy the tree topology */
378062306a36Sopenharmony_ci	copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE;
378162306a36Sopenharmony_ci	if (user_ns != ns->user_ns)
378262306a36Sopenharmony_ci		copy_flags |= CL_SHARED_TO_SLAVE;
378362306a36Sopenharmony_ci	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
378462306a36Sopenharmony_ci	if (IS_ERR(new)) {
378562306a36Sopenharmony_ci		namespace_unlock();
378662306a36Sopenharmony_ci		free_mnt_ns(new_ns);
378762306a36Sopenharmony_ci		return ERR_CAST(new);
378862306a36Sopenharmony_ci	}
378962306a36Sopenharmony_ci	if (user_ns != ns->user_ns) {
379062306a36Sopenharmony_ci		lock_mount_hash();
379162306a36Sopenharmony_ci		lock_mnt_tree(new);
379262306a36Sopenharmony_ci		unlock_mount_hash();
379362306a36Sopenharmony_ci	}
379462306a36Sopenharmony_ci	new_ns->root = new;
379562306a36Sopenharmony_ci	list_add_tail(&new_ns->list, &new->mnt_list);
379662306a36Sopenharmony_ci
379762306a36Sopenharmony_ci	/*
379862306a36Sopenharmony_ci	 * Second pass: switch the tsk->fs->* elements and mark new vfsmounts
379962306a36Sopenharmony_ci	 * as belonging to new namespace.  We have already acquired a private
380062306a36Sopenharmony_ci	 * fs_struct, so tsk->fs->lock is not needed.
380162306a36Sopenharmony_ci	 */
380262306a36Sopenharmony_ci	p = old;
380362306a36Sopenharmony_ci	q = new;
380462306a36Sopenharmony_ci	while (p) {
380562306a36Sopenharmony_ci		q->mnt_ns = new_ns;
380662306a36Sopenharmony_ci		new_ns->mounts++;
380762306a36Sopenharmony_ci		if (new_fs) {
380862306a36Sopenharmony_ci			if (&p->mnt == new_fs->root.mnt) {
380962306a36Sopenharmony_ci				new_fs->root.mnt = mntget(&q->mnt);
381062306a36Sopenharmony_ci				rootmnt = &p->mnt;
381162306a36Sopenharmony_ci			}
381262306a36Sopenharmony_ci			if (&p->mnt == new_fs->pwd.mnt) {
381362306a36Sopenharmony_ci				new_fs->pwd.mnt = mntget(&q->mnt);
381462306a36Sopenharmony_ci				pwdmnt = &p->mnt;
381562306a36Sopenharmony_ci			}
381662306a36Sopenharmony_ci		}
381762306a36Sopenharmony_ci		p = next_mnt(p, old);
381862306a36Sopenharmony_ci		q = next_mnt(q, new);
381962306a36Sopenharmony_ci		if (!q)
382062306a36Sopenharmony_ci			break;
382162306a36Sopenharmony_ci		// an mntns binding we'd skipped?
382262306a36Sopenharmony_ci		while (p->mnt.mnt_root != q->mnt.mnt_root)
382362306a36Sopenharmony_ci			p = next_mnt(skip_mnt_tree(p), old);
382462306a36Sopenharmony_ci	}
382562306a36Sopenharmony_ci	namespace_unlock();
382662306a36Sopenharmony_ci
382762306a36Sopenharmony_ci	if (rootmnt)
382862306a36Sopenharmony_ci		mntput(rootmnt);
382962306a36Sopenharmony_ci	if (pwdmnt)
383062306a36Sopenharmony_ci		mntput(pwdmnt);
383162306a36Sopenharmony_ci
383262306a36Sopenharmony_ci	return new_ns;
383362306a36Sopenharmony_ci}
383462306a36Sopenharmony_ci
383562306a36Sopenharmony_cistruct dentry *mount_subtree(struct vfsmount *m, const char *name)
383662306a36Sopenharmony_ci{
383762306a36Sopenharmony_ci	struct mount *mnt = real_mount(m);
383862306a36Sopenharmony_ci	struct mnt_namespace *ns;
383962306a36Sopenharmony_ci	struct super_block *s;
384062306a36Sopenharmony_ci	struct path path;
384162306a36Sopenharmony_ci	int err;
384262306a36Sopenharmony_ci
384362306a36Sopenharmony_ci	ns = alloc_mnt_ns(&init_user_ns, true);
384462306a36Sopenharmony_ci	if (IS_ERR(ns)) {
384562306a36Sopenharmony_ci		mntput(m);
384662306a36Sopenharmony_ci		return ERR_CAST(ns);
384762306a36Sopenharmony_ci	}
384862306a36Sopenharmony_ci	mnt->mnt_ns = ns;
384962306a36Sopenharmony_ci	ns->root = mnt;
385062306a36Sopenharmony_ci	ns->mounts++;
385162306a36Sopenharmony_ci	list_add(&mnt->mnt_list, &ns->list);
385262306a36Sopenharmony_ci
385362306a36Sopenharmony_ci	err = vfs_path_lookup(m->mnt_root, m,
385462306a36Sopenharmony_ci			name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path);
385562306a36Sopenharmony_ci
385662306a36Sopenharmony_ci	put_mnt_ns(ns);
385762306a36Sopenharmony_ci
385862306a36Sopenharmony_ci	if (err)
385962306a36Sopenharmony_ci		return ERR_PTR(err);
386062306a36Sopenharmony_ci
386162306a36Sopenharmony_ci	/* trade a vfsmount reference for active sb one */
386262306a36Sopenharmony_ci	s = path.mnt->mnt_sb;
386362306a36Sopenharmony_ci	atomic_inc(&s->s_active);
386462306a36Sopenharmony_ci	mntput(path.mnt);
386562306a36Sopenharmony_ci	/* lock the sucker */
386662306a36Sopenharmony_ci	down_write(&s->s_umount);
386762306a36Sopenharmony_ci	/* ... and return the root of (sub)tree on it */
386862306a36Sopenharmony_ci	return path.dentry;
386962306a36Sopenharmony_ci}
387062306a36Sopenharmony_ciEXPORT_SYMBOL(mount_subtree);
387162306a36Sopenharmony_ci
387262306a36Sopenharmony_ciSYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
387362306a36Sopenharmony_ci		char __user *, type, unsigned long, flags, void __user *, data)
387462306a36Sopenharmony_ci{
387562306a36Sopenharmony_ci	int ret;
387662306a36Sopenharmony_ci	char *kernel_type;
387762306a36Sopenharmony_ci	char *kernel_dev;
387862306a36Sopenharmony_ci	void *options;
387962306a36Sopenharmony_ci
388062306a36Sopenharmony_ci	kernel_type = copy_mount_string(type);
388162306a36Sopenharmony_ci	ret = PTR_ERR(kernel_type);
388262306a36Sopenharmony_ci	if (IS_ERR(kernel_type))
388362306a36Sopenharmony_ci		goto out_type;
388462306a36Sopenharmony_ci
388562306a36Sopenharmony_ci	kernel_dev = copy_mount_string(dev_name);
388662306a36Sopenharmony_ci	ret = PTR_ERR(kernel_dev);
388762306a36Sopenharmony_ci	if (IS_ERR(kernel_dev))
388862306a36Sopenharmony_ci		goto out_dev;
388962306a36Sopenharmony_ci
389062306a36Sopenharmony_ci	options = copy_mount_options(data);
389162306a36Sopenharmony_ci	ret = PTR_ERR(options);
389262306a36Sopenharmony_ci	if (IS_ERR(options))
389362306a36Sopenharmony_ci		goto out_data;
389462306a36Sopenharmony_ci
389562306a36Sopenharmony_ci	ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
389662306a36Sopenharmony_ci
389762306a36Sopenharmony_ci	kfree(options);
389862306a36Sopenharmony_ciout_data:
389962306a36Sopenharmony_ci	kfree(kernel_dev);
390062306a36Sopenharmony_ciout_dev:
390162306a36Sopenharmony_ci	kfree(kernel_type);
390262306a36Sopenharmony_ciout_type:
390362306a36Sopenharmony_ci	return ret;
390462306a36Sopenharmony_ci}
390562306a36Sopenharmony_ci
/* Every MOUNT_ATTR_* bit that fsmount() accepts in its attr_flags argument. */
#define FSMOUNT_VALID_FLAGS		\
	(MOUNT_ATTR_RDONLY	|	\
	 MOUNT_ATTR_NOSUID	|	\
	 MOUNT_ATTR_NODEV	|	\
	 MOUNT_ATTR_NOEXEC	|	\
	 MOUNT_ATTR__ATIME	|	\
	 MOUNT_ATTR_NODIRATIME	|	\
	 MOUNT_ATTR_NOSYMFOLLOW)

/*
 * The fsmount() set plus MOUNT_ATTR_IDMAP.
 * NOTE(review): presumably validated by the mount_setattr() path, which is
 * outside this part of the file - confirm against its user.
 */
#define MOUNT_SETATTR_VALID_FLAGS	\
	(FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP)

/* The four MS_* propagation type bits. */
#define MOUNT_SETATTR_PROPAGATION_FLAGS	\
	(MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED)
391562306a36Sopenharmony_ci
391662306a36Sopenharmony_cistatic unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
391762306a36Sopenharmony_ci{
391862306a36Sopenharmony_ci	unsigned int mnt_flags = 0;
391962306a36Sopenharmony_ci
392062306a36Sopenharmony_ci	if (attr_flags & MOUNT_ATTR_RDONLY)
392162306a36Sopenharmony_ci		mnt_flags |= MNT_READONLY;
392262306a36Sopenharmony_ci	if (attr_flags & MOUNT_ATTR_NOSUID)
392362306a36Sopenharmony_ci		mnt_flags |= MNT_NOSUID;
392462306a36Sopenharmony_ci	if (attr_flags & MOUNT_ATTR_NODEV)
392562306a36Sopenharmony_ci		mnt_flags |= MNT_NODEV;
392662306a36Sopenharmony_ci	if (attr_flags & MOUNT_ATTR_NOEXEC)
392762306a36Sopenharmony_ci		mnt_flags |= MNT_NOEXEC;
392862306a36Sopenharmony_ci	if (attr_flags & MOUNT_ATTR_NODIRATIME)
392962306a36Sopenharmony_ci		mnt_flags |= MNT_NODIRATIME;
393062306a36Sopenharmony_ci	if (attr_flags & MOUNT_ATTR_NOSYMFOLLOW)
393162306a36Sopenharmony_ci		mnt_flags |= MNT_NOSYMFOLLOW;
393262306a36Sopenharmony_ci
393362306a36Sopenharmony_ci	return mnt_flags;
393462306a36Sopenharmony_ci}
393562306a36Sopenharmony_ci
393662306a36Sopenharmony_ci/*
393762306a36Sopenharmony_ci * Create a kernel mount representation for a new, prepared superblock
393862306a36Sopenharmony_ci * (specified by fs_fd) and attach to an open_tree-like file descriptor.
393962306a36Sopenharmony_ci */
394062306a36Sopenharmony_ciSYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
394162306a36Sopenharmony_ci		unsigned int, attr_flags)
394262306a36Sopenharmony_ci{
394362306a36Sopenharmony_ci	struct mnt_namespace *ns;
394462306a36Sopenharmony_ci	struct fs_context *fc;
394562306a36Sopenharmony_ci	struct file *file;
394662306a36Sopenharmony_ci	struct path newmount;
394762306a36Sopenharmony_ci	struct mount *mnt;
394862306a36Sopenharmony_ci	struct fd f;
394962306a36Sopenharmony_ci	unsigned int mnt_flags = 0;
395062306a36Sopenharmony_ci	long ret;
395162306a36Sopenharmony_ci
395262306a36Sopenharmony_ci	if (!may_mount())
395362306a36Sopenharmony_ci		return -EPERM;
395462306a36Sopenharmony_ci
395562306a36Sopenharmony_ci	if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
395662306a36Sopenharmony_ci		return -EINVAL;
395762306a36Sopenharmony_ci
395862306a36Sopenharmony_ci	if (attr_flags & ~FSMOUNT_VALID_FLAGS)
395962306a36Sopenharmony_ci		return -EINVAL;
396062306a36Sopenharmony_ci
396162306a36Sopenharmony_ci	mnt_flags = attr_flags_to_mnt_flags(attr_flags);
396262306a36Sopenharmony_ci
396362306a36Sopenharmony_ci	switch (attr_flags & MOUNT_ATTR__ATIME) {
396462306a36Sopenharmony_ci	case MOUNT_ATTR_STRICTATIME:
396562306a36Sopenharmony_ci		break;
396662306a36Sopenharmony_ci	case MOUNT_ATTR_NOATIME:
396762306a36Sopenharmony_ci		mnt_flags |= MNT_NOATIME;
396862306a36Sopenharmony_ci		break;
396962306a36Sopenharmony_ci	case MOUNT_ATTR_RELATIME:
397062306a36Sopenharmony_ci		mnt_flags |= MNT_RELATIME;
397162306a36Sopenharmony_ci		break;
397262306a36Sopenharmony_ci	default:
397362306a36Sopenharmony_ci		return -EINVAL;
397462306a36Sopenharmony_ci	}
397562306a36Sopenharmony_ci
397662306a36Sopenharmony_ci	f = fdget(fs_fd);
397762306a36Sopenharmony_ci	if (!f.file)
397862306a36Sopenharmony_ci		return -EBADF;
397962306a36Sopenharmony_ci
398062306a36Sopenharmony_ci	ret = -EINVAL;
398162306a36Sopenharmony_ci	if (f.file->f_op != &fscontext_fops)
398262306a36Sopenharmony_ci		goto err_fsfd;
398362306a36Sopenharmony_ci
398462306a36Sopenharmony_ci	fc = f.file->private_data;
398562306a36Sopenharmony_ci
398662306a36Sopenharmony_ci	ret = mutex_lock_interruptible(&fc->uapi_mutex);
398762306a36Sopenharmony_ci	if (ret < 0)
398862306a36Sopenharmony_ci		goto err_fsfd;
398962306a36Sopenharmony_ci
399062306a36Sopenharmony_ci	/* There must be a valid superblock or we can't mount it */
399162306a36Sopenharmony_ci	ret = -EINVAL;
399262306a36Sopenharmony_ci	if (!fc->root)
399362306a36Sopenharmony_ci		goto err_unlock;
399462306a36Sopenharmony_ci
399562306a36Sopenharmony_ci	ret = -EPERM;
399662306a36Sopenharmony_ci	if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
399762306a36Sopenharmony_ci		pr_warn("VFS: Mount too revealing\n");
399862306a36Sopenharmony_ci		goto err_unlock;
399962306a36Sopenharmony_ci	}
400062306a36Sopenharmony_ci
400162306a36Sopenharmony_ci	ret = -EBUSY;
400262306a36Sopenharmony_ci	if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
400362306a36Sopenharmony_ci		goto err_unlock;
400462306a36Sopenharmony_ci
400562306a36Sopenharmony_ci	if (fc->sb_flags & SB_MANDLOCK)
400662306a36Sopenharmony_ci		warn_mandlock();
400762306a36Sopenharmony_ci
400862306a36Sopenharmony_ci	newmount.mnt = vfs_create_mount(fc);
400962306a36Sopenharmony_ci	if (IS_ERR(newmount.mnt)) {
401062306a36Sopenharmony_ci		ret = PTR_ERR(newmount.mnt);
401162306a36Sopenharmony_ci		goto err_unlock;
401262306a36Sopenharmony_ci	}
401362306a36Sopenharmony_ci	newmount.dentry = dget(fc->root);
401462306a36Sopenharmony_ci	newmount.mnt->mnt_flags = mnt_flags;
401562306a36Sopenharmony_ci
401662306a36Sopenharmony_ci	/* We've done the mount bit - now move the file context into more or
401762306a36Sopenharmony_ci	 * less the same state as if we'd done an fspick().  We don't want to
401862306a36Sopenharmony_ci	 * do any memory allocation or anything like that at this point as we
401962306a36Sopenharmony_ci	 * don't want to have to handle any errors incurred.
402062306a36Sopenharmony_ci	 */
402162306a36Sopenharmony_ci	vfs_clean_context(fc);
402262306a36Sopenharmony_ci
402362306a36Sopenharmony_ci	ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
402462306a36Sopenharmony_ci	if (IS_ERR(ns)) {
402562306a36Sopenharmony_ci		ret = PTR_ERR(ns);
402662306a36Sopenharmony_ci		goto err_path;
402762306a36Sopenharmony_ci	}
402862306a36Sopenharmony_ci	mnt = real_mount(newmount.mnt);
402962306a36Sopenharmony_ci	mnt->mnt_ns = ns;
403062306a36Sopenharmony_ci	ns->root = mnt;
403162306a36Sopenharmony_ci	ns->mounts = 1;
403262306a36Sopenharmony_ci	list_add(&mnt->mnt_list, &ns->list);
403362306a36Sopenharmony_ci	mntget(newmount.mnt);
403462306a36Sopenharmony_ci
403562306a36Sopenharmony_ci	/* Attach to an apparent O_PATH fd with a note that we need to unmount
403662306a36Sopenharmony_ci	 * it, not just simply put it.
403762306a36Sopenharmony_ci	 */
403862306a36Sopenharmony_ci	file = dentry_open(&newmount, O_PATH, fc->cred);
403962306a36Sopenharmony_ci	if (IS_ERR(file)) {
404062306a36Sopenharmony_ci		dissolve_on_fput(newmount.mnt);
404162306a36Sopenharmony_ci		ret = PTR_ERR(file);
404262306a36Sopenharmony_ci		goto err_path;
404362306a36Sopenharmony_ci	}
404462306a36Sopenharmony_ci	file->f_mode |= FMODE_NEED_UNMOUNT;
404562306a36Sopenharmony_ci
404662306a36Sopenharmony_ci	ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
404762306a36Sopenharmony_ci	if (ret >= 0)
404862306a36Sopenharmony_ci		fd_install(ret, file);
404962306a36Sopenharmony_ci	else
405062306a36Sopenharmony_ci		fput(file);
405162306a36Sopenharmony_ci
405262306a36Sopenharmony_cierr_path:
405362306a36Sopenharmony_ci	path_put(&newmount);
405462306a36Sopenharmony_cierr_unlock:
405562306a36Sopenharmony_ci	mutex_unlock(&fc->uapi_mutex);
405662306a36Sopenharmony_cierr_fsfd:
405762306a36Sopenharmony_ci	fdput(f);
405862306a36Sopenharmony_ci	return ret;
405962306a36Sopenharmony_ci}
406062306a36Sopenharmony_ci
406162306a36Sopenharmony_ci/*
406262306a36Sopenharmony_ci * Move a mount from one place to another.  In combination with
406362306a36Sopenharmony_ci * fsopen()/fsmount() this is used to install a new mount and in combination
406462306a36Sopenharmony_ci * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy
406562306a36Sopenharmony_ci * a mount subtree.
406662306a36Sopenharmony_ci *
406762306a36Sopenharmony_ci * Note the flags value is a combination of MOVE_MOUNT_* flags.
406862306a36Sopenharmony_ci */
406962306a36Sopenharmony_ciSYSCALL_DEFINE5(move_mount,
407062306a36Sopenharmony_ci		int, from_dfd, const char __user *, from_pathname,
407162306a36Sopenharmony_ci		int, to_dfd, const char __user *, to_pathname,
407262306a36Sopenharmony_ci		unsigned int, flags)
407362306a36Sopenharmony_ci{
407462306a36Sopenharmony_ci	struct path from_path, to_path;
407562306a36Sopenharmony_ci	unsigned int lflags;
407662306a36Sopenharmony_ci	int ret = 0;
407762306a36Sopenharmony_ci
407862306a36Sopenharmony_ci	if (!may_mount())
407962306a36Sopenharmony_ci		return -EPERM;
408062306a36Sopenharmony_ci
408162306a36Sopenharmony_ci	if (flags & ~MOVE_MOUNT__MASK)
408262306a36Sopenharmony_ci		return -EINVAL;
408362306a36Sopenharmony_ci
408462306a36Sopenharmony_ci	if ((flags & (MOVE_MOUNT_BENEATH | MOVE_MOUNT_SET_GROUP)) ==
408562306a36Sopenharmony_ci	    (MOVE_MOUNT_BENEATH | MOVE_MOUNT_SET_GROUP))
408662306a36Sopenharmony_ci		return -EINVAL;
408762306a36Sopenharmony_ci
408862306a36Sopenharmony_ci	/* If someone gives a pathname, they aren't permitted to move
408962306a36Sopenharmony_ci	 * from an fd that requires unmount as we can't get at the flag
409062306a36Sopenharmony_ci	 * to clear it afterwards.
409162306a36Sopenharmony_ci	 */
409262306a36Sopenharmony_ci	lflags = 0;
409362306a36Sopenharmony_ci	if (flags & MOVE_MOUNT_F_SYMLINKS)	lflags |= LOOKUP_FOLLOW;
409462306a36Sopenharmony_ci	if (flags & MOVE_MOUNT_F_AUTOMOUNTS)	lflags |= LOOKUP_AUTOMOUNT;
409562306a36Sopenharmony_ci	if (flags & MOVE_MOUNT_F_EMPTY_PATH)	lflags |= LOOKUP_EMPTY;
409662306a36Sopenharmony_ci
409762306a36Sopenharmony_ci	ret = user_path_at(from_dfd, from_pathname, lflags, &from_path);
409862306a36Sopenharmony_ci	if (ret < 0)
409962306a36Sopenharmony_ci		return ret;
410062306a36Sopenharmony_ci
410162306a36Sopenharmony_ci	lflags = 0;
410262306a36Sopenharmony_ci	if (flags & MOVE_MOUNT_T_SYMLINKS)	lflags |= LOOKUP_FOLLOW;
410362306a36Sopenharmony_ci	if (flags & MOVE_MOUNT_T_AUTOMOUNTS)	lflags |= LOOKUP_AUTOMOUNT;
410462306a36Sopenharmony_ci	if (flags & MOVE_MOUNT_T_EMPTY_PATH)	lflags |= LOOKUP_EMPTY;
410562306a36Sopenharmony_ci
410662306a36Sopenharmony_ci	ret = user_path_at(to_dfd, to_pathname, lflags, &to_path);
410762306a36Sopenharmony_ci	if (ret < 0)
410862306a36Sopenharmony_ci		goto out_from;
410962306a36Sopenharmony_ci
411062306a36Sopenharmony_ci	ret = security_move_mount(&from_path, &to_path);
411162306a36Sopenharmony_ci	if (ret < 0)
411262306a36Sopenharmony_ci		goto out_to;
411362306a36Sopenharmony_ci
411462306a36Sopenharmony_ci	if (flags & MOVE_MOUNT_SET_GROUP)
411562306a36Sopenharmony_ci		ret = do_set_group(&from_path, &to_path);
411662306a36Sopenharmony_ci	else
411762306a36Sopenharmony_ci		ret = do_move_mount(&from_path, &to_path,
411862306a36Sopenharmony_ci				    (flags & MOVE_MOUNT_BENEATH));
411962306a36Sopenharmony_ci
412062306a36Sopenharmony_ciout_to:
412162306a36Sopenharmony_ci	path_put(&to_path);
412262306a36Sopenharmony_ciout_from:
412362306a36Sopenharmony_ci	path_put(&from_path);
412462306a36Sopenharmony_ci	return ret;
412562306a36Sopenharmony_ci}
412662306a36Sopenharmony_ci
412762306a36Sopenharmony_ci/*
412862306a36Sopenharmony_ci * Return true if path is reachable from root
412962306a36Sopenharmony_ci *
413062306a36Sopenharmony_ci * namespace_sem or mount_lock is held
413162306a36Sopenharmony_ci */
413262306a36Sopenharmony_cibool is_path_reachable(struct mount *mnt, struct dentry *dentry,
413362306a36Sopenharmony_ci			 const struct path *root)
413462306a36Sopenharmony_ci{
413562306a36Sopenharmony_ci	while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) {
413662306a36Sopenharmony_ci		dentry = mnt->mnt_mountpoint;
413762306a36Sopenharmony_ci		mnt = mnt->mnt_parent;
413862306a36Sopenharmony_ci	}
413962306a36Sopenharmony_ci	return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry);
414062306a36Sopenharmony_ci}
414162306a36Sopenharmony_ci
414262306a36Sopenharmony_cibool path_is_under(const struct path *path1, const struct path *path2)
414362306a36Sopenharmony_ci{
414462306a36Sopenharmony_ci	bool res;
414562306a36Sopenharmony_ci	read_seqlock_excl(&mount_lock);
414662306a36Sopenharmony_ci	res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
414762306a36Sopenharmony_ci	read_sequnlock_excl(&mount_lock);
414862306a36Sopenharmony_ci	return res;
414962306a36Sopenharmony_ci}
415062306a36Sopenharmony_ciEXPORT_SYMBOL(path_is_under);
415162306a36Sopenharmony_ci
415262306a36Sopenharmony_ci/*
415362306a36Sopenharmony_ci * pivot_root Semantics:
415462306a36Sopenharmony_ci * Moves the root file system of the current process to the directory put_old,
415562306a36Sopenharmony_ci * makes new_root as the new root file system of the current process, and sets
415662306a36Sopenharmony_ci * root/cwd of all processes which had them on the current root to new_root.
415762306a36Sopenharmony_ci *
415862306a36Sopenharmony_ci * Restrictions:
415962306a36Sopenharmony_ci * The new_root and put_old must be directories, and  must not be on the
416062306a36Sopenharmony_ci * same file  system as the current process root. The put_old  must  be
416162306a36Sopenharmony_ci * underneath new_root,  i.e. adding a non-zero number of /.. to the string
416262306a36Sopenharmony_ci * pointed to by put_old must yield the same directory as new_root. No other
416362306a36Sopenharmony_ci * file system may be mounted on put_old. After all, new_root is a mountpoint.
416462306a36Sopenharmony_ci *
416562306a36Sopenharmony_ci * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem.
416662306a36Sopenharmony_ci * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives
416762306a36Sopenharmony_ci * in this situation.
416862306a36Sopenharmony_ci *
416962306a36Sopenharmony_ci * Notes:
417062306a36Sopenharmony_ci *  - we don't move root/cwd if they are not at the root (reason: if something
417162306a36Sopenharmony_ci *    cared enough to change them, it's probably wrong to force them elsewhere)
417262306a36Sopenharmony_ci *  - it's okay to pick a root that isn't the root of a file system, e.g.
417362306a36Sopenharmony_ci *    /nfs/my_root where /nfs is the mount point. It must be a mountpoint,
417462306a36Sopenharmony_ci *    though, so you may need to say mount --bind /nfs/my_root /nfs/my_root
417562306a36Sopenharmony_ci *    first.
417662306a36Sopenharmony_ci */
417762306a36Sopenharmony_ciSYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
417862306a36Sopenharmony_ci		const char __user *, put_old)
417962306a36Sopenharmony_ci{
418062306a36Sopenharmony_ci	struct path new, old, root;
418162306a36Sopenharmony_ci	struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent;
418262306a36Sopenharmony_ci	struct mountpoint *old_mp, *root_mp;
418362306a36Sopenharmony_ci	int error;
418462306a36Sopenharmony_ci
418562306a36Sopenharmony_ci	if (!may_mount())
418662306a36Sopenharmony_ci		return -EPERM;
418762306a36Sopenharmony_ci
418862306a36Sopenharmony_ci	error = user_path_at(AT_FDCWD, new_root,
418962306a36Sopenharmony_ci			     LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new);
419062306a36Sopenharmony_ci	if (error)
419162306a36Sopenharmony_ci		goto out0;
419262306a36Sopenharmony_ci
419362306a36Sopenharmony_ci	error = user_path_at(AT_FDCWD, put_old,
419462306a36Sopenharmony_ci			     LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old);
419562306a36Sopenharmony_ci	if (error)
419662306a36Sopenharmony_ci		goto out1;
419762306a36Sopenharmony_ci
419862306a36Sopenharmony_ci	error = security_sb_pivotroot(&old, &new);
419962306a36Sopenharmony_ci	if (error)
420062306a36Sopenharmony_ci		goto out2;
420162306a36Sopenharmony_ci
420262306a36Sopenharmony_ci	get_fs_root(current->fs, &root);
420362306a36Sopenharmony_ci	old_mp = lock_mount(&old);
420462306a36Sopenharmony_ci	error = PTR_ERR(old_mp);
420562306a36Sopenharmony_ci	if (IS_ERR(old_mp))
420662306a36Sopenharmony_ci		goto out3;
420762306a36Sopenharmony_ci
420862306a36Sopenharmony_ci	error = -EINVAL;
420962306a36Sopenharmony_ci	new_mnt = real_mount(new.mnt);
421062306a36Sopenharmony_ci	root_mnt = real_mount(root.mnt);
421162306a36Sopenharmony_ci	old_mnt = real_mount(old.mnt);
421262306a36Sopenharmony_ci	ex_parent = new_mnt->mnt_parent;
421362306a36Sopenharmony_ci	root_parent = root_mnt->mnt_parent;
421462306a36Sopenharmony_ci	if (IS_MNT_SHARED(old_mnt) ||
421562306a36Sopenharmony_ci		IS_MNT_SHARED(ex_parent) ||
421662306a36Sopenharmony_ci		IS_MNT_SHARED(root_parent))
421762306a36Sopenharmony_ci		goto out4;
421862306a36Sopenharmony_ci	if (!check_mnt(root_mnt) || !check_mnt(new_mnt))
421962306a36Sopenharmony_ci		goto out4;
422062306a36Sopenharmony_ci	if (new_mnt->mnt.mnt_flags & MNT_LOCKED)
422162306a36Sopenharmony_ci		goto out4;
422262306a36Sopenharmony_ci	error = -ENOENT;
422362306a36Sopenharmony_ci	if (d_unlinked(new.dentry))
422462306a36Sopenharmony_ci		goto out4;
422562306a36Sopenharmony_ci	error = -EBUSY;
422662306a36Sopenharmony_ci	if (new_mnt == root_mnt || old_mnt == root_mnt)
422762306a36Sopenharmony_ci		goto out4; /* loop, on the same file system  */
422862306a36Sopenharmony_ci	error = -EINVAL;
422962306a36Sopenharmony_ci	if (!path_mounted(&root))
423062306a36Sopenharmony_ci		goto out4; /* not a mountpoint */
423162306a36Sopenharmony_ci	if (!mnt_has_parent(root_mnt))
423262306a36Sopenharmony_ci		goto out4; /* not attached */
423362306a36Sopenharmony_ci	if (!path_mounted(&new))
423462306a36Sopenharmony_ci		goto out4; /* not a mountpoint */
423562306a36Sopenharmony_ci	if (!mnt_has_parent(new_mnt))
423662306a36Sopenharmony_ci		goto out4; /* not attached */
423762306a36Sopenharmony_ci	/* make sure we can reach put_old from new_root */
423862306a36Sopenharmony_ci	if (!is_path_reachable(old_mnt, old.dentry, &new))
423962306a36Sopenharmony_ci		goto out4;
424062306a36Sopenharmony_ci	/* make certain new is below the root */
424162306a36Sopenharmony_ci	if (!is_path_reachable(new_mnt, new.dentry, &root))
424262306a36Sopenharmony_ci		goto out4;
424362306a36Sopenharmony_ci	lock_mount_hash();
424462306a36Sopenharmony_ci	umount_mnt(new_mnt);
424562306a36Sopenharmony_ci	root_mp = unhash_mnt(root_mnt);  /* we'll need its mountpoint */
424662306a36Sopenharmony_ci	if (root_mnt->mnt.mnt_flags & MNT_LOCKED) {
424762306a36Sopenharmony_ci		new_mnt->mnt.mnt_flags |= MNT_LOCKED;
424862306a36Sopenharmony_ci		root_mnt->mnt.mnt_flags &= ~MNT_LOCKED;
424962306a36Sopenharmony_ci	}
425062306a36Sopenharmony_ci	/* mount old root on put_old */
425162306a36Sopenharmony_ci	attach_mnt(root_mnt, old_mnt, old_mp, false);
425262306a36Sopenharmony_ci	/* mount new_root on / */
425362306a36Sopenharmony_ci	attach_mnt(new_mnt, root_parent, root_mp, false);
425462306a36Sopenharmony_ci	mnt_add_count(root_parent, -1);
425562306a36Sopenharmony_ci	touch_mnt_namespace(current->nsproxy->mnt_ns);
425662306a36Sopenharmony_ci	/* A moved mount should not expire automatically */
425762306a36Sopenharmony_ci	list_del_init(&new_mnt->mnt_expire);
425862306a36Sopenharmony_ci	put_mountpoint(root_mp);
425962306a36Sopenharmony_ci	unlock_mount_hash();
426062306a36Sopenharmony_ci	chroot_fs_refs(&root, &new);
426162306a36Sopenharmony_ci	error = 0;
426262306a36Sopenharmony_ciout4:
426362306a36Sopenharmony_ci	unlock_mount(old_mp);
426462306a36Sopenharmony_ci	if (!error)
426562306a36Sopenharmony_ci		mntput_no_expire(ex_parent);
426662306a36Sopenharmony_ciout3:
426762306a36Sopenharmony_ci	path_put(&root);
426862306a36Sopenharmony_ciout2:
426962306a36Sopenharmony_ci	path_put(&old);
427062306a36Sopenharmony_ciout1:
427162306a36Sopenharmony_ci	path_put(&new);
427262306a36Sopenharmony_ciout0:
427362306a36Sopenharmony_ci	return error;
427462306a36Sopenharmony_ci}
427562306a36Sopenharmony_ci
427662306a36Sopenharmony_cistatic unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
427762306a36Sopenharmony_ci{
427862306a36Sopenharmony_ci	unsigned int flags = mnt->mnt.mnt_flags;
427962306a36Sopenharmony_ci
428062306a36Sopenharmony_ci	/*  flags to clear */
428162306a36Sopenharmony_ci	flags &= ~kattr->attr_clr;
428262306a36Sopenharmony_ci	/* flags to raise */
428362306a36Sopenharmony_ci	flags |= kattr->attr_set;
428462306a36Sopenharmony_ci
428562306a36Sopenharmony_ci	return flags;
428662306a36Sopenharmony_ci}
428762306a36Sopenharmony_ci
428862306a36Sopenharmony_cistatic int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
428962306a36Sopenharmony_ci{
429062306a36Sopenharmony_ci	struct vfsmount *m = &mnt->mnt;
429162306a36Sopenharmony_ci	struct user_namespace *fs_userns = m->mnt_sb->s_user_ns;
429262306a36Sopenharmony_ci
429362306a36Sopenharmony_ci	if (!kattr->mnt_idmap)
429462306a36Sopenharmony_ci		return 0;
429562306a36Sopenharmony_ci
429662306a36Sopenharmony_ci	/*
429762306a36Sopenharmony_ci	 * Creating an idmapped mount with the filesystem wide idmapping
429862306a36Sopenharmony_ci	 * doesn't make sense so block that. We don't allow mushy semantics.
429962306a36Sopenharmony_ci	 */
430062306a36Sopenharmony_ci	if (!check_fsmapping(kattr->mnt_idmap, m->mnt_sb))
430162306a36Sopenharmony_ci		return -EINVAL;
430262306a36Sopenharmony_ci
430362306a36Sopenharmony_ci	/*
430462306a36Sopenharmony_ci	 * Once a mount has been idmapped we don't allow it to change its
430562306a36Sopenharmony_ci	 * mapping. It makes things simpler and callers can just create
430662306a36Sopenharmony_ci	 * another bind-mount they can idmap if they want to.
430762306a36Sopenharmony_ci	 */
430862306a36Sopenharmony_ci	if (is_idmapped_mnt(m))
430962306a36Sopenharmony_ci		return -EPERM;
431062306a36Sopenharmony_ci
431162306a36Sopenharmony_ci	/* The underlying filesystem doesn't support idmapped mounts yet. */
431262306a36Sopenharmony_ci	if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
431362306a36Sopenharmony_ci		return -EINVAL;
431462306a36Sopenharmony_ci
431562306a36Sopenharmony_ci	/* We're not controlling the superblock. */
431662306a36Sopenharmony_ci	if (!ns_capable(fs_userns, CAP_SYS_ADMIN))
431762306a36Sopenharmony_ci		return -EPERM;
431862306a36Sopenharmony_ci
431962306a36Sopenharmony_ci	/* Mount has already been visible in the filesystem hierarchy. */
432062306a36Sopenharmony_ci	if (!is_anon_ns(mnt->mnt_ns))
432162306a36Sopenharmony_ci		return -EINVAL;
432262306a36Sopenharmony_ci
432362306a36Sopenharmony_ci	return 0;
432462306a36Sopenharmony_ci}
432562306a36Sopenharmony_ci
432662306a36Sopenharmony_ci/**
432762306a36Sopenharmony_ci * mnt_allow_writers() - check whether the attribute change allows writers
432862306a36Sopenharmony_ci * @kattr: the new mount attributes
432962306a36Sopenharmony_ci * @mnt: the mount to which @kattr will be applied
433062306a36Sopenharmony_ci *
433162306a36Sopenharmony_ci * Check whether thew new mount attributes in @kattr allow concurrent writers.
433262306a36Sopenharmony_ci *
433362306a36Sopenharmony_ci * Return: true if writers need to be held, false if not
433462306a36Sopenharmony_ci */
433562306a36Sopenharmony_cistatic inline bool mnt_allow_writers(const struct mount_kattr *kattr,
433662306a36Sopenharmony_ci				     const struct mount *mnt)
433762306a36Sopenharmony_ci{
433862306a36Sopenharmony_ci	return (!(kattr->attr_set & MNT_READONLY) ||
433962306a36Sopenharmony_ci		(mnt->mnt.mnt_flags & MNT_READONLY)) &&
434062306a36Sopenharmony_ci	       !kattr->mnt_idmap;
434162306a36Sopenharmony_ci}
434262306a36Sopenharmony_ci
434362306a36Sopenharmony_cistatic int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt)
434462306a36Sopenharmony_ci{
434562306a36Sopenharmony_ci	struct mount *m;
434662306a36Sopenharmony_ci	int err;
434762306a36Sopenharmony_ci
434862306a36Sopenharmony_ci	for (m = mnt; m; m = next_mnt(m, mnt)) {
434962306a36Sopenharmony_ci		if (!can_change_locked_flags(m, recalc_flags(kattr, m))) {
435062306a36Sopenharmony_ci			err = -EPERM;
435162306a36Sopenharmony_ci			break;
435262306a36Sopenharmony_ci		}
435362306a36Sopenharmony_ci
435462306a36Sopenharmony_ci		err = can_idmap_mount(kattr, m);
435562306a36Sopenharmony_ci		if (err)
435662306a36Sopenharmony_ci			break;
435762306a36Sopenharmony_ci
435862306a36Sopenharmony_ci		if (!mnt_allow_writers(kattr, m)) {
435962306a36Sopenharmony_ci			err = mnt_hold_writers(m);
436062306a36Sopenharmony_ci			if (err)
436162306a36Sopenharmony_ci				break;
436262306a36Sopenharmony_ci		}
436362306a36Sopenharmony_ci
436462306a36Sopenharmony_ci		if (!kattr->recurse)
436562306a36Sopenharmony_ci			return 0;
436662306a36Sopenharmony_ci	}
436762306a36Sopenharmony_ci
436862306a36Sopenharmony_ci	if (err) {
436962306a36Sopenharmony_ci		struct mount *p;
437062306a36Sopenharmony_ci
437162306a36Sopenharmony_ci		/*
437262306a36Sopenharmony_ci		 * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will
437362306a36Sopenharmony_ci		 * be set in @mnt_flags. The loop unsets MNT_WRITE_HOLD for all
437462306a36Sopenharmony_ci		 * mounts and needs to take care to include the first mount.
437562306a36Sopenharmony_ci		 */
437662306a36Sopenharmony_ci		for (p = mnt; p; p = next_mnt(p, mnt)) {
437762306a36Sopenharmony_ci			/* If we had to hold writers unblock them. */
437862306a36Sopenharmony_ci			if (p->mnt.mnt_flags & MNT_WRITE_HOLD)
437962306a36Sopenharmony_ci				mnt_unhold_writers(p);
438062306a36Sopenharmony_ci
438162306a36Sopenharmony_ci			/*
438262306a36Sopenharmony_ci			 * We're done once the first mount we changed got
438362306a36Sopenharmony_ci			 * MNT_WRITE_HOLD unset.
438462306a36Sopenharmony_ci			 */
438562306a36Sopenharmony_ci			if (p == m)
438662306a36Sopenharmony_ci				break;
438762306a36Sopenharmony_ci		}
438862306a36Sopenharmony_ci	}
438962306a36Sopenharmony_ci	return err;
439062306a36Sopenharmony_ci}
439162306a36Sopenharmony_ci
439262306a36Sopenharmony_cistatic void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
439362306a36Sopenharmony_ci{
439462306a36Sopenharmony_ci	if (!kattr->mnt_idmap)
439562306a36Sopenharmony_ci		return;
439662306a36Sopenharmony_ci
439762306a36Sopenharmony_ci	/*
439862306a36Sopenharmony_ci	 * Pairs with smp_load_acquire() in mnt_idmap().
439962306a36Sopenharmony_ci	 *
440062306a36Sopenharmony_ci	 * Since we only allow a mount to change the idmapping once and
440162306a36Sopenharmony_ci	 * verified this in can_idmap_mount() we know that the mount has
440262306a36Sopenharmony_ci	 * @nop_mnt_idmap attached to it. So there's no need to drop any
440362306a36Sopenharmony_ci	 * references.
440462306a36Sopenharmony_ci	 */
440562306a36Sopenharmony_ci	smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap));
440662306a36Sopenharmony_ci}
440762306a36Sopenharmony_ci
440862306a36Sopenharmony_cistatic void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt)
440962306a36Sopenharmony_ci{
441062306a36Sopenharmony_ci	struct mount *m;
441162306a36Sopenharmony_ci
441262306a36Sopenharmony_ci	for (m = mnt; m; m = next_mnt(m, mnt)) {
441362306a36Sopenharmony_ci		unsigned int flags;
441462306a36Sopenharmony_ci
441562306a36Sopenharmony_ci		do_idmap_mount(kattr, m);
441662306a36Sopenharmony_ci		flags = recalc_flags(kattr, m);
441762306a36Sopenharmony_ci		WRITE_ONCE(m->mnt.mnt_flags, flags);
441862306a36Sopenharmony_ci
441962306a36Sopenharmony_ci		/* If we had to hold writers unblock them. */
442062306a36Sopenharmony_ci		if (m->mnt.mnt_flags & MNT_WRITE_HOLD)
442162306a36Sopenharmony_ci			mnt_unhold_writers(m);
442262306a36Sopenharmony_ci
442362306a36Sopenharmony_ci		if (kattr->propagation)
442462306a36Sopenharmony_ci			change_mnt_propagation(m, kattr->propagation);
442562306a36Sopenharmony_ci		if (!kattr->recurse)
442662306a36Sopenharmony_ci			break;
442762306a36Sopenharmony_ci	}
442862306a36Sopenharmony_ci	touch_mnt_namespace(mnt->mnt_ns);
442962306a36Sopenharmony_ci}
443062306a36Sopenharmony_ci
443162306a36Sopenharmony_cistatic int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
443262306a36Sopenharmony_ci{
443362306a36Sopenharmony_ci	struct mount *mnt = real_mount(path->mnt);
443462306a36Sopenharmony_ci	int err = 0;
443562306a36Sopenharmony_ci
443662306a36Sopenharmony_ci	if (!path_mounted(path))
443762306a36Sopenharmony_ci		return -EINVAL;
443862306a36Sopenharmony_ci
443962306a36Sopenharmony_ci	if (kattr->mnt_userns) {
444062306a36Sopenharmony_ci		struct mnt_idmap *mnt_idmap;
444162306a36Sopenharmony_ci
444262306a36Sopenharmony_ci		mnt_idmap = alloc_mnt_idmap(kattr->mnt_userns);
444362306a36Sopenharmony_ci		if (IS_ERR(mnt_idmap))
444462306a36Sopenharmony_ci			return PTR_ERR(mnt_idmap);
444562306a36Sopenharmony_ci		kattr->mnt_idmap = mnt_idmap;
444662306a36Sopenharmony_ci	}
444762306a36Sopenharmony_ci
444862306a36Sopenharmony_ci	if (kattr->propagation) {
444962306a36Sopenharmony_ci		/*
445062306a36Sopenharmony_ci		 * Only take namespace_lock() if we're actually changing
445162306a36Sopenharmony_ci		 * propagation.
445262306a36Sopenharmony_ci		 */
445362306a36Sopenharmony_ci		namespace_lock();
445462306a36Sopenharmony_ci		if (kattr->propagation == MS_SHARED) {
445562306a36Sopenharmony_ci			err = invent_group_ids(mnt, kattr->recurse);
445662306a36Sopenharmony_ci			if (err) {
445762306a36Sopenharmony_ci				namespace_unlock();
445862306a36Sopenharmony_ci				return err;
445962306a36Sopenharmony_ci			}
446062306a36Sopenharmony_ci		}
446162306a36Sopenharmony_ci	}
446262306a36Sopenharmony_ci
446362306a36Sopenharmony_ci	err = -EINVAL;
446462306a36Sopenharmony_ci	lock_mount_hash();
446562306a36Sopenharmony_ci
446662306a36Sopenharmony_ci	/* Ensure that this isn't anything purely vfs internal. */
446762306a36Sopenharmony_ci	if (!is_mounted(&mnt->mnt))
446862306a36Sopenharmony_ci		goto out;
446962306a36Sopenharmony_ci
447062306a36Sopenharmony_ci	/*
447162306a36Sopenharmony_ci	 * If this is an attached mount make sure it's located in the callers
447262306a36Sopenharmony_ci	 * mount namespace. If it's not don't let the caller interact with it.
447362306a36Sopenharmony_ci	 *
447462306a36Sopenharmony_ci	 * If this mount doesn't have a parent it's most often simply a
447562306a36Sopenharmony_ci	 * detached mount with an anonymous mount namespace. IOW, something
447662306a36Sopenharmony_ci	 * that's simply not attached yet. But there are apparently also users
447762306a36Sopenharmony_ci	 * that do change mount properties on the rootfs itself. That obviously
447862306a36Sopenharmony_ci	 * neither has a parent nor is it a detached mount so we cannot
447962306a36Sopenharmony_ci	 * unconditionally check for detached mounts.
448062306a36Sopenharmony_ci	 */
448162306a36Sopenharmony_ci	if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt))
448262306a36Sopenharmony_ci		goto out;
448362306a36Sopenharmony_ci
448462306a36Sopenharmony_ci	/*
448562306a36Sopenharmony_ci	 * First, we get the mount tree in a shape where we can change mount
448662306a36Sopenharmony_ci	 * properties without failure. If we succeeded to do so we commit all
448762306a36Sopenharmony_ci	 * changes and if we failed we clean up.
448862306a36Sopenharmony_ci	 */
448962306a36Sopenharmony_ci	err = mount_setattr_prepare(kattr, mnt);
449062306a36Sopenharmony_ci	if (!err)
449162306a36Sopenharmony_ci		mount_setattr_commit(kattr, mnt);
449262306a36Sopenharmony_ci
449362306a36Sopenharmony_ciout:
449462306a36Sopenharmony_ci	unlock_mount_hash();
449562306a36Sopenharmony_ci
449662306a36Sopenharmony_ci	if (kattr->propagation) {
449762306a36Sopenharmony_ci		if (err)
449862306a36Sopenharmony_ci			cleanup_group_ids(mnt, NULL);
449962306a36Sopenharmony_ci		namespace_unlock();
450062306a36Sopenharmony_ci	}
450162306a36Sopenharmony_ci
450262306a36Sopenharmony_ci	return err;
450362306a36Sopenharmony_ci}
450462306a36Sopenharmony_ci
450562306a36Sopenharmony_cistatic int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
450662306a36Sopenharmony_ci				struct mount_kattr *kattr, unsigned int flags)
450762306a36Sopenharmony_ci{
450862306a36Sopenharmony_ci	int err = 0;
450962306a36Sopenharmony_ci	struct ns_common *ns;
451062306a36Sopenharmony_ci	struct user_namespace *mnt_userns;
451162306a36Sopenharmony_ci	struct fd f;
451262306a36Sopenharmony_ci
451362306a36Sopenharmony_ci	if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP))
451462306a36Sopenharmony_ci		return 0;
451562306a36Sopenharmony_ci
451662306a36Sopenharmony_ci	/*
451762306a36Sopenharmony_ci	 * We currently do not support clearing an idmapped mount. If this ever
451862306a36Sopenharmony_ci	 * is a use-case we can revisit this but for now let's keep it simple
451962306a36Sopenharmony_ci	 * and not allow it.
452062306a36Sopenharmony_ci	 */
452162306a36Sopenharmony_ci	if (attr->attr_clr & MOUNT_ATTR_IDMAP)
452262306a36Sopenharmony_ci		return -EINVAL;
452362306a36Sopenharmony_ci
452462306a36Sopenharmony_ci	if (attr->userns_fd > INT_MAX)
452562306a36Sopenharmony_ci		return -EINVAL;
452662306a36Sopenharmony_ci
452762306a36Sopenharmony_ci	f = fdget(attr->userns_fd);
452862306a36Sopenharmony_ci	if (!f.file)
452962306a36Sopenharmony_ci		return -EBADF;
453062306a36Sopenharmony_ci
453162306a36Sopenharmony_ci	if (!proc_ns_file(f.file)) {
453262306a36Sopenharmony_ci		err = -EINVAL;
453362306a36Sopenharmony_ci		goto out_fput;
453462306a36Sopenharmony_ci	}
453562306a36Sopenharmony_ci
453662306a36Sopenharmony_ci	ns = get_proc_ns(file_inode(f.file));
453762306a36Sopenharmony_ci	if (ns->ops->type != CLONE_NEWUSER) {
453862306a36Sopenharmony_ci		err = -EINVAL;
453962306a36Sopenharmony_ci		goto out_fput;
454062306a36Sopenharmony_ci	}
454162306a36Sopenharmony_ci
454262306a36Sopenharmony_ci	/*
454362306a36Sopenharmony_ci	 * The initial idmapping cannot be used to create an idmapped
454462306a36Sopenharmony_ci	 * mount. We use the initial idmapping as an indicator of a mount
454562306a36Sopenharmony_ci	 * that is not idmapped. It can simply be passed into helpers that
454662306a36Sopenharmony_ci	 * are aware of idmapped mounts as a convenient shortcut. A user
454762306a36Sopenharmony_ci	 * can just create a dedicated identity mapping to achieve the same
454862306a36Sopenharmony_ci	 * result.
454962306a36Sopenharmony_ci	 */
455062306a36Sopenharmony_ci	mnt_userns = container_of(ns, struct user_namespace, ns);
455162306a36Sopenharmony_ci	if (mnt_userns == &init_user_ns) {
455262306a36Sopenharmony_ci		err = -EPERM;
455362306a36Sopenharmony_ci		goto out_fput;
455462306a36Sopenharmony_ci	}
455562306a36Sopenharmony_ci
455662306a36Sopenharmony_ci	/* We're not controlling the target namespace. */
455762306a36Sopenharmony_ci	if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) {
455862306a36Sopenharmony_ci		err = -EPERM;
455962306a36Sopenharmony_ci		goto out_fput;
456062306a36Sopenharmony_ci	}
456162306a36Sopenharmony_ci
456262306a36Sopenharmony_ci	kattr->mnt_userns = get_user_ns(mnt_userns);
456362306a36Sopenharmony_ci
456462306a36Sopenharmony_ciout_fput:
456562306a36Sopenharmony_ci	fdput(f);
456662306a36Sopenharmony_ci	return err;
456762306a36Sopenharmony_ci}
456862306a36Sopenharmony_ci
456962306a36Sopenharmony_cistatic int build_mount_kattr(const struct mount_attr *attr, size_t usize,
457062306a36Sopenharmony_ci			     struct mount_kattr *kattr, unsigned int flags)
457162306a36Sopenharmony_ci{
457262306a36Sopenharmony_ci	unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
457362306a36Sopenharmony_ci
457462306a36Sopenharmony_ci	if (flags & AT_NO_AUTOMOUNT)
457562306a36Sopenharmony_ci		lookup_flags &= ~LOOKUP_AUTOMOUNT;
457662306a36Sopenharmony_ci	if (flags & AT_SYMLINK_NOFOLLOW)
457762306a36Sopenharmony_ci		lookup_flags &= ~LOOKUP_FOLLOW;
457862306a36Sopenharmony_ci	if (flags & AT_EMPTY_PATH)
457962306a36Sopenharmony_ci		lookup_flags |= LOOKUP_EMPTY;
458062306a36Sopenharmony_ci
458162306a36Sopenharmony_ci	*kattr = (struct mount_kattr) {
458262306a36Sopenharmony_ci		.lookup_flags	= lookup_flags,
458362306a36Sopenharmony_ci		.recurse	= !!(flags & AT_RECURSIVE),
458462306a36Sopenharmony_ci	};
458562306a36Sopenharmony_ci
458662306a36Sopenharmony_ci	if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
458762306a36Sopenharmony_ci		return -EINVAL;
458862306a36Sopenharmony_ci	if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
458962306a36Sopenharmony_ci		return -EINVAL;
459062306a36Sopenharmony_ci	kattr->propagation = attr->propagation;
459162306a36Sopenharmony_ci
459262306a36Sopenharmony_ci	if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS)
459362306a36Sopenharmony_ci		return -EINVAL;
459462306a36Sopenharmony_ci
459562306a36Sopenharmony_ci	kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set);
459662306a36Sopenharmony_ci	kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr);
459762306a36Sopenharmony_ci
459862306a36Sopenharmony_ci	/*
459962306a36Sopenharmony_ci	 * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap,
460062306a36Sopenharmony_ci	 * users wanting to transition to a different atime setting cannot
460162306a36Sopenharmony_ci	 * simply specify the atime setting in @attr_set, but must also
460262306a36Sopenharmony_ci	 * specify MOUNT_ATTR__ATIME in the @attr_clr field.
460362306a36Sopenharmony_ci	 * So ensure that MOUNT_ATTR__ATIME can't be partially set in
460462306a36Sopenharmony_ci	 * @attr_clr and that @attr_set can't have any atime bits set if
460562306a36Sopenharmony_ci	 * MOUNT_ATTR__ATIME isn't set in @attr_clr.
460662306a36Sopenharmony_ci	 */
460762306a36Sopenharmony_ci	if (attr->attr_clr & MOUNT_ATTR__ATIME) {
460862306a36Sopenharmony_ci		if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME)
460962306a36Sopenharmony_ci			return -EINVAL;
461062306a36Sopenharmony_ci
461162306a36Sopenharmony_ci		/*
461262306a36Sopenharmony_ci		 * Clear all previous time settings as they are mutually
461362306a36Sopenharmony_ci		 * exclusive.
461462306a36Sopenharmony_ci		 */
461562306a36Sopenharmony_ci		kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME;
461662306a36Sopenharmony_ci		switch (attr->attr_set & MOUNT_ATTR__ATIME) {
461762306a36Sopenharmony_ci		case MOUNT_ATTR_RELATIME:
461862306a36Sopenharmony_ci			kattr->attr_set |= MNT_RELATIME;
461962306a36Sopenharmony_ci			break;
462062306a36Sopenharmony_ci		case MOUNT_ATTR_NOATIME:
462162306a36Sopenharmony_ci			kattr->attr_set |= MNT_NOATIME;
462262306a36Sopenharmony_ci			break;
462362306a36Sopenharmony_ci		case MOUNT_ATTR_STRICTATIME:
462462306a36Sopenharmony_ci			break;
462562306a36Sopenharmony_ci		default:
462662306a36Sopenharmony_ci			return -EINVAL;
462762306a36Sopenharmony_ci		}
462862306a36Sopenharmony_ci	} else {
462962306a36Sopenharmony_ci		if (attr->attr_set & MOUNT_ATTR__ATIME)
463062306a36Sopenharmony_ci			return -EINVAL;
463162306a36Sopenharmony_ci	}
463262306a36Sopenharmony_ci
463362306a36Sopenharmony_ci	return build_mount_idmapped(attr, usize, kattr, flags);
463462306a36Sopenharmony_ci}
463562306a36Sopenharmony_ci
463662306a36Sopenharmony_cistatic void finish_mount_kattr(struct mount_kattr *kattr)
463762306a36Sopenharmony_ci{
463862306a36Sopenharmony_ci	put_user_ns(kattr->mnt_userns);
463962306a36Sopenharmony_ci	kattr->mnt_userns = NULL;
464062306a36Sopenharmony_ci
464162306a36Sopenharmony_ci	if (kattr->mnt_idmap)
464262306a36Sopenharmony_ci		mnt_idmap_put(kattr->mnt_idmap);
464362306a36Sopenharmony_ci}
464462306a36Sopenharmony_ci
464562306a36Sopenharmony_ciSYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
464662306a36Sopenharmony_ci		unsigned int, flags, struct mount_attr __user *, uattr,
464762306a36Sopenharmony_ci		size_t, usize)
464862306a36Sopenharmony_ci{
464962306a36Sopenharmony_ci	int err;
465062306a36Sopenharmony_ci	struct path target;
465162306a36Sopenharmony_ci	struct mount_attr attr;
465262306a36Sopenharmony_ci	struct mount_kattr kattr;
465362306a36Sopenharmony_ci
465462306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);
465562306a36Sopenharmony_ci
465662306a36Sopenharmony_ci	if (flags & ~(AT_EMPTY_PATH |
465762306a36Sopenharmony_ci		      AT_RECURSIVE |
465862306a36Sopenharmony_ci		      AT_SYMLINK_NOFOLLOW |
465962306a36Sopenharmony_ci		      AT_NO_AUTOMOUNT))
466062306a36Sopenharmony_ci		return -EINVAL;
466162306a36Sopenharmony_ci
466262306a36Sopenharmony_ci	if (unlikely(usize > PAGE_SIZE))
466362306a36Sopenharmony_ci		return -E2BIG;
466462306a36Sopenharmony_ci	if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
466562306a36Sopenharmony_ci		return -EINVAL;
466662306a36Sopenharmony_ci
466762306a36Sopenharmony_ci	if (!may_mount())
466862306a36Sopenharmony_ci		return -EPERM;
466962306a36Sopenharmony_ci
467062306a36Sopenharmony_ci	err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
467162306a36Sopenharmony_ci	if (err)
467262306a36Sopenharmony_ci		return err;
467362306a36Sopenharmony_ci
467462306a36Sopenharmony_ci	/* Don't bother walking through the mounts if this is a nop. */
467562306a36Sopenharmony_ci	if (attr.attr_set == 0 &&
467662306a36Sopenharmony_ci	    attr.attr_clr == 0 &&
467762306a36Sopenharmony_ci	    attr.propagation == 0)
467862306a36Sopenharmony_ci		return 0;
467962306a36Sopenharmony_ci
468062306a36Sopenharmony_ci	err = build_mount_kattr(&attr, usize, &kattr, flags);
468162306a36Sopenharmony_ci	if (err)
468262306a36Sopenharmony_ci		return err;
468362306a36Sopenharmony_ci
468462306a36Sopenharmony_ci	err = user_path_at(dfd, path, kattr.lookup_flags, &target);
468562306a36Sopenharmony_ci	if (!err) {
468662306a36Sopenharmony_ci		err = do_mount_setattr(&target, &kattr);
468762306a36Sopenharmony_ci		path_put(&target);
468862306a36Sopenharmony_ci	}
468962306a36Sopenharmony_ci	finish_mount_kattr(&kattr);
469062306a36Sopenharmony_ci	return err;
469162306a36Sopenharmony_ci}
469262306a36Sopenharmony_ci
469362306a36Sopenharmony_cistatic void __init init_mount_tree(void)
469462306a36Sopenharmony_ci{
469562306a36Sopenharmony_ci	struct vfsmount *mnt;
469662306a36Sopenharmony_ci	struct mount *m;
469762306a36Sopenharmony_ci	struct mnt_namespace *ns;
469862306a36Sopenharmony_ci	struct path root;
469962306a36Sopenharmony_ci
470062306a36Sopenharmony_ci	mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL);
470162306a36Sopenharmony_ci	if (IS_ERR(mnt))
470262306a36Sopenharmony_ci		panic("Can't create rootfs");
470362306a36Sopenharmony_ci
470462306a36Sopenharmony_ci	ns = alloc_mnt_ns(&init_user_ns, false);
470562306a36Sopenharmony_ci	if (IS_ERR(ns))
470662306a36Sopenharmony_ci		panic("Can't allocate initial namespace");
470762306a36Sopenharmony_ci	m = real_mount(mnt);
470862306a36Sopenharmony_ci	m->mnt_ns = ns;
470962306a36Sopenharmony_ci	ns->root = m;
471062306a36Sopenharmony_ci	ns->mounts = 1;
471162306a36Sopenharmony_ci	list_add(&m->mnt_list, &ns->list);
471262306a36Sopenharmony_ci	init_task.nsproxy->mnt_ns = ns;
471362306a36Sopenharmony_ci	get_mnt_ns(ns);
471462306a36Sopenharmony_ci
471562306a36Sopenharmony_ci	root.mnt = mnt;
471662306a36Sopenharmony_ci	root.dentry = mnt->mnt_root;
471762306a36Sopenharmony_ci	mnt->mnt_flags |= MNT_LOCKED;
471862306a36Sopenharmony_ci
471962306a36Sopenharmony_ci	set_fs_pwd(current->fs, &root);
472062306a36Sopenharmony_ci	set_fs_root(current->fs, &root);
472162306a36Sopenharmony_ci}
472262306a36Sopenharmony_ci
472362306a36Sopenharmony_civoid __init mnt_init(void)
472462306a36Sopenharmony_ci{
472562306a36Sopenharmony_ci	int err;
472662306a36Sopenharmony_ci
472762306a36Sopenharmony_ci	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
472862306a36Sopenharmony_ci			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
472962306a36Sopenharmony_ci
473062306a36Sopenharmony_ci	mount_hashtable = alloc_large_system_hash("Mount-cache",
473162306a36Sopenharmony_ci				sizeof(struct hlist_head),
473262306a36Sopenharmony_ci				mhash_entries, 19,
473362306a36Sopenharmony_ci				HASH_ZERO,
473462306a36Sopenharmony_ci				&m_hash_shift, &m_hash_mask, 0, 0);
473562306a36Sopenharmony_ci	mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
473662306a36Sopenharmony_ci				sizeof(struct hlist_head),
473762306a36Sopenharmony_ci				mphash_entries, 19,
473862306a36Sopenharmony_ci				HASH_ZERO,
473962306a36Sopenharmony_ci				&mp_hash_shift, &mp_hash_mask, 0, 0);
474062306a36Sopenharmony_ci
474162306a36Sopenharmony_ci	if (!mount_hashtable || !mountpoint_hashtable)
474262306a36Sopenharmony_ci		panic("Failed to allocate mount hash table\n");
474362306a36Sopenharmony_ci
474462306a36Sopenharmony_ci	kernfs_init();
474562306a36Sopenharmony_ci
474662306a36Sopenharmony_ci	err = sysfs_init();
474762306a36Sopenharmony_ci	if (err)
474862306a36Sopenharmony_ci		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
474962306a36Sopenharmony_ci			__func__, err);
475062306a36Sopenharmony_ci	fs_kobj = kobject_create_and_add("fs", NULL);
475162306a36Sopenharmony_ci	if (!fs_kobj)
475262306a36Sopenharmony_ci		printk(KERN_WARNING "%s: kobj create error\n", __func__);
475362306a36Sopenharmony_ci	shmem_init();
475462306a36Sopenharmony_ci	init_rootfs();
475562306a36Sopenharmony_ci	init_mount_tree();
475662306a36Sopenharmony_ci}
475762306a36Sopenharmony_ci
475862306a36Sopenharmony_civoid put_mnt_ns(struct mnt_namespace *ns)
475962306a36Sopenharmony_ci{
476062306a36Sopenharmony_ci	if (!refcount_dec_and_test(&ns->ns.count))
476162306a36Sopenharmony_ci		return;
476262306a36Sopenharmony_ci	drop_collected_mounts(&ns->root->mnt);
476362306a36Sopenharmony_ci	free_mnt_ns(ns);
476462306a36Sopenharmony_ci}
476562306a36Sopenharmony_ci
476662306a36Sopenharmony_cistruct vfsmount *kern_mount(struct file_system_type *type)
476762306a36Sopenharmony_ci{
476862306a36Sopenharmony_ci	struct vfsmount *mnt;
476962306a36Sopenharmony_ci	mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
477062306a36Sopenharmony_ci	if (!IS_ERR(mnt)) {
477162306a36Sopenharmony_ci		/*
477262306a36Sopenharmony_ci		 * it is a longterm mount, don't release mnt until
477362306a36Sopenharmony_ci		 * we unmount before file sys is unregistered
477462306a36Sopenharmony_ci		*/
477562306a36Sopenharmony_ci		real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL;
477662306a36Sopenharmony_ci	}
477762306a36Sopenharmony_ci	return mnt;
477862306a36Sopenharmony_ci}
477962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kern_mount);
478062306a36Sopenharmony_ci
/*
 * Release a long-term kernel mount so that its mount point can be released.
 * An ERR_PTR() value in @mnt is silently ignored.
 */
void kern_unmount(struct vfsmount *mnt)
{
	if (IS_ERR(mnt))
		return;

	mnt_make_shortterm(mnt);
	/* Wait out an RCU grace period before dropping the mount (yecchhh). */
	synchronize_rcu();
	mntput(mnt);
}
EXPORT_SYMBOL(kern_unmount);
479162306a36Sopenharmony_ci
/*
 * Release the @num kernel mounts in @mnt[]. All of them are made short-term
 * first so that one expedited RCU grace period covers the whole batch, then
 * each mount is dropped.
 */
void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
{
	struct vfsmount **end = mnt + num;
	struct vfsmount **cur;

	for (cur = mnt; cur != end; cur++)
		mnt_make_shortterm(*cur);

	synchronize_rcu_expedited();

	for (cur = mnt; cur != end; cur++)
		mntput(*cur);
}
EXPORT_SYMBOL(kern_unmount_array);
480362306a36Sopenharmony_ci
480462306a36Sopenharmony_cibool our_mnt(struct vfsmount *mnt)
480562306a36Sopenharmony_ci{
480662306a36Sopenharmony_ci	return check_mnt(real_mount(mnt));
480762306a36Sopenharmony_ci}
480862306a36Sopenharmony_ci
480962306a36Sopenharmony_cibool current_chrooted(void)
481062306a36Sopenharmony_ci{
481162306a36Sopenharmony_ci	/* Does the current process have a non-standard root */
481262306a36Sopenharmony_ci	struct path ns_root;
481362306a36Sopenharmony_ci	struct path fs_root;
481462306a36Sopenharmony_ci	bool chrooted;
481562306a36Sopenharmony_ci
481662306a36Sopenharmony_ci	/* Find the namespace root */
481762306a36Sopenharmony_ci	ns_root.mnt = &current->nsproxy->mnt_ns->root->mnt;
481862306a36Sopenharmony_ci	ns_root.dentry = ns_root.mnt->mnt_root;
481962306a36Sopenharmony_ci	path_get(&ns_root);
482062306a36Sopenharmony_ci	while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root))
482162306a36Sopenharmony_ci		;
482262306a36Sopenharmony_ci
482362306a36Sopenharmony_ci	get_fs_root(current->fs, &fs_root);
482462306a36Sopenharmony_ci
482562306a36Sopenharmony_ci	chrooted = !path_equal(&fs_root, &ns_root);
482662306a36Sopenharmony_ci
482762306a36Sopenharmony_ci	path_put(&fs_root);
482862306a36Sopenharmony_ci	path_put(&ns_root);
482962306a36Sopenharmony_ci
483062306a36Sopenharmony_ci	return chrooted;
483162306a36Sopenharmony_ci}
483262306a36Sopenharmony_ci
483362306a36Sopenharmony_cistatic bool mnt_already_visible(struct mnt_namespace *ns,
483462306a36Sopenharmony_ci				const struct super_block *sb,
483562306a36Sopenharmony_ci				int *new_mnt_flags)
483662306a36Sopenharmony_ci{
483762306a36Sopenharmony_ci	int new_flags = *new_mnt_flags;
483862306a36Sopenharmony_ci	struct mount *mnt;
483962306a36Sopenharmony_ci	bool visible = false;
484062306a36Sopenharmony_ci
484162306a36Sopenharmony_ci	down_read(&namespace_sem);
484262306a36Sopenharmony_ci	lock_ns_list(ns);
484362306a36Sopenharmony_ci	list_for_each_entry(mnt, &ns->list, mnt_list) {
484462306a36Sopenharmony_ci		struct mount *child;
484562306a36Sopenharmony_ci		int mnt_flags;
484662306a36Sopenharmony_ci
484762306a36Sopenharmony_ci		if (mnt_is_cursor(mnt))
484862306a36Sopenharmony_ci			continue;
484962306a36Sopenharmony_ci
485062306a36Sopenharmony_ci		if (mnt->mnt.mnt_sb->s_type != sb->s_type)
485162306a36Sopenharmony_ci			continue;
485262306a36Sopenharmony_ci
485362306a36Sopenharmony_ci		/* This mount is not fully visible if it's root directory
485462306a36Sopenharmony_ci		 * is not the root directory of the filesystem.
485562306a36Sopenharmony_ci		 */
485662306a36Sopenharmony_ci		if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root)
485762306a36Sopenharmony_ci			continue;
485862306a36Sopenharmony_ci
485962306a36Sopenharmony_ci		/* A local view of the mount flags */
486062306a36Sopenharmony_ci		mnt_flags = mnt->mnt.mnt_flags;
486162306a36Sopenharmony_ci
486262306a36Sopenharmony_ci		/* Don't miss readonly hidden in the superblock flags */
486362306a36Sopenharmony_ci		if (sb_rdonly(mnt->mnt.mnt_sb))
486462306a36Sopenharmony_ci			mnt_flags |= MNT_LOCK_READONLY;
486562306a36Sopenharmony_ci
486662306a36Sopenharmony_ci		/* Verify the mount flags are equal to or more permissive
486762306a36Sopenharmony_ci		 * than the proposed new mount.
486862306a36Sopenharmony_ci		 */
486962306a36Sopenharmony_ci		if ((mnt_flags & MNT_LOCK_READONLY) &&
487062306a36Sopenharmony_ci		    !(new_flags & MNT_READONLY))
487162306a36Sopenharmony_ci			continue;
487262306a36Sopenharmony_ci		if ((mnt_flags & MNT_LOCK_ATIME) &&
487362306a36Sopenharmony_ci		    ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK)))
487462306a36Sopenharmony_ci			continue;
487562306a36Sopenharmony_ci
487662306a36Sopenharmony_ci		/* This mount is not fully visible if there are any
487762306a36Sopenharmony_ci		 * locked child mounts that cover anything except for
487862306a36Sopenharmony_ci		 * empty directories.
487962306a36Sopenharmony_ci		 */
488062306a36Sopenharmony_ci		list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
488162306a36Sopenharmony_ci			struct inode *inode = child->mnt_mountpoint->d_inode;
488262306a36Sopenharmony_ci			/* Only worry about locked mounts */
488362306a36Sopenharmony_ci			if (!(child->mnt.mnt_flags & MNT_LOCKED))
488462306a36Sopenharmony_ci				continue;
488562306a36Sopenharmony_ci			/* Is the directory permanetly empty? */
488662306a36Sopenharmony_ci			if (!is_empty_dir_inode(inode))
488762306a36Sopenharmony_ci				goto next;
488862306a36Sopenharmony_ci		}
488962306a36Sopenharmony_ci		/* Preserve the locked attributes */
489062306a36Sopenharmony_ci		*new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \
489162306a36Sopenharmony_ci					       MNT_LOCK_ATIME);
489262306a36Sopenharmony_ci		visible = true;
489362306a36Sopenharmony_ci		goto found;
489462306a36Sopenharmony_ci	next:	;
489562306a36Sopenharmony_ci	}
489662306a36Sopenharmony_cifound:
489762306a36Sopenharmony_ci	unlock_ns_list(ns);
489862306a36Sopenharmony_ci	up_read(&namespace_sem);
489962306a36Sopenharmony_ci	return visible;
490062306a36Sopenharmony_ci}
490162306a36Sopenharmony_ci
490262306a36Sopenharmony_cistatic bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags)
490362306a36Sopenharmony_ci{
490462306a36Sopenharmony_ci	const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV;
490562306a36Sopenharmony_ci	struct mnt_namespace *ns = current->nsproxy->mnt_ns;
490662306a36Sopenharmony_ci	unsigned long s_iflags;
490762306a36Sopenharmony_ci
490862306a36Sopenharmony_ci	if (ns->user_ns == &init_user_ns)
490962306a36Sopenharmony_ci		return false;
491062306a36Sopenharmony_ci
491162306a36Sopenharmony_ci	/* Can this filesystem be too revealing? */
491262306a36Sopenharmony_ci	s_iflags = sb->s_iflags;
491362306a36Sopenharmony_ci	if (!(s_iflags & SB_I_USERNS_VISIBLE))
491462306a36Sopenharmony_ci		return false;
491562306a36Sopenharmony_ci
491662306a36Sopenharmony_ci	if ((s_iflags & required_iflags) != required_iflags) {
491762306a36Sopenharmony_ci		WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n",
491862306a36Sopenharmony_ci			  required_iflags);
491962306a36Sopenharmony_ci		return true;
492062306a36Sopenharmony_ci	}
492162306a36Sopenharmony_ci
492262306a36Sopenharmony_ci	return !mnt_already_visible(ns, sb, new_mnt_flags);
492362306a36Sopenharmony_ci}
492462306a36Sopenharmony_ci
492562306a36Sopenharmony_cibool mnt_may_suid(struct vfsmount *mnt)
492662306a36Sopenharmony_ci{
492762306a36Sopenharmony_ci	/*
492862306a36Sopenharmony_ci	 * Foreign mounts (accessed via fchdir or through /proc
492962306a36Sopenharmony_ci	 * symlinks) are always treated as if they are nosuid.  This
493062306a36Sopenharmony_ci	 * prevents namespaces from trusting potentially unsafe
493162306a36Sopenharmony_ci	 * suid/sgid bits, file caps, or security labels that originate
493262306a36Sopenharmony_ci	 * in other namespaces.
493362306a36Sopenharmony_ci	 */
493462306a36Sopenharmony_ci	return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) &&
493562306a36Sopenharmony_ci	       current_in_userns(mnt->mnt_sb->s_user_ns);
493662306a36Sopenharmony_ci}
493762306a36Sopenharmony_ci
493862306a36Sopenharmony_cistatic struct ns_common *mntns_get(struct task_struct *task)
493962306a36Sopenharmony_ci{
494062306a36Sopenharmony_ci	struct ns_common *ns = NULL;
494162306a36Sopenharmony_ci	struct nsproxy *nsproxy;
494262306a36Sopenharmony_ci
494362306a36Sopenharmony_ci	task_lock(task);
494462306a36Sopenharmony_ci	nsproxy = task->nsproxy;
494562306a36Sopenharmony_ci	if (nsproxy) {
494662306a36Sopenharmony_ci		ns = &nsproxy->mnt_ns->ns;
494762306a36Sopenharmony_ci		get_mnt_ns(to_mnt_ns(ns));
494862306a36Sopenharmony_ci	}
494962306a36Sopenharmony_ci	task_unlock(task);
495062306a36Sopenharmony_ci
495162306a36Sopenharmony_ci	return ns;
495262306a36Sopenharmony_ci}
495362306a36Sopenharmony_ci
/*
 * mntns_put - drop a mount namespace reference
 * @ns: ns_common embedded in the mount namespace to release
 */
static void mntns_put(struct ns_common *ns)
{
	struct mnt_namespace *mnt_ns = to_mnt_ns(ns);

	put_mnt_ns(mnt_ns);
}
495862306a36Sopenharmony_ci
495962306a36Sopenharmony_cistatic int mntns_install(struct nsset *nsset, struct ns_common *ns)
496062306a36Sopenharmony_ci{
496162306a36Sopenharmony_ci	struct nsproxy *nsproxy = nsset->nsproxy;
496262306a36Sopenharmony_ci	struct fs_struct *fs = nsset->fs;
496362306a36Sopenharmony_ci	struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns;
496462306a36Sopenharmony_ci	struct user_namespace *user_ns = nsset->cred->user_ns;
496562306a36Sopenharmony_ci	struct path root;
496662306a36Sopenharmony_ci	int err;
496762306a36Sopenharmony_ci
496862306a36Sopenharmony_ci	if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) ||
496962306a36Sopenharmony_ci	    !ns_capable(user_ns, CAP_SYS_CHROOT) ||
497062306a36Sopenharmony_ci	    !ns_capable(user_ns, CAP_SYS_ADMIN))
497162306a36Sopenharmony_ci		return -EPERM;
497262306a36Sopenharmony_ci
497362306a36Sopenharmony_ci	if (is_anon_ns(mnt_ns))
497462306a36Sopenharmony_ci		return -EINVAL;
497562306a36Sopenharmony_ci
497662306a36Sopenharmony_ci	if (fs->users != 1)
497762306a36Sopenharmony_ci		return -EINVAL;
497862306a36Sopenharmony_ci
497962306a36Sopenharmony_ci	get_mnt_ns(mnt_ns);
498062306a36Sopenharmony_ci	old_mnt_ns = nsproxy->mnt_ns;
498162306a36Sopenharmony_ci	nsproxy->mnt_ns = mnt_ns;
498262306a36Sopenharmony_ci
498362306a36Sopenharmony_ci	/* Find the root */
498462306a36Sopenharmony_ci	err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt,
498562306a36Sopenharmony_ci				"/", LOOKUP_DOWN, &root);
498662306a36Sopenharmony_ci	if (err) {
498762306a36Sopenharmony_ci		/* revert to old namespace */
498862306a36Sopenharmony_ci		nsproxy->mnt_ns = old_mnt_ns;
498962306a36Sopenharmony_ci		put_mnt_ns(mnt_ns);
499062306a36Sopenharmony_ci		return err;
499162306a36Sopenharmony_ci	}
499262306a36Sopenharmony_ci
499362306a36Sopenharmony_ci	put_mnt_ns(old_mnt_ns);
499462306a36Sopenharmony_ci
499562306a36Sopenharmony_ci	/* Update the pwd and root */
499662306a36Sopenharmony_ci	set_fs_pwd(fs, &root);
499762306a36Sopenharmony_ci	set_fs_root(fs, &root);
499862306a36Sopenharmony_ci
499962306a36Sopenharmony_ci	path_put(&root);
500062306a36Sopenharmony_ci	return 0;
500162306a36Sopenharmony_ci}
500262306a36Sopenharmony_ci
500362306a36Sopenharmony_cistatic struct user_namespace *mntns_owner(struct ns_common *ns)
500462306a36Sopenharmony_ci{
500562306a36Sopenharmony_ci	return to_mnt_ns(ns)->user_ns;
500662306a36Sopenharmony_ci}
500762306a36Sopenharmony_ci
500862306a36Sopenharmony_ciconst struct proc_ns_operations mntns_operations = {
500962306a36Sopenharmony_ci	.name		= "mnt",
501062306a36Sopenharmony_ci	.type		= CLONE_NEWNS,
501162306a36Sopenharmony_ci	.get		= mntns_get,
501262306a36Sopenharmony_ci	.put		= mntns_put,
501362306a36Sopenharmony_ci	.install	= mntns_install,
501462306a36Sopenharmony_ci	.owner		= mntns_owner,
501562306a36Sopenharmony_ci};
501662306a36Sopenharmony_ci
501762306a36Sopenharmony_ci#ifdef CONFIG_SYSCTL
501862306a36Sopenharmony_cistatic struct ctl_table fs_namespace_sysctls[] = {
501962306a36Sopenharmony_ci	{
502062306a36Sopenharmony_ci		.procname	= "mount-max",
502162306a36Sopenharmony_ci		.data		= &sysctl_mount_max,
502262306a36Sopenharmony_ci		.maxlen		= sizeof(unsigned int),
502362306a36Sopenharmony_ci		.mode		= 0644,
502462306a36Sopenharmony_ci		.proc_handler	= proc_dointvec_minmax,
502562306a36Sopenharmony_ci		.extra1		= SYSCTL_ONE,
502662306a36Sopenharmony_ci	},
502762306a36Sopenharmony_ci	{ }
502862306a36Sopenharmony_ci};
502962306a36Sopenharmony_ci
503062306a36Sopenharmony_cistatic int __init init_fs_namespace_sysctls(void)
503162306a36Sopenharmony_ci{
503262306a36Sopenharmony_ci	register_sysctl_init("fs", fs_namespace_sysctls);
503362306a36Sopenharmony_ci	return 0;
503462306a36Sopenharmony_ci}
503562306a36Sopenharmony_cifs_initcall(init_fs_namespace_sysctls);
503662306a36Sopenharmony_ci
503762306a36Sopenharmony_ci#endif /* CONFIG_SYSCTL */
5038