162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * linux/fs/namespace.c 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * (C) Copyright Al Viro 2000, 2001 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Based on code from fs/super.c, copyright Linus Torvalds and others. 862306a36Sopenharmony_ci * Heavily rewritten. 962306a36Sopenharmony_ci */ 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#include <linux/syscalls.h> 1262306a36Sopenharmony_ci#include <linux/export.h> 1362306a36Sopenharmony_ci#include <linux/capability.h> 1462306a36Sopenharmony_ci#include <linux/mnt_namespace.h> 1562306a36Sopenharmony_ci#include <linux/user_namespace.h> 1662306a36Sopenharmony_ci#include <linux/namei.h> 1762306a36Sopenharmony_ci#include <linux/security.h> 1862306a36Sopenharmony_ci#include <linux/cred.h> 1962306a36Sopenharmony_ci#include <linux/idr.h> 2062306a36Sopenharmony_ci#include <linux/init.h> /* init_rootfs */ 2162306a36Sopenharmony_ci#include <linux/fs_struct.h> /* get_fs_root et.al. 
*/ 2262306a36Sopenharmony_ci#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ 2362306a36Sopenharmony_ci#include <linux/file.h> 2462306a36Sopenharmony_ci#include <linux/uaccess.h> 2562306a36Sopenharmony_ci#include <linux/proc_ns.h> 2662306a36Sopenharmony_ci#include <linux/magic.h> 2762306a36Sopenharmony_ci#include <linux/memblock.h> 2862306a36Sopenharmony_ci#include <linux/proc_fs.h> 2962306a36Sopenharmony_ci#include <linux/task_work.h> 3062306a36Sopenharmony_ci#include <linux/sched/task.h> 3162306a36Sopenharmony_ci#include <uapi/linux/mount.h> 3262306a36Sopenharmony_ci#include <linux/fs_context.h> 3362306a36Sopenharmony_ci#include <linux/shmem_fs.h> 3462306a36Sopenharmony_ci#include <linux/mnt_idmapping.h> 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci#include "pnode.h" 3762306a36Sopenharmony_ci#include "internal.h" 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci/* Maximum number of mounts in a mount namespace */ 4062306a36Sopenharmony_cistatic unsigned int sysctl_mount_max __read_mostly = 100000; 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_cistatic unsigned int m_hash_mask __read_mostly; 4362306a36Sopenharmony_cistatic unsigned int m_hash_shift __read_mostly; 4462306a36Sopenharmony_cistatic unsigned int mp_hash_mask __read_mostly; 4562306a36Sopenharmony_cistatic unsigned int mp_hash_shift __read_mostly; 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_cistatic __initdata unsigned long mhash_entries; 4862306a36Sopenharmony_cistatic int __init set_mhash_entries(char *str) 4962306a36Sopenharmony_ci{ 5062306a36Sopenharmony_ci if (!str) 5162306a36Sopenharmony_ci return 0; 5262306a36Sopenharmony_ci mhash_entries = simple_strtoul(str, &str, 0); 5362306a36Sopenharmony_ci return 1; 5462306a36Sopenharmony_ci} 5562306a36Sopenharmony_ci__setup("mhash_entries=", set_mhash_entries); 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_cistatic __initdata unsigned long mphash_entries; 5862306a36Sopenharmony_cistatic int __init set_mphash_entries(char *str) 
5962306a36Sopenharmony_ci{ 6062306a36Sopenharmony_ci if (!str) 6162306a36Sopenharmony_ci return 0; 6262306a36Sopenharmony_ci mphash_entries = simple_strtoul(str, &str, 0); 6362306a36Sopenharmony_ci return 1; 6462306a36Sopenharmony_ci} 6562306a36Sopenharmony_ci__setup("mphash_entries=", set_mphash_entries); 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_cistatic u64 event; 6862306a36Sopenharmony_cistatic DEFINE_IDA(mnt_id_ida); 6962306a36Sopenharmony_cistatic DEFINE_IDA(mnt_group_ida); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_cistatic struct hlist_head *mount_hashtable __read_mostly; 7262306a36Sopenharmony_cistatic struct hlist_head *mountpoint_hashtable __read_mostly; 7362306a36Sopenharmony_cistatic struct kmem_cache *mnt_cache __read_mostly; 7462306a36Sopenharmony_cistatic DECLARE_RWSEM(namespace_sem); 7562306a36Sopenharmony_cistatic HLIST_HEAD(unmounted); /* protected by namespace_sem */ 7662306a36Sopenharmony_cistatic LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_cistruct mount_kattr { 7962306a36Sopenharmony_ci unsigned int attr_set; 8062306a36Sopenharmony_ci unsigned int attr_clr; 8162306a36Sopenharmony_ci unsigned int propagation; 8262306a36Sopenharmony_ci unsigned int lookup_flags; 8362306a36Sopenharmony_ci bool recurse; 8462306a36Sopenharmony_ci struct user_namespace *mnt_userns; 8562306a36Sopenharmony_ci struct mnt_idmap *mnt_idmap; 8662306a36Sopenharmony_ci}; 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci/* /sys/fs */ 8962306a36Sopenharmony_cistruct kobject *fs_kobj; 9062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(fs_kobj); 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci/* 9362306a36Sopenharmony_ci * vfsmount lock may be taken for read to prevent changes to the 9462306a36Sopenharmony_ci * vfsmount hash, ie. during mountpoint lookups or walking back 9562306a36Sopenharmony_ci * up the tree. 
9662306a36Sopenharmony_ci * 9762306a36Sopenharmony_ci * It should be taken for write in all cases where the vfsmount 9862306a36Sopenharmony_ci * tree or hash is modified or when a vfsmount structure is modified. 9962306a36Sopenharmony_ci */ 10062306a36Sopenharmony_ci__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_cistatic inline void lock_mount_hash(void) 10362306a36Sopenharmony_ci{ 10462306a36Sopenharmony_ci write_seqlock(&mount_lock); 10562306a36Sopenharmony_ci} 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_cistatic inline void unlock_mount_hash(void) 10862306a36Sopenharmony_ci{ 10962306a36Sopenharmony_ci write_sequnlock(&mount_lock); 11062306a36Sopenharmony_ci} 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_cistatic inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) 11362306a36Sopenharmony_ci{ 11462306a36Sopenharmony_ci unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); 11562306a36Sopenharmony_ci tmp += ((unsigned long)dentry / L1_CACHE_BYTES); 11662306a36Sopenharmony_ci tmp = tmp + (tmp >> m_hash_shift); 11762306a36Sopenharmony_ci return &mount_hashtable[tmp & m_hash_mask]; 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_cistatic inline struct hlist_head *mp_hash(struct dentry *dentry) 12162306a36Sopenharmony_ci{ 12262306a36Sopenharmony_ci unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES); 12362306a36Sopenharmony_ci tmp = tmp + (tmp >> mp_hash_shift); 12462306a36Sopenharmony_ci return &mountpoint_hashtable[tmp & mp_hash_mask]; 12562306a36Sopenharmony_ci} 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_cistatic int mnt_alloc_id(struct mount *mnt) 12862306a36Sopenharmony_ci{ 12962306a36Sopenharmony_ci int res = ida_alloc(&mnt_id_ida, GFP_KERNEL); 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci if (res < 0) 13262306a36Sopenharmony_ci return res; 13362306a36Sopenharmony_ci mnt->mnt_id = res; 
13462306a36Sopenharmony_ci return 0; 13562306a36Sopenharmony_ci} 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_cistatic void mnt_free_id(struct mount *mnt) 13862306a36Sopenharmony_ci{ 13962306a36Sopenharmony_ci ida_free(&mnt_id_ida, mnt->mnt_id); 14062306a36Sopenharmony_ci} 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci/* 14362306a36Sopenharmony_ci * Allocate a new peer group ID 14462306a36Sopenharmony_ci */ 14562306a36Sopenharmony_cistatic int mnt_alloc_group_id(struct mount *mnt) 14662306a36Sopenharmony_ci{ 14762306a36Sopenharmony_ci int res = ida_alloc_min(&mnt_group_ida, 1, GFP_KERNEL); 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci if (res < 0) 15062306a36Sopenharmony_ci return res; 15162306a36Sopenharmony_ci mnt->mnt_group_id = res; 15262306a36Sopenharmony_ci return 0; 15362306a36Sopenharmony_ci} 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci/* 15662306a36Sopenharmony_ci * Release a peer group ID 15762306a36Sopenharmony_ci */ 15862306a36Sopenharmony_civoid mnt_release_group_id(struct mount *mnt) 15962306a36Sopenharmony_ci{ 16062306a36Sopenharmony_ci ida_free(&mnt_group_ida, mnt->mnt_group_id); 16162306a36Sopenharmony_ci mnt->mnt_group_id = 0; 16262306a36Sopenharmony_ci} 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci/* 16562306a36Sopenharmony_ci * vfsmount lock must be held for read 16662306a36Sopenharmony_ci */ 16762306a36Sopenharmony_cistatic inline void mnt_add_count(struct mount *mnt, int n) 16862306a36Sopenharmony_ci{ 16962306a36Sopenharmony_ci#ifdef CONFIG_SMP 17062306a36Sopenharmony_ci this_cpu_add(mnt->mnt_pcp->mnt_count, n); 17162306a36Sopenharmony_ci#else 17262306a36Sopenharmony_ci preempt_disable(); 17362306a36Sopenharmony_ci mnt->mnt_count += n; 17462306a36Sopenharmony_ci preempt_enable(); 17562306a36Sopenharmony_ci#endif 17662306a36Sopenharmony_ci} 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci/* 17962306a36Sopenharmony_ci * vfsmount lock must be held for write 18062306a36Sopenharmony_ci */ 
18162306a36Sopenharmony_ciint mnt_get_count(struct mount *mnt) 18262306a36Sopenharmony_ci{ 18362306a36Sopenharmony_ci#ifdef CONFIG_SMP 18462306a36Sopenharmony_ci int count = 0; 18562306a36Sopenharmony_ci int cpu; 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 18862306a36Sopenharmony_ci count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_count; 18962306a36Sopenharmony_ci } 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci return count; 19262306a36Sopenharmony_ci#else 19362306a36Sopenharmony_ci return mnt->mnt_count; 19462306a36Sopenharmony_ci#endif 19562306a36Sopenharmony_ci} 19662306a36Sopenharmony_ci 19762306a36Sopenharmony_cistatic struct mount *alloc_vfsmnt(const char *name) 19862306a36Sopenharmony_ci{ 19962306a36Sopenharmony_ci struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL); 20062306a36Sopenharmony_ci if (mnt) { 20162306a36Sopenharmony_ci int err; 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci err = mnt_alloc_id(mnt); 20462306a36Sopenharmony_ci if (err) 20562306a36Sopenharmony_ci goto out_free_cache; 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci if (name) { 20862306a36Sopenharmony_ci mnt->mnt_devname = kstrdup_const(name, 20962306a36Sopenharmony_ci GFP_KERNEL_ACCOUNT); 21062306a36Sopenharmony_ci if (!mnt->mnt_devname) 21162306a36Sopenharmony_ci goto out_free_id; 21262306a36Sopenharmony_ci } 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci#ifdef CONFIG_SMP 21562306a36Sopenharmony_ci mnt->mnt_pcp = alloc_percpu(struct mnt_pcp); 21662306a36Sopenharmony_ci if (!mnt->mnt_pcp) 21762306a36Sopenharmony_ci goto out_free_devname; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci this_cpu_add(mnt->mnt_pcp->mnt_count, 1); 22062306a36Sopenharmony_ci#else 22162306a36Sopenharmony_ci mnt->mnt_count = 1; 22262306a36Sopenharmony_ci mnt->mnt_writers = 0; 22362306a36Sopenharmony_ci#endif 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci INIT_HLIST_NODE(&mnt->mnt_hash); 22662306a36Sopenharmony_ci 
INIT_LIST_HEAD(&mnt->mnt_child); 22762306a36Sopenharmony_ci INIT_LIST_HEAD(&mnt->mnt_mounts); 22862306a36Sopenharmony_ci INIT_LIST_HEAD(&mnt->mnt_list); 22962306a36Sopenharmony_ci INIT_LIST_HEAD(&mnt->mnt_expire); 23062306a36Sopenharmony_ci INIT_LIST_HEAD(&mnt->mnt_share); 23162306a36Sopenharmony_ci INIT_LIST_HEAD(&mnt->mnt_slave_list); 23262306a36Sopenharmony_ci INIT_LIST_HEAD(&mnt->mnt_slave); 23362306a36Sopenharmony_ci INIT_HLIST_NODE(&mnt->mnt_mp_list); 23462306a36Sopenharmony_ci INIT_LIST_HEAD(&mnt->mnt_umounting); 23562306a36Sopenharmony_ci INIT_HLIST_HEAD(&mnt->mnt_stuck_children); 23662306a36Sopenharmony_ci mnt->mnt.mnt_idmap = &nop_mnt_idmap; 23762306a36Sopenharmony_ci } 23862306a36Sopenharmony_ci return mnt; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci#ifdef CONFIG_SMP 24162306a36Sopenharmony_ciout_free_devname: 24262306a36Sopenharmony_ci kfree_const(mnt->mnt_devname); 24362306a36Sopenharmony_ci#endif 24462306a36Sopenharmony_ciout_free_id: 24562306a36Sopenharmony_ci mnt_free_id(mnt); 24662306a36Sopenharmony_ciout_free_cache: 24762306a36Sopenharmony_ci kmem_cache_free(mnt_cache, mnt); 24862306a36Sopenharmony_ci return NULL; 24962306a36Sopenharmony_ci} 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci/* 25262306a36Sopenharmony_ci * Most r/o checks on a fs are for operations that take 25362306a36Sopenharmony_ci * discrete amounts of time, like a write() or unlink(). 25462306a36Sopenharmony_ci * We must keep track of when those operations start 25562306a36Sopenharmony_ci * (for permission checks) and when they end, so that 25662306a36Sopenharmony_ci * we can determine when writes are able to occur to 25762306a36Sopenharmony_ci * a filesystem. 
25862306a36Sopenharmony_ci */ 25962306a36Sopenharmony_ci/* 26062306a36Sopenharmony_ci * __mnt_is_readonly: check whether a mount is read-only 26162306a36Sopenharmony_ci * @mnt: the mount to check for its write status 26262306a36Sopenharmony_ci * 26362306a36Sopenharmony_ci * This shouldn't be used directly ouside of the VFS. 26462306a36Sopenharmony_ci * It does not guarantee that the filesystem will stay 26562306a36Sopenharmony_ci * r/w, just that it is right *now*. This can not and 26662306a36Sopenharmony_ci * should not be used in place of IS_RDONLY(inode). 26762306a36Sopenharmony_ci * mnt_want/drop_write() will _keep_ the filesystem 26862306a36Sopenharmony_ci * r/w. 26962306a36Sopenharmony_ci */ 27062306a36Sopenharmony_cibool __mnt_is_readonly(struct vfsmount *mnt) 27162306a36Sopenharmony_ci{ 27262306a36Sopenharmony_ci return (mnt->mnt_flags & MNT_READONLY) || sb_rdonly(mnt->mnt_sb); 27362306a36Sopenharmony_ci} 27462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__mnt_is_readonly); 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_cistatic inline void mnt_inc_writers(struct mount *mnt) 27762306a36Sopenharmony_ci{ 27862306a36Sopenharmony_ci#ifdef CONFIG_SMP 27962306a36Sopenharmony_ci this_cpu_inc(mnt->mnt_pcp->mnt_writers); 28062306a36Sopenharmony_ci#else 28162306a36Sopenharmony_ci mnt->mnt_writers++; 28262306a36Sopenharmony_ci#endif 28362306a36Sopenharmony_ci} 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_cistatic inline void mnt_dec_writers(struct mount *mnt) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci#ifdef CONFIG_SMP 28862306a36Sopenharmony_ci this_cpu_dec(mnt->mnt_pcp->mnt_writers); 28962306a36Sopenharmony_ci#else 29062306a36Sopenharmony_ci mnt->mnt_writers--; 29162306a36Sopenharmony_ci#endif 29262306a36Sopenharmony_ci} 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_cistatic unsigned int mnt_get_writers(struct mount *mnt) 29562306a36Sopenharmony_ci{ 29662306a36Sopenharmony_ci#ifdef CONFIG_SMP 29762306a36Sopenharmony_ci unsigned int count = 0; 
29862306a36Sopenharmony_ci int cpu; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 30162306a36Sopenharmony_ci count += per_cpu_ptr(mnt->mnt_pcp, cpu)->mnt_writers; 30262306a36Sopenharmony_ci } 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci return count; 30562306a36Sopenharmony_ci#else 30662306a36Sopenharmony_ci return mnt->mnt_writers; 30762306a36Sopenharmony_ci#endif 30862306a36Sopenharmony_ci} 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_cistatic int mnt_is_readonly(struct vfsmount *mnt) 31162306a36Sopenharmony_ci{ 31262306a36Sopenharmony_ci if (READ_ONCE(mnt->mnt_sb->s_readonly_remount)) 31362306a36Sopenharmony_ci return 1; 31462306a36Sopenharmony_ci /* 31562306a36Sopenharmony_ci * The barrier pairs with the barrier in sb_start_ro_state_change() 31662306a36Sopenharmony_ci * making sure if we don't see s_readonly_remount set yet, we also will 31762306a36Sopenharmony_ci * not see any superblock / mount flag changes done by remount. 31862306a36Sopenharmony_ci * It also pairs with the barrier in sb_end_ro_state_change() 31962306a36Sopenharmony_ci * assuring that if we see s_readonly_remount already cleared, we will 32062306a36Sopenharmony_ci * see the values of superblock / mount flags updated by remount. 32162306a36Sopenharmony_ci */ 32262306a36Sopenharmony_ci smp_rmb(); 32362306a36Sopenharmony_ci return __mnt_is_readonly(mnt); 32462306a36Sopenharmony_ci} 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci/* 32762306a36Sopenharmony_ci * Most r/o & frozen checks on a fs are for operations that take discrete 32862306a36Sopenharmony_ci * amounts of time, like a write() or unlink(). We must keep track of when 32962306a36Sopenharmony_ci * those operations start (for permission checks) and when they end, so that we 33062306a36Sopenharmony_ci * can determine when writes are able to occur to a filesystem. 
33162306a36Sopenharmony_ci */ 33262306a36Sopenharmony_ci/** 33362306a36Sopenharmony_ci * __mnt_want_write - get write access to a mount without freeze protection 33462306a36Sopenharmony_ci * @m: the mount on which to take a write 33562306a36Sopenharmony_ci * 33662306a36Sopenharmony_ci * This tells the low-level filesystem that a write is about to be performed to 33762306a36Sopenharmony_ci * it, and makes sure that writes are allowed (mnt it read-write) before 33862306a36Sopenharmony_ci * returning success. This operation does not protect against filesystem being 33962306a36Sopenharmony_ci * frozen. When the write operation is finished, __mnt_drop_write() must be 34062306a36Sopenharmony_ci * called. This is effectively a refcount. 34162306a36Sopenharmony_ci */ 34262306a36Sopenharmony_ciint __mnt_want_write(struct vfsmount *m) 34362306a36Sopenharmony_ci{ 34462306a36Sopenharmony_ci struct mount *mnt = real_mount(m); 34562306a36Sopenharmony_ci int ret = 0; 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci preempt_disable(); 34862306a36Sopenharmony_ci mnt_inc_writers(mnt); 34962306a36Sopenharmony_ci /* 35062306a36Sopenharmony_ci * The store to mnt_inc_writers must be visible before we pass 35162306a36Sopenharmony_ci * MNT_WRITE_HOLD loop below, so that the slowpath can see our 35262306a36Sopenharmony_ci * incremented count after it has set MNT_WRITE_HOLD. 
35362306a36Sopenharmony_ci */ 35462306a36Sopenharmony_ci smp_mb(); 35562306a36Sopenharmony_ci might_lock(&mount_lock.lock); 35662306a36Sopenharmony_ci while (READ_ONCE(mnt->mnt.mnt_flags) & MNT_WRITE_HOLD) { 35762306a36Sopenharmony_ci if (!IS_ENABLED(CONFIG_PREEMPT_RT)) { 35862306a36Sopenharmony_ci cpu_relax(); 35962306a36Sopenharmony_ci } else { 36062306a36Sopenharmony_ci /* 36162306a36Sopenharmony_ci * This prevents priority inversion, if the task 36262306a36Sopenharmony_ci * setting MNT_WRITE_HOLD got preempted on a remote 36362306a36Sopenharmony_ci * CPU, and it prevents life lock if the task setting 36462306a36Sopenharmony_ci * MNT_WRITE_HOLD has a lower priority and is bound to 36562306a36Sopenharmony_ci * the same CPU as the task that is spinning here. 36662306a36Sopenharmony_ci */ 36762306a36Sopenharmony_ci preempt_enable(); 36862306a36Sopenharmony_ci lock_mount_hash(); 36962306a36Sopenharmony_ci unlock_mount_hash(); 37062306a36Sopenharmony_ci preempt_disable(); 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci } 37362306a36Sopenharmony_ci /* 37462306a36Sopenharmony_ci * The barrier pairs with the barrier sb_start_ro_state_change() making 37562306a36Sopenharmony_ci * sure that if we see MNT_WRITE_HOLD cleared, we will also see 37662306a36Sopenharmony_ci * s_readonly_remount set (or even SB_RDONLY / MNT_READONLY flags) in 37762306a36Sopenharmony_ci * mnt_is_readonly() and bail in case we are racing with remount 37862306a36Sopenharmony_ci * read-only. 
37962306a36Sopenharmony_ci */ 38062306a36Sopenharmony_ci smp_rmb(); 38162306a36Sopenharmony_ci if (mnt_is_readonly(m)) { 38262306a36Sopenharmony_ci mnt_dec_writers(mnt); 38362306a36Sopenharmony_ci ret = -EROFS; 38462306a36Sopenharmony_ci } 38562306a36Sopenharmony_ci preempt_enable(); 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci return ret; 38862306a36Sopenharmony_ci} 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci/** 39162306a36Sopenharmony_ci * mnt_want_write - get write access to a mount 39262306a36Sopenharmony_ci * @m: the mount on which to take a write 39362306a36Sopenharmony_ci * 39462306a36Sopenharmony_ci * This tells the low-level filesystem that a write is about to be performed to 39562306a36Sopenharmony_ci * it, and makes sure that writes are allowed (mount is read-write, filesystem 39662306a36Sopenharmony_ci * is not frozen) before returning success. When the write operation is 39762306a36Sopenharmony_ci * finished, mnt_drop_write() must be called. This is effectively a refcount. 
39862306a36Sopenharmony_ci */ 39962306a36Sopenharmony_ciint mnt_want_write(struct vfsmount *m) 40062306a36Sopenharmony_ci{ 40162306a36Sopenharmony_ci int ret; 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci sb_start_write(m->mnt_sb); 40462306a36Sopenharmony_ci ret = __mnt_want_write(m); 40562306a36Sopenharmony_ci if (ret) 40662306a36Sopenharmony_ci sb_end_write(m->mnt_sb); 40762306a36Sopenharmony_ci return ret; 40862306a36Sopenharmony_ci} 40962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(mnt_want_write); 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci/** 41262306a36Sopenharmony_ci * __mnt_want_write_file - get write access to a file's mount 41362306a36Sopenharmony_ci * @file: the file who's mount on which to take a write 41462306a36Sopenharmony_ci * 41562306a36Sopenharmony_ci * This is like __mnt_want_write, but if the file is already open for writing it 41662306a36Sopenharmony_ci * skips incrementing mnt_writers (since the open file already has a reference) 41762306a36Sopenharmony_ci * and instead only does the check for emergency r/o remounts. This must be 41862306a36Sopenharmony_ci * paired with __mnt_drop_write_file. 41962306a36Sopenharmony_ci */ 42062306a36Sopenharmony_ciint __mnt_want_write_file(struct file *file) 42162306a36Sopenharmony_ci{ 42262306a36Sopenharmony_ci if (file->f_mode & FMODE_WRITER) { 42362306a36Sopenharmony_ci /* 42462306a36Sopenharmony_ci * Superblock may have become readonly while there are still 42562306a36Sopenharmony_ci * writable fd's, e.g. 
due to a fs error with errors=remount-ro 42662306a36Sopenharmony_ci */ 42762306a36Sopenharmony_ci if (__mnt_is_readonly(file->f_path.mnt)) 42862306a36Sopenharmony_ci return -EROFS; 42962306a36Sopenharmony_ci return 0; 43062306a36Sopenharmony_ci } 43162306a36Sopenharmony_ci return __mnt_want_write(file->f_path.mnt); 43262306a36Sopenharmony_ci} 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci/** 43562306a36Sopenharmony_ci * mnt_want_write_file - get write access to a file's mount 43662306a36Sopenharmony_ci * @file: the file who's mount on which to take a write 43762306a36Sopenharmony_ci * 43862306a36Sopenharmony_ci * This is like mnt_want_write, but if the file is already open for writing it 43962306a36Sopenharmony_ci * skips incrementing mnt_writers (since the open file already has a reference) 44062306a36Sopenharmony_ci * and instead only does the freeze protection and the check for emergency r/o 44162306a36Sopenharmony_ci * remounts. This must be paired with mnt_drop_write_file. 44262306a36Sopenharmony_ci */ 44362306a36Sopenharmony_ciint mnt_want_write_file(struct file *file) 44462306a36Sopenharmony_ci{ 44562306a36Sopenharmony_ci int ret; 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_ci sb_start_write(file_inode(file)->i_sb); 44862306a36Sopenharmony_ci ret = __mnt_want_write_file(file); 44962306a36Sopenharmony_ci if (ret) 45062306a36Sopenharmony_ci sb_end_write(file_inode(file)->i_sb); 45162306a36Sopenharmony_ci return ret; 45262306a36Sopenharmony_ci} 45362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(mnt_want_write_file); 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci/** 45662306a36Sopenharmony_ci * __mnt_drop_write - give up write access to a mount 45762306a36Sopenharmony_ci * @mnt: the mount on which to give up write access 45862306a36Sopenharmony_ci * 45962306a36Sopenharmony_ci * Tells the low-level filesystem that we are done 46062306a36Sopenharmony_ci * performing writes to it. 
Must be matched with 46162306a36Sopenharmony_ci * __mnt_want_write() call above. 46262306a36Sopenharmony_ci */ 46362306a36Sopenharmony_civoid __mnt_drop_write(struct vfsmount *mnt) 46462306a36Sopenharmony_ci{ 46562306a36Sopenharmony_ci preempt_disable(); 46662306a36Sopenharmony_ci mnt_dec_writers(real_mount(mnt)); 46762306a36Sopenharmony_ci preempt_enable(); 46862306a36Sopenharmony_ci} 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci/** 47162306a36Sopenharmony_ci * mnt_drop_write - give up write access to a mount 47262306a36Sopenharmony_ci * @mnt: the mount on which to give up write access 47362306a36Sopenharmony_ci * 47462306a36Sopenharmony_ci * Tells the low-level filesystem that we are done performing writes to it and 47562306a36Sopenharmony_ci * also allows filesystem to be frozen again. Must be matched with 47662306a36Sopenharmony_ci * mnt_want_write() call above. 47762306a36Sopenharmony_ci */ 47862306a36Sopenharmony_civoid mnt_drop_write(struct vfsmount *mnt) 47962306a36Sopenharmony_ci{ 48062306a36Sopenharmony_ci __mnt_drop_write(mnt); 48162306a36Sopenharmony_ci sb_end_write(mnt->mnt_sb); 48262306a36Sopenharmony_ci} 48362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(mnt_drop_write); 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_civoid __mnt_drop_write_file(struct file *file) 48662306a36Sopenharmony_ci{ 48762306a36Sopenharmony_ci if (!(file->f_mode & FMODE_WRITER)) 48862306a36Sopenharmony_ci __mnt_drop_write(file->f_path.mnt); 48962306a36Sopenharmony_ci} 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_civoid mnt_drop_write_file(struct file *file) 49262306a36Sopenharmony_ci{ 49362306a36Sopenharmony_ci __mnt_drop_write_file(file); 49462306a36Sopenharmony_ci sb_end_write(file_inode(file)->i_sb); 49562306a36Sopenharmony_ci} 49662306a36Sopenharmony_ciEXPORT_SYMBOL(mnt_drop_write_file); 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci/** 49962306a36Sopenharmony_ci * mnt_hold_writers - prevent write access to the given mount 50062306a36Sopenharmony_ci * 
@mnt: mnt to prevent write access to 50162306a36Sopenharmony_ci * 50262306a36Sopenharmony_ci * Prevents write access to @mnt if there are no active writers for @mnt. 50362306a36Sopenharmony_ci * This function needs to be called and return successfully before changing 50462306a36Sopenharmony_ci * properties of @mnt that need to remain stable for callers with write access 50562306a36Sopenharmony_ci * to @mnt. 50662306a36Sopenharmony_ci * 50762306a36Sopenharmony_ci * After this functions has been called successfully callers must pair it with 50862306a36Sopenharmony_ci * a call to mnt_unhold_writers() in order to stop preventing write access to 50962306a36Sopenharmony_ci * @mnt. 51062306a36Sopenharmony_ci * 51162306a36Sopenharmony_ci * Context: This function expects lock_mount_hash() to be held serializing 51262306a36Sopenharmony_ci * setting MNT_WRITE_HOLD. 51362306a36Sopenharmony_ci * Return: On success 0 is returned. 51462306a36Sopenharmony_ci * On error, -EBUSY is returned. 51562306a36Sopenharmony_ci */ 51662306a36Sopenharmony_cistatic inline int mnt_hold_writers(struct mount *mnt) 51762306a36Sopenharmony_ci{ 51862306a36Sopenharmony_ci mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; 51962306a36Sopenharmony_ci /* 52062306a36Sopenharmony_ci * After storing MNT_WRITE_HOLD, we'll read the counters. This store 52162306a36Sopenharmony_ci * should be visible before we do. 52262306a36Sopenharmony_ci */ 52362306a36Sopenharmony_ci smp_mb(); 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci /* 52662306a36Sopenharmony_ci * With writers on hold, if this value is zero, then there are 52762306a36Sopenharmony_ci * definitely no active writers (although held writers may subsequently 52862306a36Sopenharmony_ci * increment the count, they'll have to wait, and decrement it after 52962306a36Sopenharmony_ci * seeing MNT_READONLY). 
53062306a36Sopenharmony_ci * 53162306a36Sopenharmony_ci * It is OK to have counter incremented on one CPU and decremented on 53262306a36Sopenharmony_ci * another: the sum will add up correctly. The danger would be when we 53362306a36Sopenharmony_ci * sum up each counter, if we read a counter before it is incremented, 53462306a36Sopenharmony_ci * but then read another CPU's count which it has been subsequently 53562306a36Sopenharmony_ci * decremented from -- we would see more decrements than we should. 53662306a36Sopenharmony_ci * MNT_WRITE_HOLD protects against this scenario, because 53762306a36Sopenharmony_ci * mnt_want_write first increments count, then smp_mb, then spins on 53862306a36Sopenharmony_ci * MNT_WRITE_HOLD, so it can't be decremented by another CPU while 53962306a36Sopenharmony_ci * we're counting up here. 54062306a36Sopenharmony_ci */ 54162306a36Sopenharmony_ci if (mnt_get_writers(mnt) > 0) 54262306a36Sopenharmony_ci return -EBUSY; 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci return 0; 54562306a36Sopenharmony_ci} 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci/** 54862306a36Sopenharmony_ci * mnt_unhold_writers - stop preventing write access to the given mount 54962306a36Sopenharmony_ci * @mnt: mnt to stop preventing write access to 55062306a36Sopenharmony_ci * 55162306a36Sopenharmony_ci * Stop preventing write access to @mnt allowing callers to gain write access 55262306a36Sopenharmony_ci * to @mnt again. 55362306a36Sopenharmony_ci * 55462306a36Sopenharmony_ci * This function can only be called after a successful call to 55562306a36Sopenharmony_ci * mnt_hold_writers(). 55662306a36Sopenharmony_ci * 55762306a36Sopenharmony_ci * Context: This function expects lock_mount_hash() to be held. 
55862306a36Sopenharmony_ci */ 55962306a36Sopenharmony_cistatic inline void mnt_unhold_writers(struct mount *mnt) 56062306a36Sopenharmony_ci{ 56162306a36Sopenharmony_ci /* 56262306a36Sopenharmony_ci * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers 56362306a36Sopenharmony_ci * that become unheld will see MNT_READONLY. 56462306a36Sopenharmony_ci */ 56562306a36Sopenharmony_ci smp_wmb(); 56662306a36Sopenharmony_ci mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; 56762306a36Sopenharmony_ci} 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_cistatic int mnt_make_readonly(struct mount *mnt) 57062306a36Sopenharmony_ci{ 57162306a36Sopenharmony_ci int ret; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci ret = mnt_hold_writers(mnt); 57462306a36Sopenharmony_ci if (!ret) 57562306a36Sopenharmony_ci mnt->mnt.mnt_flags |= MNT_READONLY; 57662306a36Sopenharmony_ci mnt_unhold_writers(mnt); 57762306a36Sopenharmony_ci return ret; 57862306a36Sopenharmony_ci} 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ciint sb_prepare_remount_readonly(struct super_block *sb) 58162306a36Sopenharmony_ci{ 58262306a36Sopenharmony_ci struct mount *mnt; 58362306a36Sopenharmony_ci int err = 0; 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci /* Racy optimization. 
Recheck the counter under MNT_WRITE_HOLD */ 58662306a36Sopenharmony_ci if (atomic_long_read(&sb->s_remove_count)) 58762306a36Sopenharmony_ci return -EBUSY; 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci lock_mount_hash(); 59062306a36Sopenharmony_ci list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { 59162306a36Sopenharmony_ci if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { 59262306a36Sopenharmony_ci err = mnt_hold_writers(mnt); 59362306a36Sopenharmony_ci if (err) 59462306a36Sopenharmony_ci break; 59562306a36Sopenharmony_ci } 59662306a36Sopenharmony_ci } 59762306a36Sopenharmony_ci if (!err && atomic_long_read(&sb->s_remove_count)) 59862306a36Sopenharmony_ci err = -EBUSY; 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci if (!err) 60162306a36Sopenharmony_ci sb_start_ro_state_change(sb); 60262306a36Sopenharmony_ci list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { 60362306a36Sopenharmony_ci if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) 60462306a36Sopenharmony_ci mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; 60562306a36Sopenharmony_ci } 60662306a36Sopenharmony_ci unlock_mount_hash(); 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci return err; 60962306a36Sopenharmony_ci} 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_cistatic void free_vfsmnt(struct mount *mnt) 61262306a36Sopenharmony_ci{ 61362306a36Sopenharmony_ci mnt_idmap_put(mnt_idmap(&mnt->mnt)); 61462306a36Sopenharmony_ci kfree_const(mnt->mnt_devname); 61562306a36Sopenharmony_ci#ifdef CONFIG_SMP 61662306a36Sopenharmony_ci free_percpu(mnt->mnt_pcp); 61762306a36Sopenharmony_ci#endif 61862306a36Sopenharmony_ci kmem_cache_free(mnt_cache, mnt); 61962306a36Sopenharmony_ci} 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_cistatic void delayed_free_vfsmnt(struct rcu_head *head) 62262306a36Sopenharmony_ci{ 62362306a36Sopenharmony_ci free_vfsmnt(container_of(head, struct mount, mnt_rcu)); 62462306a36Sopenharmony_ci} 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci/* call under rcu_read_lock */ 
62762306a36Sopenharmony_ciint __legitimize_mnt(struct vfsmount *bastard, unsigned seq) 62862306a36Sopenharmony_ci{ 62962306a36Sopenharmony_ci struct mount *mnt; 63062306a36Sopenharmony_ci if (read_seqretry(&mount_lock, seq)) 63162306a36Sopenharmony_ci return 1; 63262306a36Sopenharmony_ci if (bastard == NULL) 63362306a36Sopenharmony_ci return 0; 63462306a36Sopenharmony_ci mnt = real_mount(bastard); 63562306a36Sopenharmony_ci mnt_add_count(mnt, 1); 63662306a36Sopenharmony_ci smp_mb(); // see mntput_no_expire() 63762306a36Sopenharmony_ci if (likely(!read_seqretry(&mount_lock, seq))) 63862306a36Sopenharmony_ci return 0; 63962306a36Sopenharmony_ci if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { 64062306a36Sopenharmony_ci mnt_add_count(mnt, -1); 64162306a36Sopenharmony_ci return 1; 64262306a36Sopenharmony_ci } 64362306a36Sopenharmony_ci lock_mount_hash(); 64462306a36Sopenharmony_ci if (unlikely(bastard->mnt_flags & MNT_DOOMED)) { 64562306a36Sopenharmony_ci mnt_add_count(mnt, -1); 64662306a36Sopenharmony_ci unlock_mount_hash(); 64762306a36Sopenharmony_ci return 1; 64862306a36Sopenharmony_ci } 64962306a36Sopenharmony_ci unlock_mount_hash(); 65062306a36Sopenharmony_ci /* caller will mntput() */ 65162306a36Sopenharmony_ci return -1; 65262306a36Sopenharmony_ci} 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci/* call under rcu_read_lock */ 65562306a36Sopenharmony_cistatic bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) 65662306a36Sopenharmony_ci{ 65762306a36Sopenharmony_ci int res = __legitimize_mnt(bastard, seq); 65862306a36Sopenharmony_ci if (likely(!res)) 65962306a36Sopenharmony_ci return true; 66062306a36Sopenharmony_ci if (unlikely(res < 0)) { 66162306a36Sopenharmony_ci rcu_read_unlock(); 66262306a36Sopenharmony_ci mntput(bastard); 66362306a36Sopenharmony_ci rcu_read_lock(); 66462306a36Sopenharmony_ci } 66562306a36Sopenharmony_ci return false; 66662306a36Sopenharmony_ci} 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci/** 66962306a36Sopenharmony_ci * 
__lookup_mnt - find first child mount 67062306a36Sopenharmony_ci * @mnt: parent mount 67162306a36Sopenharmony_ci * @dentry: mountpoint 67262306a36Sopenharmony_ci * 67362306a36Sopenharmony_ci * If @mnt has a child mount @c mounted @dentry find and return it. 67462306a36Sopenharmony_ci * 67562306a36Sopenharmony_ci * Note that the child mount @c need not be unique. There are cases 67662306a36Sopenharmony_ci * where shadow mounts are created. For example, during mount 67762306a36Sopenharmony_ci * propagation when a source mount @mnt whose root got overmounted by a 67862306a36Sopenharmony_ci * mount @o after path lookup but before @namespace_sem could be 67962306a36Sopenharmony_ci * acquired gets copied and propagated. So @mnt gets copied including 68062306a36Sopenharmony_ci * @o. When @mnt is propagated to a destination mount @d that already 68162306a36Sopenharmony_ci * has another mount @n mounted at the same mountpoint then the source 68262306a36Sopenharmony_ci * mount @mnt will be tucked beneath @n, i.e., @n will be mounted on 68362306a36Sopenharmony_ci * @mnt and @mnt mounted on @d. Now both @n and @o are mounted at @mnt 68462306a36Sopenharmony_ci * on @dentry. 68562306a36Sopenharmony_ci * 68662306a36Sopenharmony_ci * Return: The first child of @mnt mounted @dentry or NULL. 
68762306a36Sopenharmony_ci */ 68862306a36Sopenharmony_cistruct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) 68962306a36Sopenharmony_ci{ 69062306a36Sopenharmony_ci struct hlist_head *head = m_hash(mnt, dentry); 69162306a36Sopenharmony_ci struct mount *p; 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci hlist_for_each_entry_rcu(p, head, mnt_hash) 69462306a36Sopenharmony_ci if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) 69562306a36Sopenharmony_ci return p; 69662306a36Sopenharmony_ci return NULL; 69762306a36Sopenharmony_ci} 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci/* 70062306a36Sopenharmony_ci * lookup_mnt - Return the first child mount mounted at path 70162306a36Sopenharmony_ci * 70262306a36Sopenharmony_ci * "First" means first mounted chronologically. If you create the 70362306a36Sopenharmony_ci * following mounts: 70462306a36Sopenharmony_ci * 70562306a36Sopenharmony_ci * mount /dev/sda1 /mnt 70662306a36Sopenharmony_ci * mount /dev/sda2 /mnt 70762306a36Sopenharmony_ci * mount /dev/sda3 /mnt 70862306a36Sopenharmony_ci * 70962306a36Sopenharmony_ci * Then lookup_mnt() on the base /mnt dentry in the root mount will 71062306a36Sopenharmony_ci * return successively the root dentry and vfsmount of /dev/sda1, then 71162306a36Sopenharmony_ci * /dev/sda2, then /dev/sda3, then NULL. 71262306a36Sopenharmony_ci * 71362306a36Sopenharmony_ci * lookup_mnt takes a reference to the found vfsmount. 
71462306a36Sopenharmony_ci */ 71562306a36Sopenharmony_cistruct vfsmount *lookup_mnt(const struct path *path) 71662306a36Sopenharmony_ci{ 71762306a36Sopenharmony_ci struct mount *child_mnt; 71862306a36Sopenharmony_ci struct vfsmount *m; 71962306a36Sopenharmony_ci unsigned seq; 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci rcu_read_lock(); 72262306a36Sopenharmony_ci do { 72362306a36Sopenharmony_ci seq = read_seqbegin(&mount_lock); 72462306a36Sopenharmony_ci child_mnt = __lookup_mnt(path->mnt, path->dentry); 72562306a36Sopenharmony_ci m = child_mnt ? &child_mnt->mnt : NULL; 72662306a36Sopenharmony_ci } while (!legitimize_mnt(m, seq)); 72762306a36Sopenharmony_ci rcu_read_unlock(); 72862306a36Sopenharmony_ci return m; 72962306a36Sopenharmony_ci} 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_cistatic inline void lock_ns_list(struct mnt_namespace *ns) 73262306a36Sopenharmony_ci{ 73362306a36Sopenharmony_ci spin_lock(&ns->ns_lock); 73462306a36Sopenharmony_ci} 73562306a36Sopenharmony_ci 73662306a36Sopenharmony_cistatic inline void unlock_ns_list(struct mnt_namespace *ns) 73762306a36Sopenharmony_ci{ 73862306a36Sopenharmony_ci spin_unlock(&ns->ns_lock); 73962306a36Sopenharmony_ci} 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_cistatic inline bool mnt_is_cursor(struct mount *mnt) 74262306a36Sopenharmony_ci{ 74362306a36Sopenharmony_ci return mnt->mnt.mnt_flags & MNT_CURSOR; 74462306a36Sopenharmony_ci} 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci/* 74762306a36Sopenharmony_ci * __is_local_mountpoint - Test to see if dentry is a mountpoint in the 74862306a36Sopenharmony_ci * current mount namespace. 74962306a36Sopenharmony_ci * 75062306a36Sopenharmony_ci * The common case is dentries are not mountpoints at all and that 75162306a36Sopenharmony_ci * test is handled inline. 
For the slow case when we are actually 75262306a36Sopenharmony_ci * dealing with a mountpoint of some kind, walk through all of the 75362306a36Sopenharmony_ci * mounts in the current mount namespace and test to see if the dentry 75462306a36Sopenharmony_ci * is a mountpoint. 75562306a36Sopenharmony_ci * 75662306a36Sopenharmony_ci * The mount_hashtable is not usable in the context because we 75762306a36Sopenharmony_ci * need to identify all mounts that may be in the current mount 75862306a36Sopenharmony_ci * namespace not just a mount that happens to have some specified 75962306a36Sopenharmony_ci * parent mount. 76062306a36Sopenharmony_ci */ 76162306a36Sopenharmony_cibool __is_local_mountpoint(struct dentry *dentry) 76262306a36Sopenharmony_ci{ 76362306a36Sopenharmony_ci struct mnt_namespace *ns = current->nsproxy->mnt_ns; 76462306a36Sopenharmony_ci struct mount *mnt; 76562306a36Sopenharmony_ci bool is_covered = false; 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci down_read(&namespace_sem); 76862306a36Sopenharmony_ci lock_ns_list(ns); 76962306a36Sopenharmony_ci list_for_each_entry(mnt, &ns->list, mnt_list) { 77062306a36Sopenharmony_ci if (mnt_is_cursor(mnt)) 77162306a36Sopenharmony_ci continue; 77262306a36Sopenharmony_ci is_covered = (mnt->mnt_mountpoint == dentry); 77362306a36Sopenharmony_ci if (is_covered) 77462306a36Sopenharmony_ci break; 77562306a36Sopenharmony_ci } 77662306a36Sopenharmony_ci unlock_ns_list(ns); 77762306a36Sopenharmony_ci up_read(&namespace_sem); 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci return is_covered; 78062306a36Sopenharmony_ci} 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_cistatic struct mountpoint *lookup_mountpoint(struct dentry *dentry) 78362306a36Sopenharmony_ci{ 78462306a36Sopenharmony_ci struct hlist_head *chain = mp_hash(dentry); 78562306a36Sopenharmony_ci struct mountpoint *mp; 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci hlist_for_each_entry(mp, chain, m_hash) { 78862306a36Sopenharmony_ci if 
(mp->m_dentry == dentry) { 78962306a36Sopenharmony_ci mp->m_count++; 79062306a36Sopenharmony_ci return mp; 79162306a36Sopenharmony_ci } 79262306a36Sopenharmony_ci } 79362306a36Sopenharmony_ci return NULL; 79462306a36Sopenharmony_ci} 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_cistatic struct mountpoint *get_mountpoint(struct dentry *dentry) 79762306a36Sopenharmony_ci{ 79862306a36Sopenharmony_ci struct mountpoint *mp, *new = NULL; 79962306a36Sopenharmony_ci int ret; 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci if (d_mountpoint(dentry)) { 80262306a36Sopenharmony_ci /* might be worth a WARN_ON() */ 80362306a36Sopenharmony_ci if (d_unlinked(dentry)) 80462306a36Sopenharmony_ci return ERR_PTR(-ENOENT); 80562306a36Sopenharmony_cimountpoint: 80662306a36Sopenharmony_ci read_seqlock_excl(&mount_lock); 80762306a36Sopenharmony_ci mp = lookup_mountpoint(dentry); 80862306a36Sopenharmony_ci read_sequnlock_excl(&mount_lock); 80962306a36Sopenharmony_ci if (mp) 81062306a36Sopenharmony_ci goto done; 81162306a36Sopenharmony_ci } 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci if (!new) 81462306a36Sopenharmony_ci new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); 81562306a36Sopenharmony_ci if (!new) 81662306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci /* Exactly one processes may set d_mounted */ 82062306a36Sopenharmony_ci ret = d_set_mounted(dentry); 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci /* Someone else set d_mounted? */ 82362306a36Sopenharmony_ci if (ret == -EBUSY) 82462306a36Sopenharmony_ci goto mountpoint; 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci /* The dentry is not available as a mountpoint? 
*/ 82762306a36Sopenharmony_ci mp = ERR_PTR(ret); 82862306a36Sopenharmony_ci if (ret) 82962306a36Sopenharmony_ci goto done; 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci /* Add the new mountpoint to the hash table */ 83262306a36Sopenharmony_ci read_seqlock_excl(&mount_lock); 83362306a36Sopenharmony_ci new->m_dentry = dget(dentry); 83462306a36Sopenharmony_ci new->m_count = 1; 83562306a36Sopenharmony_ci hlist_add_head(&new->m_hash, mp_hash(dentry)); 83662306a36Sopenharmony_ci INIT_HLIST_HEAD(&new->m_list); 83762306a36Sopenharmony_ci read_sequnlock_excl(&mount_lock); 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci mp = new; 84062306a36Sopenharmony_ci new = NULL; 84162306a36Sopenharmony_cidone: 84262306a36Sopenharmony_ci kfree(new); 84362306a36Sopenharmony_ci return mp; 84462306a36Sopenharmony_ci} 84562306a36Sopenharmony_ci 84662306a36Sopenharmony_ci/* 84762306a36Sopenharmony_ci * vfsmount lock must be held. Additionally, the caller is responsible 84862306a36Sopenharmony_ci * for serializing calls for given disposal list. 
84962306a36Sopenharmony_ci */ 85062306a36Sopenharmony_cistatic void __put_mountpoint(struct mountpoint *mp, struct list_head *list) 85162306a36Sopenharmony_ci{ 85262306a36Sopenharmony_ci if (!--mp->m_count) { 85362306a36Sopenharmony_ci struct dentry *dentry = mp->m_dentry; 85462306a36Sopenharmony_ci BUG_ON(!hlist_empty(&mp->m_list)); 85562306a36Sopenharmony_ci spin_lock(&dentry->d_lock); 85662306a36Sopenharmony_ci dentry->d_flags &= ~DCACHE_MOUNTED; 85762306a36Sopenharmony_ci spin_unlock(&dentry->d_lock); 85862306a36Sopenharmony_ci dput_to_list(dentry, list); 85962306a36Sopenharmony_ci hlist_del(&mp->m_hash); 86062306a36Sopenharmony_ci kfree(mp); 86162306a36Sopenharmony_ci } 86262306a36Sopenharmony_ci} 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci/* called with namespace_lock and vfsmount lock */ 86562306a36Sopenharmony_cistatic void put_mountpoint(struct mountpoint *mp) 86662306a36Sopenharmony_ci{ 86762306a36Sopenharmony_ci __put_mountpoint(mp, &ex_mountpoints); 86862306a36Sopenharmony_ci} 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_cistatic inline int check_mnt(struct mount *mnt) 87162306a36Sopenharmony_ci{ 87262306a36Sopenharmony_ci return mnt->mnt_ns == current->nsproxy->mnt_ns; 87362306a36Sopenharmony_ci} 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci/* 87662306a36Sopenharmony_ci * vfsmount lock must be held for write 87762306a36Sopenharmony_ci */ 87862306a36Sopenharmony_cistatic void touch_mnt_namespace(struct mnt_namespace *ns) 87962306a36Sopenharmony_ci{ 88062306a36Sopenharmony_ci if (ns) { 88162306a36Sopenharmony_ci ns->event = ++event; 88262306a36Sopenharmony_ci wake_up_interruptible(&ns->poll); 88362306a36Sopenharmony_ci } 88462306a36Sopenharmony_ci} 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci/* 88762306a36Sopenharmony_ci * vfsmount lock must be held for write 88862306a36Sopenharmony_ci */ 88962306a36Sopenharmony_cistatic void __touch_mnt_namespace(struct mnt_namespace *ns) 89062306a36Sopenharmony_ci{ 
89162306a36Sopenharmony_ci if (ns && ns->event != event) { 89262306a36Sopenharmony_ci ns->event = event; 89362306a36Sopenharmony_ci wake_up_interruptible(&ns->poll); 89462306a36Sopenharmony_ci } 89562306a36Sopenharmony_ci} 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci/* 89862306a36Sopenharmony_ci * vfsmount lock must be held for write 89962306a36Sopenharmony_ci */ 90062306a36Sopenharmony_cistatic struct mountpoint *unhash_mnt(struct mount *mnt) 90162306a36Sopenharmony_ci{ 90262306a36Sopenharmony_ci struct mountpoint *mp; 90362306a36Sopenharmony_ci mnt->mnt_parent = mnt; 90462306a36Sopenharmony_ci mnt->mnt_mountpoint = mnt->mnt.mnt_root; 90562306a36Sopenharmony_ci list_del_init(&mnt->mnt_child); 90662306a36Sopenharmony_ci hlist_del_init_rcu(&mnt->mnt_hash); 90762306a36Sopenharmony_ci hlist_del_init(&mnt->mnt_mp_list); 90862306a36Sopenharmony_ci mp = mnt->mnt_mp; 90962306a36Sopenharmony_ci mnt->mnt_mp = NULL; 91062306a36Sopenharmony_ci return mp; 91162306a36Sopenharmony_ci} 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci/* 91462306a36Sopenharmony_ci * vfsmount lock must be held for write 91562306a36Sopenharmony_ci */ 91662306a36Sopenharmony_cistatic void umount_mnt(struct mount *mnt) 91762306a36Sopenharmony_ci{ 91862306a36Sopenharmony_ci put_mountpoint(unhash_mnt(mnt)); 91962306a36Sopenharmony_ci} 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci/* 92262306a36Sopenharmony_ci * vfsmount lock must be held for write 92362306a36Sopenharmony_ci */ 92462306a36Sopenharmony_civoid mnt_set_mountpoint(struct mount *mnt, 92562306a36Sopenharmony_ci struct mountpoint *mp, 92662306a36Sopenharmony_ci struct mount *child_mnt) 92762306a36Sopenharmony_ci{ 92862306a36Sopenharmony_ci mp->m_count++; 92962306a36Sopenharmony_ci mnt_add_count(mnt, 1); /* essentially, that's mntget */ 93062306a36Sopenharmony_ci child_mnt->mnt_mountpoint = mp->m_dentry; 93162306a36Sopenharmony_ci child_mnt->mnt_parent = mnt; 93262306a36Sopenharmony_ci child_mnt->mnt_mp = mp; 
93362306a36Sopenharmony_ci hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list); 93462306a36Sopenharmony_ci} 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ci/** 93762306a36Sopenharmony_ci * mnt_set_mountpoint_beneath - mount a mount beneath another one 93862306a36Sopenharmony_ci * 93962306a36Sopenharmony_ci * @new_parent: the source mount 94062306a36Sopenharmony_ci * @top_mnt: the mount beneath which @new_parent is mounted 94162306a36Sopenharmony_ci * @new_mp: the new mountpoint of @top_mnt on @new_parent 94262306a36Sopenharmony_ci * 94362306a36Sopenharmony_ci * Remove @top_mnt from its current mountpoint @top_mnt->mnt_mp and 94462306a36Sopenharmony_ci * parent @top_mnt->mnt_parent and mount it on top of @new_parent at 94562306a36Sopenharmony_ci * @new_mp. And mount @new_parent on the old parent and old 94662306a36Sopenharmony_ci * mountpoint of @top_mnt. 94762306a36Sopenharmony_ci * 94862306a36Sopenharmony_ci * Context: This function expects namespace_lock() and lock_mount_hash() 94962306a36Sopenharmony_ci * to have been acquired in that order. 
95062306a36Sopenharmony_ci */ 95162306a36Sopenharmony_cistatic void mnt_set_mountpoint_beneath(struct mount *new_parent, 95262306a36Sopenharmony_ci struct mount *top_mnt, 95362306a36Sopenharmony_ci struct mountpoint *new_mp) 95462306a36Sopenharmony_ci{ 95562306a36Sopenharmony_ci struct mount *old_top_parent = top_mnt->mnt_parent; 95662306a36Sopenharmony_ci struct mountpoint *old_top_mp = top_mnt->mnt_mp; 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci mnt_set_mountpoint(old_top_parent, old_top_mp, new_parent); 95962306a36Sopenharmony_ci mnt_change_mountpoint(new_parent, new_mp, top_mnt); 96062306a36Sopenharmony_ci} 96162306a36Sopenharmony_ci 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_cistatic void __attach_mnt(struct mount *mnt, struct mount *parent) 96462306a36Sopenharmony_ci{ 96562306a36Sopenharmony_ci hlist_add_head_rcu(&mnt->mnt_hash, 96662306a36Sopenharmony_ci m_hash(&parent->mnt, mnt->mnt_mountpoint)); 96762306a36Sopenharmony_ci list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); 96862306a36Sopenharmony_ci} 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_ci/** 97162306a36Sopenharmony_ci * attach_mnt - mount a mount, attach to @mount_hashtable and parent's 97262306a36Sopenharmony_ci * list of child mounts 97362306a36Sopenharmony_ci * @parent: the parent 97462306a36Sopenharmony_ci * @mnt: the new mount 97562306a36Sopenharmony_ci * @mp: the new mountpoint 97662306a36Sopenharmony_ci * @beneath: whether to mount @mnt beneath or on top of @parent 97762306a36Sopenharmony_ci * 97862306a36Sopenharmony_ci * If @beneath is false, mount @mnt at @mp on @parent. Then attach @mnt 97962306a36Sopenharmony_ci * to @parent's child mount list and to @mount_hashtable. 98062306a36Sopenharmony_ci * 98162306a36Sopenharmony_ci * If @beneath is true, remove @mnt from its current parent and 98262306a36Sopenharmony_ci * mountpoint and mount it on @mp on @parent, and mount @parent on the 98362306a36Sopenharmony_ci * old parent and old mountpoint of @mnt. 
Finally, attach @parent to 98462306a36Sopenharmony_ci * @mnt_hashtable and @parent->mnt_parent->mnt_mounts. 98562306a36Sopenharmony_ci * 98662306a36Sopenharmony_ci * Note, when __attach_mnt() is called @mnt->mnt_parent already points 98762306a36Sopenharmony_ci * to the correct parent. 98862306a36Sopenharmony_ci * 98962306a36Sopenharmony_ci * Context: This function expects namespace_lock() and lock_mount_hash() 99062306a36Sopenharmony_ci * to have been acquired in that order. 99162306a36Sopenharmony_ci */ 99262306a36Sopenharmony_cistatic void attach_mnt(struct mount *mnt, struct mount *parent, 99362306a36Sopenharmony_ci struct mountpoint *mp, bool beneath) 99462306a36Sopenharmony_ci{ 99562306a36Sopenharmony_ci if (beneath) 99662306a36Sopenharmony_ci mnt_set_mountpoint_beneath(mnt, parent, mp); 99762306a36Sopenharmony_ci else 99862306a36Sopenharmony_ci mnt_set_mountpoint(parent, mp, mnt); 99962306a36Sopenharmony_ci /* 100062306a36Sopenharmony_ci * Note, @mnt->mnt_parent has to be used. If @mnt was mounted 100162306a36Sopenharmony_ci * beneath @parent then @mnt will need to be attached to 100262306a36Sopenharmony_ci * @parent's old parent, not @parent. IOW, @mnt->mnt_parent 100362306a36Sopenharmony_ci * isn't the same mount as @parent. 
100462306a36Sopenharmony_ci */ 100562306a36Sopenharmony_ci __attach_mnt(mnt, mnt->mnt_parent); 100662306a36Sopenharmony_ci} 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_civoid mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt) 100962306a36Sopenharmony_ci{ 101062306a36Sopenharmony_ci struct mountpoint *old_mp = mnt->mnt_mp; 101162306a36Sopenharmony_ci struct mount *old_parent = mnt->mnt_parent; 101262306a36Sopenharmony_ci 101362306a36Sopenharmony_ci list_del_init(&mnt->mnt_child); 101462306a36Sopenharmony_ci hlist_del_init(&mnt->mnt_mp_list); 101562306a36Sopenharmony_ci hlist_del_init_rcu(&mnt->mnt_hash); 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci attach_mnt(mnt, parent, mp, false); 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci put_mountpoint(old_mp); 102062306a36Sopenharmony_ci mnt_add_count(old_parent, -1); 102162306a36Sopenharmony_ci} 102262306a36Sopenharmony_ci 102362306a36Sopenharmony_ci/* 102462306a36Sopenharmony_ci * vfsmount lock must be held for write 102562306a36Sopenharmony_ci */ 102662306a36Sopenharmony_cistatic void commit_tree(struct mount *mnt) 102762306a36Sopenharmony_ci{ 102862306a36Sopenharmony_ci struct mount *parent = mnt->mnt_parent; 102962306a36Sopenharmony_ci struct mount *m; 103062306a36Sopenharmony_ci LIST_HEAD(head); 103162306a36Sopenharmony_ci struct mnt_namespace *n = parent->mnt_ns; 103262306a36Sopenharmony_ci 103362306a36Sopenharmony_ci BUG_ON(parent == mnt); 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ci list_add_tail(&head, &mnt->mnt_list); 103662306a36Sopenharmony_ci list_for_each_entry(m, &head, mnt_list) 103762306a36Sopenharmony_ci m->mnt_ns = n; 103862306a36Sopenharmony_ci 103962306a36Sopenharmony_ci list_splice(&head, n->list.prev); 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci n->mounts += n->pending_mounts; 104262306a36Sopenharmony_ci n->pending_mounts = 0; 104362306a36Sopenharmony_ci 104462306a36Sopenharmony_ci __attach_mnt(mnt, parent); 
104562306a36Sopenharmony_ci touch_mnt_namespace(n); 104662306a36Sopenharmony_ci} 104762306a36Sopenharmony_ci 104862306a36Sopenharmony_cistatic struct mount *next_mnt(struct mount *p, struct mount *root) 104962306a36Sopenharmony_ci{ 105062306a36Sopenharmony_ci struct list_head *next = p->mnt_mounts.next; 105162306a36Sopenharmony_ci if (next == &p->mnt_mounts) { 105262306a36Sopenharmony_ci while (1) { 105362306a36Sopenharmony_ci if (p == root) 105462306a36Sopenharmony_ci return NULL; 105562306a36Sopenharmony_ci next = p->mnt_child.next; 105662306a36Sopenharmony_ci if (next != &p->mnt_parent->mnt_mounts) 105762306a36Sopenharmony_ci break; 105862306a36Sopenharmony_ci p = p->mnt_parent; 105962306a36Sopenharmony_ci } 106062306a36Sopenharmony_ci } 106162306a36Sopenharmony_ci return list_entry(next, struct mount, mnt_child); 106262306a36Sopenharmony_ci} 106362306a36Sopenharmony_ci 106462306a36Sopenharmony_cistatic struct mount *skip_mnt_tree(struct mount *p) 106562306a36Sopenharmony_ci{ 106662306a36Sopenharmony_ci struct list_head *prev = p->mnt_mounts.prev; 106762306a36Sopenharmony_ci while (prev != &p->mnt_mounts) { 106862306a36Sopenharmony_ci p = list_entry(prev, struct mount, mnt_child); 106962306a36Sopenharmony_ci prev = p->mnt_mounts.prev; 107062306a36Sopenharmony_ci } 107162306a36Sopenharmony_ci return p; 107262306a36Sopenharmony_ci} 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_ci/** 107562306a36Sopenharmony_ci * vfs_create_mount - Create a mount for a configured superblock 107662306a36Sopenharmony_ci * @fc: The configuration context with the superblock attached 107762306a36Sopenharmony_ci * 107862306a36Sopenharmony_ci * Create a mount to an already configured superblock. If necessary, the 107962306a36Sopenharmony_ci * caller should invoke vfs_get_tree() before calling this. 108062306a36Sopenharmony_ci * 108162306a36Sopenharmony_ci * Note that this does not attach the mount to anything. 
108262306a36Sopenharmony_ci */ 108362306a36Sopenharmony_cistruct vfsmount *vfs_create_mount(struct fs_context *fc) 108462306a36Sopenharmony_ci{ 108562306a36Sopenharmony_ci struct mount *mnt; 108662306a36Sopenharmony_ci 108762306a36Sopenharmony_ci if (!fc->root) 108862306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_ci mnt = alloc_vfsmnt(fc->source ?: "none"); 109162306a36Sopenharmony_ci if (!mnt) 109262306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_ci if (fc->sb_flags & SB_KERNMOUNT) 109562306a36Sopenharmony_ci mnt->mnt.mnt_flags = MNT_INTERNAL; 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci atomic_inc(&fc->root->d_sb->s_active); 109862306a36Sopenharmony_ci mnt->mnt.mnt_sb = fc->root->d_sb; 109962306a36Sopenharmony_ci mnt->mnt.mnt_root = dget(fc->root); 110062306a36Sopenharmony_ci mnt->mnt_mountpoint = mnt->mnt.mnt_root; 110162306a36Sopenharmony_ci mnt->mnt_parent = mnt; 110262306a36Sopenharmony_ci 110362306a36Sopenharmony_ci lock_mount_hash(); 110462306a36Sopenharmony_ci list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts); 110562306a36Sopenharmony_ci unlock_mount_hash(); 110662306a36Sopenharmony_ci return &mnt->mnt; 110762306a36Sopenharmony_ci} 110862306a36Sopenharmony_ciEXPORT_SYMBOL(vfs_create_mount); 110962306a36Sopenharmony_ci 111062306a36Sopenharmony_cistruct vfsmount *fc_mount(struct fs_context *fc) 111162306a36Sopenharmony_ci{ 111262306a36Sopenharmony_ci int err = vfs_get_tree(fc); 111362306a36Sopenharmony_ci if (!err) { 111462306a36Sopenharmony_ci up_write(&fc->root->d_sb->s_umount); 111562306a36Sopenharmony_ci return vfs_create_mount(fc); 111662306a36Sopenharmony_ci } 111762306a36Sopenharmony_ci return ERR_PTR(err); 111862306a36Sopenharmony_ci} 111962306a36Sopenharmony_ciEXPORT_SYMBOL(fc_mount); 112062306a36Sopenharmony_ci 112162306a36Sopenharmony_cistruct vfsmount *vfs_kern_mount(struct file_system_type *type, 
112262306a36Sopenharmony_ci int flags, const char *name, 112362306a36Sopenharmony_ci void *data) 112462306a36Sopenharmony_ci{ 112562306a36Sopenharmony_ci struct fs_context *fc; 112662306a36Sopenharmony_ci struct vfsmount *mnt; 112762306a36Sopenharmony_ci int ret = 0; 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_ci if (!type) 113062306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci fc = fs_context_for_mount(type, flags); 113362306a36Sopenharmony_ci if (IS_ERR(fc)) 113462306a36Sopenharmony_ci return ERR_CAST(fc); 113562306a36Sopenharmony_ci 113662306a36Sopenharmony_ci if (name) 113762306a36Sopenharmony_ci ret = vfs_parse_fs_string(fc, "source", 113862306a36Sopenharmony_ci name, strlen(name)); 113962306a36Sopenharmony_ci if (!ret) 114062306a36Sopenharmony_ci ret = parse_monolithic_mount_data(fc, data); 114162306a36Sopenharmony_ci if (!ret) 114262306a36Sopenharmony_ci mnt = fc_mount(fc); 114362306a36Sopenharmony_ci else 114462306a36Sopenharmony_ci mnt = ERR_PTR(ret); 114562306a36Sopenharmony_ci 114662306a36Sopenharmony_ci put_fs_context(fc); 114762306a36Sopenharmony_ci return mnt; 114862306a36Sopenharmony_ci} 114962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfs_kern_mount); 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_cistruct vfsmount * 115262306a36Sopenharmony_civfs_submount(const struct dentry *mountpoint, struct file_system_type *type, 115362306a36Sopenharmony_ci const char *name, void *data) 115462306a36Sopenharmony_ci{ 115562306a36Sopenharmony_ci /* Until it is worked out how to pass the user namespace 115662306a36Sopenharmony_ci * through from the parent mount to the submount don't support 115762306a36Sopenharmony_ci * unprivileged mounts with submounts. 
115862306a36Sopenharmony_ci */ 115962306a36Sopenharmony_ci if (mountpoint->d_sb->s_user_ns != &init_user_ns) 116062306a36Sopenharmony_ci return ERR_PTR(-EPERM); 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_ci return vfs_kern_mount(type, SB_SUBMOUNT, name, data); 116362306a36Sopenharmony_ci} 116462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vfs_submount); 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_cistatic struct mount *clone_mnt(struct mount *old, struct dentry *root, 116762306a36Sopenharmony_ci int flag) 116862306a36Sopenharmony_ci{ 116962306a36Sopenharmony_ci struct super_block *sb = old->mnt.mnt_sb; 117062306a36Sopenharmony_ci struct mount *mnt; 117162306a36Sopenharmony_ci int err; 117262306a36Sopenharmony_ci 117362306a36Sopenharmony_ci mnt = alloc_vfsmnt(old->mnt_devname); 117462306a36Sopenharmony_ci if (!mnt) 117562306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) 117862306a36Sopenharmony_ci mnt->mnt_group_id = 0; /* not a peer of original */ 117962306a36Sopenharmony_ci else 118062306a36Sopenharmony_ci mnt->mnt_group_id = old->mnt_group_id; 118162306a36Sopenharmony_ci 118262306a36Sopenharmony_ci if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { 118362306a36Sopenharmony_ci err = mnt_alloc_group_id(mnt); 118462306a36Sopenharmony_ci if (err) 118562306a36Sopenharmony_ci goto out_free; 118662306a36Sopenharmony_ci } 118762306a36Sopenharmony_ci 118862306a36Sopenharmony_ci mnt->mnt.mnt_flags = old->mnt.mnt_flags; 118962306a36Sopenharmony_ci mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); 119062306a36Sopenharmony_ci 119162306a36Sopenharmony_ci atomic_inc(&sb->s_active); 119262306a36Sopenharmony_ci mnt->mnt.mnt_idmap = mnt_idmap_get(mnt_idmap(&old->mnt)); 119362306a36Sopenharmony_ci 119462306a36Sopenharmony_ci mnt->mnt.mnt_sb = sb; 119562306a36Sopenharmony_ci mnt->mnt.mnt_root = dget(root); 119662306a36Sopenharmony_ci 
mnt->mnt_mountpoint = mnt->mnt.mnt_root; 119762306a36Sopenharmony_ci mnt->mnt_parent = mnt; 119862306a36Sopenharmony_ci lock_mount_hash(); 119962306a36Sopenharmony_ci list_add_tail(&mnt->mnt_instance, &sb->s_mounts); 120062306a36Sopenharmony_ci unlock_mount_hash(); 120162306a36Sopenharmony_ci 120262306a36Sopenharmony_ci if ((flag & CL_SLAVE) || 120362306a36Sopenharmony_ci ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) { 120462306a36Sopenharmony_ci list_add(&mnt->mnt_slave, &old->mnt_slave_list); 120562306a36Sopenharmony_ci mnt->mnt_master = old; 120662306a36Sopenharmony_ci CLEAR_MNT_SHARED(mnt); 120762306a36Sopenharmony_ci } else if (!(flag & CL_PRIVATE)) { 120862306a36Sopenharmony_ci if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old)) 120962306a36Sopenharmony_ci list_add(&mnt->mnt_share, &old->mnt_share); 121062306a36Sopenharmony_ci if (IS_MNT_SLAVE(old)) 121162306a36Sopenharmony_ci list_add(&mnt->mnt_slave, &old->mnt_slave); 121262306a36Sopenharmony_ci mnt->mnt_master = old->mnt_master; 121362306a36Sopenharmony_ci } else { 121462306a36Sopenharmony_ci CLEAR_MNT_SHARED(mnt); 121562306a36Sopenharmony_ci } 121662306a36Sopenharmony_ci if (flag & CL_MAKE_SHARED) 121762306a36Sopenharmony_ci set_mnt_shared(mnt); 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci /* stick the duplicate mount on the same expiry list 122062306a36Sopenharmony_ci * as the original if that was on one */ 122162306a36Sopenharmony_ci if (flag & CL_EXPIRE) { 122262306a36Sopenharmony_ci if (!list_empty(&old->mnt_expire)) 122362306a36Sopenharmony_ci list_add(&mnt->mnt_expire, &old->mnt_expire); 122462306a36Sopenharmony_ci } 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ci return mnt; 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_ci out_free: 122962306a36Sopenharmony_ci mnt_free_id(mnt); 123062306a36Sopenharmony_ci free_vfsmnt(mnt); 123162306a36Sopenharmony_ci return ERR_PTR(err); 123262306a36Sopenharmony_ci} 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_cistatic void 
cleanup_mnt(struct mount *mnt) 123562306a36Sopenharmony_ci{ 123662306a36Sopenharmony_ci struct hlist_node *p; 123762306a36Sopenharmony_ci struct mount *m; 123862306a36Sopenharmony_ci /* 123962306a36Sopenharmony_ci * The warning here probably indicates that somebody messed 124062306a36Sopenharmony_ci * up a mnt_want/drop_write() pair. If this happens, the 124162306a36Sopenharmony_ci * filesystem was probably unable to make r/w->r/o transitions. 124262306a36Sopenharmony_ci * The locking used to deal with mnt_count decrement provides barriers, 124362306a36Sopenharmony_ci * so mnt_get_writers() below is safe. 124462306a36Sopenharmony_ci */ 124562306a36Sopenharmony_ci WARN_ON(mnt_get_writers(mnt)); 124662306a36Sopenharmony_ci if (unlikely(mnt->mnt_pins.first)) 124762306a36Sopenharmony_ci mnt_pin_kill(mnt); 124862306a36Sopenharmony_ci hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) { 124962306a36Sopenharmony_ci hlist_del(&m->mnt_umount); 125062306a36Sopenharmony_ci mntput(&m->mnt); 125162306a36Sopenharmony_ci } 125262306a36Sopenharmony_ci fsnotify_vfsmount_delete(&mnt->mnt); 125362306a36Sopenharmony_ci dput(mnt->mnt.mnt_root); 125462306a36Sopenharmony_ci deactivate_super(mnt->mnt.mnt_sb); 125562306a36Sopenharmony_ci mnt_free_id(mnt); 125662306a36Sopenharmony_ci call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt); 125762306a36Sopenharmony_ci} 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_cistatic void __cleanup_mnt(struct rcu_head *head) 126062306a36Sopenharmony_ci{ 126162306a36Sopenharmony_ci cleanup_mnt(container_of(head, struct mount, mnt_rcu)); 126262306a36Sopenharmony_ci} 126362306a36Sopenharmony_ci 126462306a36Sopenharmony_cistatic LLIST_HEAD(delayed_mntput_list); 126562306a36Sopenharmony_cistatic void delayed_mntput(struct work_struct *unused) 126662306a36Sopenharmony_ci{ 126762306a36Sopenharmony_ci struct llist_node *node = llist_del_all(&delayed_mntput_list); 126862306a36Sopenharmony_ci struct mount *m, *t; 126962306a36Sopenharmony_ci 
127062306a36Sopenharmony_ci llist_for_each_entry_safe(m, t, node, mnt_llist) 127162306a36Sopenharmony_ci cleanup_mnt(m); 127262306a36Sopenharmony_ci} 127362306a36Sopenharmony_cistatic DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput); 127462306a36Sopenharmony_ci 127562306a36Sopenharmony_cistatic void mntput_no_expire(struct mount *mnt) 127662306a36Sopenharmony_ci{ 127762306a36Sopenharmony_ci LIST_HEAD(list); 127862306a36Sopenharmony_ci int count; 127962306a36Sopenharmony_ci 128062306a36Sopenharmony_ci rcu_read_lock(); 128162306a36Sopenharmony_ci if (likely(READ_ONCE(mnt->mnt_ns))) { 128262306a36Sopenharmony_ci /* 128362306a36Sopenharmony_ci * Since we don't do lock_mount_hash() here, 128462306a36Sopenharmony_ci * ->mnt_ns can change under us. However, if it's 128562306a36Sopenharmony_ci * non-NULL, then there's a reference that won't 128662306a36Sopenharmony_ci * be dropped until after an RCU delay done after 128762306a36Sopenharmony_ci * turning ->mnt_ns NULL. So if we observe it 128862306a36Sopenharmony_ci * non-NULL under rcu_read_lock(), the reference 128962306a36Sopenharmony_ci * we are dropping is not the final one. 129062306a36Sopenharmony_ci */ 129162306a36Sopenharmony_ci mnt_add_count(mnt, -1); 129262306a36Sopenharmony_ci rcu_read_unlock(); 129362306a36Sopenharmony_ci return; 129462306a36Sopenharmony_ci } 129562306a36Sopenharmony_ci lock_mount_hash(); 129662306a36Sopenharmony_ci /* 129762306a36Sopenharmony_ci * make sure that if __legitimize_mnt() has not seen us grab 129862306a36Sopenharmony_ci * mount_lock, we'll see their refcount increment here. 
129962306a36Sopenharmony_ci */ 130062306a36Sopenharmony_ci smp_mb(); 130162306a36Sopenharmony_ci mnt_add_count(mnt, -1); 130262306a36Sopenharmony_ci count = mnt_get_count(mnt); 130362306a36Sopenharmony_ci if (count != 0) { 130462306a36Sopenharmony_ci WARN_ON(count < 0); 130562306a36Sopenharmony_ci rcu_read_unlock(); 130662306a36Sopenharmony_ci unlock_mount_hash(); 130762306a36Sopenharmony_ci return; 130862306a36Sopenharmony_ci } 130962306a36Sopenharmony_ci if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) { 131062306a36Sopenharmony_ci rcu_read_unlock(); 131162306a36Sopenharmony_ci unlock_mount_hash(); 131262306a36Sopenharmony_ci return; 131362306a36Sopenharmony_ci } 131462306a36Sopenharmony_ci mnt->mnt.mnt_flags |= MNT_DOOMED; 131562306a36Sopenharmony_ci rcu_read_unlock(); 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci list_del(&mnt->mnt_instance); 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci if (unlikely(!list_empty(&mnt->mnt_mounts))) { 132062306a36Sopenharmony_ci struct mount *p, *tmp; 132162306a36Sopenharmony_ci list_for_each_entry_safe(p, tmp, &mnt->mnt_mounts, mnt_child) { 132262306a36Sopenharmony_ci __put_mountpoint(unhash_mnt(p), &list); 132362306a36Sopenharmony_ci hlist_add_head(&p->mnt_umount, &mnt->mnt_stuck_children); 132462306a36Sopenharmony_ci } 132562306a36Sopenharmony_ci } 132662306a36Sopenharmony_ci unlock_mount_hash(); 132762306a36Sopenharmony_ci shrink_dentry_list(&list); 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_ci if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { 133062306a36Sopenharmony_ci struct task_struct *task = current; 133162306a36Sopenharmony_ci if (likely(!(task->flags & PF_KTHREAD))) { 133262306a36Sopenharmony_ci init_task_work(&mnt->mnt_rcu, __cleanup_mnt); 133362306a36Sopenharmony_ci if (!task_work_add(task, &mnt->mnt_rcu, TWA_RESUME)) 133462306a36Sopenharmony_ci return; 133562306a36Sopenharmony_ci } 133662306a36Sopenharmony_ci if (llist_add(&mnt->mnt_llist, &delayed_mntput_list)) 
133762306a36Sopenharmony_ci schedule_delayed_work(&delayed_mntput_work, 1); 133862306a36Sopenharmony_ci return; 133962306a36Sopenharmony_ci } 134062306a36Sopenharmony_ci cleanup_mnt(mnt); 134162306a36Sopenharmony_ci} 134262306a36Sopenharmony_ci 134362306a36Sopenharmony_civoid mntput(struct vfsmount *mnt) 134462306a36Sopenharmony_ci{ 134562306a36Sopenharmony_ci if (mnt) { 134662306a36Sopenharmony_ci struct mount *m = real_mount(mnt); 134762306a36Sopenharmony_ci /* avoid cacheline pingpong, hope gcc doesn't get "smart" */ 134862306a36Sopenharmony_ci if (unlikely(m->mnt_expiry_mark)) 134962306a36Sopenharmony_ci m->mnt_expiry_mark = 0; 135062306a36Sopenharmony_ci mntput_no_expire(m); 135162306a36Sopenharmony_ci } 135262306a36Sopenharmony_ci} 135362306a36Sopenharmony_ciEXPORT_SYMBOL(mntput); 135462306a36Sopenharmony_ci 135562306a36Sopenharmony_cistruct vfsmount *mntget(struct vfsmount *mnt) 135662306a36Sopenharmony_ci{ 135762306a36Sopenharmony_ci if (mnt) 135862306a36Sopenharmony_ci mnt_add_count(real_mount(mnt), 1); 135962306a36Sopenharmony_ci return mnt; 136062306a36Sopenharmony_ci} 136162306a36Sopenharmony_ciEXPORT_SYMBOL(mntget); 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_ci/* 136462306a36Sopenharmony_ci * Make a mount point inaccessible to new lookups. 136562306a36Sopenharmony_ci * Because there may still be current users, the caller MUST WAIT 136662306a36Sopenharmony_ci * for an RCU grace period before destroying the mount point. 136762306a36Sopenharmony_ci */ 136862306a36Sopenharmony_civoid mnt_make_shortterm(struct vfsmount *mnt) 136962306a36Sopenharmony_ci{ 137062306a36Sopenharmony_ci if (mnt) 137162306a36Sopenharmony_ci real_mount(mnt)->mnt_ns = NULL; 137262306a36Sopenharmony_ci} 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci/** 137562306a36Sopenharmony_ci * path_is_mountpoint() - Check if path is a mount in the current namespace. 
137662306a36Sopenharmony_ci * @path: path to check 137762306a36Sopenharmony_ci * 137862306a36Sopenharmony_ci * d_mountpoint() can only be used reliably to establish if a dentry is 137962306a36Sopenharmony_ci * not mounted in any namespace and that common case is handled inline. 138062306a36Sopenharmony_ci * d_mountpoint() isn't aware of the possibility there may be multiple 138162306a36Sopenharmony_ci * mounts using a given dentry in a different namespace. This function 138262306a36Sopenharmony_ci * checks if the passed in path is a mountpoint rather than the dentry 138362306a36Sopenharmony_ci * alone. 138462306a36Sopenharmony_ci */ 138562306a36Sopenharmony_cibool path_is_mountpoint(const struct path *path) 138662306a36Sopenharmony_ci{ 138762306a36Sopenharmony_ci unsigned seq; 138862306a36Sopenharmony_ci bool res; 138962306a36Sopenharmony_ci 139062306a36Sopenharmony_ci if (!d_mountpoint(path->dentry)) 139162306a36Sopenharmony_ci return false; 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_ci rcu_read_lock(); 139462306a36Sopenharmony_ci do { 139562306a36Sopenharmony_ci seq = read_seqbegin(&mount_lock); 139662306a36Sopenharmony_ci res = __path_is_mountpoint(path); 139762306a36Sopenharmony_ci } while (read_seqretry(&mount_lock, seq)); 139862306a36Sopenharmony_ci rcu_read_unlock(); 139962306a36Sopenharmony_ci 140062306a36Sopenharmony_ci return res; 140162306a36Sopenharmony_ci} 140262306a36Sopenharmony_ciEXPORT_SYMBOL(path_is_mountpoint); 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_cistruct vfsmount *mnt_clone_internal(const struct path *path) 140562306a36Sopenharmony_ci{ 140662306a36Sopenharmony_ci struct mount *p; 140762306a36Sopenharmony_ci p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); 140862306a36Sopenharmony_ci if (IS_ERR(p)) 140962306a36Sopenharmony_ci return ERR_CAST(p); 141062306a36Sopenharmony_ci p->mnt.mnt_flags |= MNT_INTERNAL; 141162306a36Sopenharmony_ci return &p->mnt; 141262306a36Sopenharmony_ci} 141362306a36Sopenharmony_ci 
141462306a36Sopenharmony_ci#ifdef CONFIG_PROC_FS 141562306a36Sopenharmony_cistatic struct mount *mnt_list_next(struct mnt_namespace *ns, 141662306a36Sopenharmony_ci struct list_head *p) 141762306a36Sopenharmony_ci{ 141862306a36Sopenharmony_ci struct mount *mnt, *ret = NULL; 141962306a36Sopenharmony_ci 142062306a36Sopenharmony_ci lock_ns_list(ns); 142162306a36Sopenharmony_ci list_for_each_continue(p, &ns->list) { 142262306a36Sopenharmony_ci mnt = list_entry(p, typeof(*mnt), mnt_list); 142362306a36Sopenharmony_ci if (!mnt_is_cursor(mnt)) { 142462306a36Sopenharmony_ci ret = mnt; 142562306a36Sopenharmony_ci break; 142662306a36Sopenharmony_ci } 142762306a36Sopenharmony_ci } 142862306a36Sopenharmony_ci unlock_ns_list(ns); 142962306a36Sopenharmony_ci 143062306a36Sopenharmony_ci return ret; 143162306a36Sopenharmony_ci} 143262306a36Sopenharmony_ci 143362306a36Sopenharmony_ci/* iterator; we want it to have access to namespace_sem, thus here... */ 143462306a36Sopenharmony_cistatic void *m_start(struct seq_file *m, loff_t *pos) 143562306a36Sopenharmony_ci{ 143662306a36Sopenharmony_ci struct proc_mounts *p = m->private; 143762306a36Sopenharmony_ci struct list_head *prev; 143862306a36Sopenharmony_ci 143962306a36Sopenharmony_ci down_read(&namespace_sem); 144062306a36Sopenharmony_ci if (!*pos) { 144162306a36Sopenharmony_ci prev = &p->ns->list; 144262306a36Sopenharmony_ci } else { 144362306a36Sopenharmony_ci prev = &p->cursor.mnt_list; 144462306a36Sopenharmony_ci 144562306a36Sopenharmony_ci /* Read after we'd reached the end? 
*/ 144662306a36Sopenharmony_ci if (list_empty(prev)) 144762306a36Sopenharmony_ci return NULL; 144862306a36Sopenharmony_ci } 144962306a36Sopenharmony_ci 145062306a36Sopenharmony_ci return mnt_list_next(p->ns, prev); 145162306a36Sopenharmony_ci} 145262306a36Sopenharmony_ci 145362306a36Sopenharmony_cistatic void *m_next(struct seq_file *m, void *v, loff_t *pos) 145462306a36Sopenharmony_ci{ 145562306a36Sopenharmony_ci struct proc_mounts *p = m->private; 145662306a36Sopenharmony_ci struct mount *mnt = v; 145762306a36Sopenharmony_ci 145862306a36Sopenharmony_ci ++*pos; 145962306a36Sopenharmony_ci return mnt_list_next(p->ns, &mnt->mnt_list); 146062306a36Sopenharmony_ci} 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_cistatic void m_stop(struct seq_file *m, void *v) 146362306a36Sopenharmony_ci{ 146462306a36Sopenharmony_ci struct proc_mounts *p = m->private; 146562306a36Sopenharmony_ci struct mount *mnt = v; 146662306a36Sopenharmony_ci 146762306a36Sopenharmony_ci lock_ns_list(p->ns); 146862306a36Sopenharmony_ci if (mnt) 146962306a36Sopenharmony_ci list_move_tail(&p->cursor.mnt_list, &mnt->mnt_list); 147062306a36Sopenharmony_ci else 147162306a36Sopenharmony_ci list_del_init(&p->cursor.mnt_list); 147262306a36Sopenharmony_ci unlock_ns_list(p->ns); 147362306a36Sopenharmony_ci up_read(&namespace_sem); 147462306a36Sopenharmony_ci} 147562306a36Sopenharmony_ci 147662306a36Sopenharmony_cistatic int m_show(struct seq_file *m, void *v) 147762306a36Sopenharmony_ci{ 147862306a36Sopenharmony_ci struct proc_mounts *p = m->private; 147962306a36Sopenharmony_ci struct mount *r = v; 148062306a36Sopenharmony_ci return p->show(m, &r->mnt); 148162306a36Sopenharmony_ci} 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_ciconst struct seq_operations mounts_op = { 148462306a36Sopenharmony_ci .start = m_start, 148562306a36Sopenharmony_ci .next = m_next, 148662306a36Sopenharmony_ci .stop = m_stop, 148762306a36Sopenharmony_ci .show = m_show, 148862306a36Sopenharmony_ci}; 
148962306a36Sopenharmony_ci 149062306a36Sopenharmony_civoid mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor) 149162306a36Sopenharmony_ci{ 149262306a36Sopenharmony_ci down_read(&namespace_sem); 149362306a36Sopenharmony_ci lock_ns_list(ns); 149462306a36Sopenharmony_ci list_del(&cursor->mnt_list); 149562306a36Sopenharmony_ci unlock_ns_list(ns); 149662306a36Sopenharmony_ci up_read(&namespace_sem); 149762306a36Sopenharmony_ci} 149862306a36Sopenharmony_ci#endif /* CONFIG_PROC_FS */ 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci/** 150162306a36Sopenharmony_ci * may_umount_tree - check if a mount tree is busy 150262306a36Sopenharmony_ci * @m: root of mount tree 150362306a36Sopenharmony_ci * 150462306a36Sopenharmony_ci * This is called to check if a tree of mounts has any 150562306a36Sopenharmony_ci * open files, pwds, chroots or sub mounts that are 150662306a36Sopenharmony_ci * busy. 150762306a36Sopenharmony_ci */ 150862306a36Sopenharmony_ciint may_umount_tree(struct vfsmount *m) 150962306a36Sopenharmony_ci{ 151062306a36Sopenharmony_ci struct mount *mnt = real_mount(m); 151162306a36Sopenharmony_ci int actual_refs = 0; 151262306a36Sopenharmony_ci int minimum_refs = 0; 151362306a36Sopenharmony_ci struct mount *p; 151462306a36Sopenharmony_ci BUG_ON(!m); 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci /* write lock needed for mnt_get_count */ 151762306a36Sopenharmony_ci lock_mount_hash(); 151862306a36Sopenharmony_ci for (p = mnt; p; p = next_mnt(p, mnt)) { 151962306a36Sopenharmony_ci actual_refs += mnt_get_count(p); 152062306a36Sopenharmony_ci minimum_refs += 2; 152162306a36Sopenharmony_ci } 152262306a36Sopenharmony_ci unlock_mount_hash(); 152362306a36Sopenharmony_ci 152462306a36Sopenharmony_ci if (actual_refs > minimum_refs) 152562306a36Sopenharmony_ci return 0; 152662306a36Sopenharmony_ci 152762306a36Sopenharmony_ci return 1; 152862306a36Sopenharmony_ci} 152962306a36Sopenharmony_ci 153062306a36Sopenharmony_ciEXPORT_SYMBOL(may_umount_tree); 
153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci/** 153362306a36Sopenharmony_ci * may_umount - check if a mount point is busy 153462306a36Sopenharmony_ci * @mnt: root of mount 153562306a36Sopenharmony_ci * 153662306a36Sopenharmony_ci * This is called to check if a mount point has any 153762306a36Sopenharmony_ci * open files, pwds, chroots or sub mounts. If the 153862306a36Sopenharmony_ci * mount has sub mounts this will return busy 153962306a36Sopenharmony_ci * regardless of whether the sub mounts are busy. 154062306a36Sopenharmony_ci * 154162306a36Sopenharmony_ci * Doesn't take quota and stuff into account. IOW, in some cases it will 154262306a36Sopenharmony_ci * give false negatives. The main reason why it's here is that we need 154362306a36Sopenharmony_ci * a non-destructive way to look for easily umountable filesystems. 154462306a36Sopenharmony_ci */ 154562306a36Sopenharmony_ciint may_umount(struct vfsmount *mnt) 154662306a36Sopenharmony_ci{ 154762306a36Sopenharmony_ci int ret = 1; 154862306a36Sopenharmony_ci down_read(&namespace_sem); 154962306a36Sopenharmony_ci lock_mount_hash(); 155062306a36Sopenharmony_ci if (propagate_mount_busy(real_mount(mnt), 2)) 155162306a36Sopenharmony_ci ret = 0; 155262306a36Sopenharmony_ci unlock_mount_hash(); 155362306a36Sopenharmony_ci up_read(&namespace_sem); 155462306a36Sopenharmony_ci return ret; 155562306a36Sopenharmony_ci} 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ciEXPORT_SYMBOL(may_umount); 155862306a36Sopenharmony_ci 155962306a36Sopenharmony_cistatic void namespace_unlock(void) 156062306a36Sopenharmony_ci{ 156162306a36Sopenharmony_ci struct hlist_head head; 156262306a36Sopenharmony_ci struct hlist_node *p; 156362306a36Sopenharmony_ci struct mount *m; 156462306a36Sopenharmony_ci LIST_HEAD(list); 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci hlist_move_list(&unmounted, &head); 156762306a36Sopenharmony_ci list_splice_init(&ex_mountpoints, &list); 156862306a36Sopenharmony_ci 
156962306a36Sopenharmony_ci up_write(&namespace_sem); 157062306a36Sopenharmony_ci 157162306a36Sopenharmony_ci shrink_dentry_list(&list); 157262306a36Sopenharmony_ci 157362306a36Sopenharmony_ci if (likely(hlist_empty(&head))) 157462306a36Sopenharmony_ci return; 157562306a36Sopenharmony_ci 157662306a36Sopenharmony_ci synchronize_rcu_expedited(); 157762306a36Sopenharmony_ci 157862306a36Sopenharmony_ci hlist_for_each_entry_safe(m, p, &head, mnt_umount) { 157962306a36Sopenharmony_ci hlist_del(&m->mnt_umount); 158062306a36Sopenharmony_ci mntput(&m->mnt); 158162306a36Sopenharmony_ci } 158262306a36Sopenharmony_ci} 158362306a36Sopenharmony_ci 158462306a36Sopenharmony_cistatic inline void namespace_lock(void) 158562306a36Sopenharmony_ci{ 158662306a36Sopenharmony_ci down_write(&namespace_sem); 158762306a36Sopenharmony_ci} 158862306a36Sopenharmony_ci 158962306a36Sopenharmony_cienum umount_tree_flags { 159062306a36Sopenharmony_ci UMOUNT_SYNC = 1, 159162306a36Sopenharmony_ci UMOUNT_PROPAGATE = 2, 159262306a36Sopenharmony_ci UMOUNT_CONNECTED = 4, 159362306a36Sopenharmony_ci}; 159462306a36Sopenharmony_ci 159562306a36Sopenharmony_cistatic bool disconnect_mount(struct mount *mnt, enum umount_tree_flags how) 159662306a36Sopenharmony_ci{ 159762306a36Sopenharmony_ci /* Leaving mounts connected is only valid for lazy umounts */ 159862306a36Sopenharmony_ci if (how & UMOUNT_SYNC) 159962306a36Sopenharmony_ci return true; 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_ci /* A mount without a parent has nothing to be connected to */ 160262306a36Sopenharmony_ci if (!mnt_has_parent(mnt)) 160362306a36Sopenharmony_ci return true; 160462306a36Sopenharmony_ci 160562306a36Sopenharmony_ci /* Because the reference counting rules change when mounts are 160662306a36Sopenharmony_ci * unmounted and connected, umounted mounts may not be 160762306a36Sopenharmony_ci * connected to mounted mounts. 
160862306a36Sopenharmony_ci */ 160962306a36Sopenharmony_ci if (!(mnt->mnt_parent->mnt.mnt_flags & MNT_UMOUNT)) 161062306a36Sopenharmony_ci return true; 161162306a36Sopenharmony_ci 161262306a36Sopenharmony_ci /* Has it been requested that the mount remain connected? */ 161362306a36Sopenharmony_ci if (how & UMOUNT_CONNECTED) 161462306a36Sopenharmony_ci return false; 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci /* Is the mount locked such that it needs to remain connected? */ 161762306a36Sopenharmony_ci if (IS_MNT_LOCKED(mnt)) 161862306a36Sopenharmony_ci return false; 161962306a36Sopenharmony_ci 162062306a36Sopenharmony_ci /* By default disconnect the mount */ 162162306a36Sopenharmony_ci return true; 162262306a36Sopenharmony_ci} 162362306a36Sopenharmony_ci 162462306a36Sopenharmony_ci/* 162562306a36Sopenharmony_ci * mount_lock must be held 162662306a36Sopenharmony_ci * namespace_sem must be held for write 162762306a36Sopenharmony_ci */ 162862306a36Sopenharmony_cistatic void umount_tree(struct mount *mnt, enum umount_tree_flags how) 162962306a36Sopenharmony_ci{ 163062306a36Sopenharmony_ci LIST_HEAD(tmp_list); 163162306a36Sopenharmony_ci struct mount *p; 163262306a36Sopenharmony_ci 163362306a36Sopenharmony_ci if (how & UMOUNT_PROPAGATE) 163462306a36Sopenharmony_ci propagate_mount_unlock(mnt); 163562306a36Sopenharmony_ci 163662306a36Sopenharmony_ci /* Gather the mounts to umount */ 163762306a36Sopenharmony_ci for (p = mnt; p; p = next_mnt(p, mnt)) { 163862306a36Sopenharmony_ci p->mnt.mnt_flags |= MNT_UMOUNT; 163962306a36Sopenharmony_ci list_move(&p->mnt_list, &tmp_list); 164062306a36Sopenharmony_ci } 164162306a36Sopenharmony_ci 164262306a36Sopenharmony_ci /* Hide the mounts from mnt_mounts */ 164362306a36Sopenharmony_ci list_for_each_entry(p, &tmp_list, mnt_list) { 164462306a36Sopenharmony_ci list_del_init(&p->mnt_child); 164562306a36Sopenharmony_ci } 164662306a36Sopenharmony_ci 164762306a36Sopenharmony_ci /* Add propogated mounts to the tmp_list */ 
164862306a36Sopenharmony_ci if (how & UMOUNT_PROPAGATE) 164962306a36Sopenharmony_ci propagate_umount(&tmp_list); 165062306a36Sopenharmony_ci 165162306a36Sopenharmony_ci while (!list_empty(&tmp_list)) { 165262306a36Sopenharmony_ci struct mnt_namespace *ns; 165362306a36Sopenharmony_ci bool disconnect; 165462306a36Sopenharmony_ci p = list_first_entry(&tmp_list, struct mount, mnt_list); 165562306a36Sopenharmony_ci list_del_init(&p->mnt_expire); 165662306a36Sopenharmony_ci list_del_init(&p->mnt_list); 165762306a36Sopenharmony_ci ns = p->mnt_ns; 165862306a36Sopenharmony_ci if (ns) { 165962306a36Sopenharmony_ci ns->mounts--; 166062306a36Sopenharmony_ci __touch_mnt_namespace(ns); 166162306a36Sopenharmony_ci } 166262306a36Sopenharmony_ci p->mnt_ns = NULL; 166362306a36Sopenharmony_ci if (how & UMOUNT_SYNC) 166462306a36Sopenharmony_ci p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; 166562306a36Sopenharmony_ci 166662306a36Sopenharmony_ci disconnect = disconnect_mount(p, how); 166762306a36Sopenharmony_ci if (mnt_has_parent(p)) { 166862306a36Sopenharmony_ci mnt_add_count(p->mnt_parent, -1); 166962306a36Sopenharmony_ci if (!disconnect) { 167062306a36Sopenharmony_ci /* Don't forget about p */ 167162306a36Sopenharmony_ci list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); 167262306a36Sopenharmony_ci } else { 167362306a36Sopenharmony_ci umount_mnt(p); 167462306a36Sopenharmony_ci } 167562306a36Sopenharmony_ci } 167662306a36Sopenharmony_ci change_mnt_propagation(p, MS_PRIVATE); 167762306a36Sopenharmony_ci if (disconnect) 167862306a36Sopenharmony_ci hlist_add_head(&p->mnt_umount, &unmounted); 167962306a36Sopenharmony_ci } 168062306a36Sopenharmony_ci} 168162306a36Sopenharmony_ci 168262306a36Sopenharmony_cistatic void shrink_submounts(struct mount *mnt); 168362306a36Sopenharmony_ci 168462306a36Sopenharmony_cistatic int do_umount_root(struct super_block *sb) 168562306a36Sopenharmony_ci{ 168662306a36Sopenharmony_ci int ret = 0; 168762306a36Sopenharmony_ci 168862306a36Sopenharmony_ci 
down_write(&sb->s_umount); 168962306a36Sopenharmony_ci if (!sb_rdonly(sb)) { 169062306a36Sopenharmony_ci struct fs_context *fc; 169162306a36Sopenharmony_ci 169262306a36Sopenharmony_ci fc = fs_context_for_reconfigure(sb->s_root, SB_RDONLY, 169362306a36Sopenharmony_ci SB_RDONLY); 169462306a36Sopenharmony_ci if (IS_ERR(fc)) { 169562306a36Sopenharmony_ci ret = PTR_ERR(fc); 169662306a36Sopenharmony_ci } else { 169762306a36Sopenharmony_ci ret = parse_monolithic_mount_data(fc, NULL); 169862306a36Sopenharmony_ci if (!ret) 169962306a36Sopenharmony_ci ret = reconfigure_super(fc); 170062306a36Sopenharmony_ci put_fs_context(fc); 170162306a36Sopenharmony_ci } 170262306a36Sopenharmony_ci } 170362306a36Sopenharmony_ci up_write(&sb->s_umount); 170462306a36Sopenharmony_ci return ret; 170562306a36Sopenharmony_ci} 170662306a36Sopenharmony_ci 170762306a36Sopenharmony_cistatic int do_umount(struct mount *mnt, int flags) 170862306a36Sopenharmony_ci{ 170962306a36Sopenharmony_ci struct super_block *sb = mnt->mnt.mnt_sb; 171062306a36Sopenharmony_ci int retval; 171162306a36Sopenharmony_ci 171262306a36Sopenharmony_ci retval = security_sb_umount(&mnt->mnt, flags); 171362306a36Sopenharmony_ci if (retval) 171462306a36Sopenharmony_ci return retval; 171562306a36Sopenharmony_ci 171662306a36Sopenharmony_ci /* 171762306a36Sopenharmony_ci * Allow userspace to request a mountpoint be expired rather than 171862306a36Sopenharmony_ci * unmounting unconditionally. 
Unmount only happens if: 171962306a36Sopenharmony_ci * (1) the mark is already set (the mark is cleared by mntput()) 172062306a36Sopenharmony_ci * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] 172162306a36Sopenharmony_ci */ 172262306a36Sopenharmony_ci if (flags & MNT_EXPIRE) { 172362306a36Sopenharmony_ci if (&mnt->mnt == current->fs->root.mnt || 172462306a36Sopenharmony_ci flags & (MNT_FORCE | MNT_DETACH)) 172562306a36Sopenharmony_ci return -EINVAL; 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_ci /* 172862306a36Sopenharmony_ci * probably don't strictly need the lock here if we examined 172962306a36Sopenharmony_ci * all race cases, but it's a slowpath. 173062306a36Sopenharmony_ci */ 173162306a36Sopenharmony_ci lock_mount_hash(); 173262306a36Sopenharmony_ci if (mnt_get_count(mnt) != 2) { 173362306a36Sopenharmony_ci unlock_mount_hash(); 173462306a36Sopenharmony_ci return -EBUSY; 173562306a36Sopenharmony_ci } 173662306a36Sopenharmony_ci unlock_mount_hash(); 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci if (!xchg(&mnt->mnt_expiry_mark, 1)) 173962306a36Sopenharmony_ci return -EAGAIN; 174062306a36Sopenharmony_ci } 174162306a36Sopenharmony_ci 174262306a36Sopenharmony_ci /* 174362306a36Sopenharmony_ci * If we may have to abort operations to get out of this 174462306a36Sopenharmony_ci * mount, and they will themselves hold resources we must 174562306a36Sopenharmony_ci * allow the fs to do things. In the Unix tradition of 174662306a36Sopenharmony_ci * 'Gee thats tricky lets do it in userspace' the umount_begin 174762306a36Sopenharmony_ci * might fail to complete on the first run through as other tasks 174862306a36Sopenharmony_ci * must return, and the like. Thats for the mount program to worry 174962306a36Sopenharmony_ci * about for the moment. 
175062306a36Sopenharmony_ci */ 175162306a36Sopenharmony_ci 175262306a36Sopenharmony_ci if (flags & MNT_FORCE && sb->s_op->umount_begin) { 175362306a36Sopenharmony_ci sb->s_op->umount_begin(sb); 175462306a36Sopenharmony_ci } 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci /* 175762306a36Sopenharmony_ci * No sense to grab the lock for this test, but test itself looks 175862306a36Sopenharmony_ci * somewhat bogus. Suggestions for better replacement? 175962306a36Sopenharmony_ci * Ho-hum... In principle, we might treat that as umount + switch 176062306a36Sopenharmony_ci * to rootfs. GC would eventually take care of the old vfsmount. 176162306a36Sopenharmony_ci * Actually it makes sense, especially if rootfs would contain a 176262306a36Sopenharmony_ci * /reboot - static binary that would close all descriptors and 176362306a36Sopenharmony_ci * call reboot(9). Then init(8) could umount root and exec /reboot. 176462306a36Sopenharmony_ci */ 176562306a36Sopenharmony_ci if (&mnt->mnt == current->fs->root.mnt && !(flags & MNT_DETACH)) { 176662306a36Sopenharmony_ci /* 176762306a36Sopenharmony_ci * Special case for "unmounting" root ... 176862306a36Sopenharmony_ci * we just try to remount it readonly. 
176962306a36Sopenharmony_ci */ 177062306a36Sopenharmony_ci if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) 177162306a36Sopenharmony_ci return -EPERM; 177262306a36Sopenharmony_ci return do_umount_root(sb); 177362306a36Sopenharmony_ci } 177462306a36Sopenharmony_ci 177562306a36Sopenharmony_ci namespace_lock(); 177662306a36Sopenharmony_ci lock_mount_hash(); 177762306a36Sopenharmony_ci 177862306a36Sopenharmony_ci /* Recheck MNT_LOCKED with the locks held */ 177962306a36Sopenharmony_ci retval = -EINVAL; 178062306a36Sopenharmony_ci if (mnt->mnt.mnt_flags & MNT_LOCKED) 178162306a36Sopenharmony_ci goto out; 178262306a36Sopenharmony_ci 178362306a36Sopenharmony_ci event++; 178462306a36Sopenharmony_ci if (flags & MNT_DETACH) { 178562306a36Sopenharmony_ci if (!list_empty(&mnt->mnt_list)) 178662306a36Sopenharmony_ci umount_tree(mnt, UMOUNT_PROPAGATE); 178762306a36Sopenharmony_ci retval = 0; 178862306a36Sopenharmony_ci } else { 178962306a36Sopenharmony_ci shrink_submounts(mnt); 179062306a36Sopenharmony_ci retval = -EBUSY; 179162306a36Sopenharmony_ci if (!propagate_mount_busy(mnt, 2)) { 179262306a36Sopenharmony_ci if (!list_empty(&mnt->mnt_list)) 179362306a36Sopenharmony_ci umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); 179462306a36Sopenharmony_ci retval = 0; 179562306a36Sopenharmony_ci } 179662306a36Sopenharmony_ci } 179762306a36Sopenharmony_ciout: 179862306a36Sopenharmony_ci unlock_mount_hash(); 179962306a36Sopenharmony_ci namespace_unlock(); 180062306a36Sopenharmony_ci return retval; 180162306a36Sopenharmony_ci} 180262306a36Sopenharmony_ci 180362306a36Sopenharmony_ci/* 180462306a36Sopenharmony_ci * __detach_mounts - lazily unmount all mounts on the specified dentry 180562306a36Sopenharmony_ci * 180662306a36Sopenharmony_ci * During unlink, rmdir, and d_drop it is possible to loose the path 180762306a36Sopenharmony_ci * to an existing mountpoint, and wind up leaking the mount. 
180862306a36Sopenharmony_ci * detach_mounts allows lazily unmounting those mounts instead of 180962306a36Sopenharmony_ci * leaking them. 181062306a36Sopenharmony_ci * 181162306a36Sopenharmony_ci * The caller may hold dentry->d_inode->i_mutex. 181262306a36Sopenharmony_ci */ 181362306a36Sopenharmony_civoid __detach_mounts(struct dentry *dentry) 181462306a36Sopenharmony_ci{ 181562306a36Sopenharmony_ci struct mountpoint *mp; 181662306a36Sopenharmony_ci struct mount *mnt; 181762306a36Sopenharmony_ci 181862306a36Sopenharmony_ci namespace_lock(); 181962306a36Sopenharmony_ci lock_mount_hash(); 182062306a36Sopenharmony_ci mp = lookup_mountpoint(dentry); 182162306a36Sopenharmony_ci if (!mp) 182262306a36Sopenharmony_ci goto out_unlock; 182362306a36Sopenharmony_ci 182462306a36Sopenharmony_ci event++; 182562306a36Sopenharmony_ci while (!hlist_empty(&mp->m_list)) { 182662306a36Sopenharmony_ci mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list); 182762306a36Sopenharmony_ci if (mnt->mnt.mnt_flags & MNT_UMOUNT) { 182862306a36Sopenharmony_ci umount_mnt(mnt); 182962306a36Sopenharmony_ci hlist_add_head(&mnt->mnt_umount, &unmounted); 183062306a36Sopenharmony_ci } 183162306a36Sopenharmony_ci else umount_tree(mnt, UMOUNT_CONNECTED); 183262306a36Sopenharmony_ci } 183362306a36Sopenharmony_ci put_mountpoint(mp); 183462306a36Sopenharmony_ciout_unlock: 183562306a36Sopenharmony_ci unlock_mount_hash(); 183662306a36Sopenharmony_ci namespace_unlock(); 183762306a36Sopenharmony_ci} 183862306a36Sopenharmony_ci 183962306a36Sopenharmony_ci/* 184062306a36Sopenharmony_ci * Is the caller allowed to modify his namespace? 
184162306a36Sopenharmony_ci */ 184262306a36Sopenharmony_cibool may_mount(void) 184362306a36Sopenharmony_ci{ 184462306a36Sopenharmony_ci return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN); 184562306a36Sopenharmony_ci} 184662306a36Sopenharmony_ci 184762306a36Sopenharmony_ci/** 184862306a36Sopenharmony_ci * path_mounted - check whether path is mounted 184962306a36Sopenharmony_ci * @path: path to check 185062306a36Sopenharmony_ci * 185162306a36Sopenharmony_ci * Determine whether @path refers to the root of a mount. 185262306a36Sopenharmony_ci * 185362306a36Sopenharmony_ci * Return: true if @path is the root of a mount, false if not. 185462306a36Sopenharmony_ci */ 185562306a36Sopenharmony_cistatic inline bool path_mounted(const struct path *path) 185662306a36Sopenharmony_ci{ 185762306a36Sopenharmony_ci return path->mnt->mnt_root == path->dentry; 185862306a36Sopenharmony_ci} 185962306a36Sopenharmony_ci 186062306a36Sopenharmony_cistatic void warn_mandlock(void) 186162306a36Sopenharmony_ci{ 186262306a36Sopenharmony_ci pr_warn_once("=======================================================\n" 186362306a36Sopenharmony_ci "WARNING: The mand mount option has been deprecated and\n" 186462306a36Sopenharmony_ci " and is ignored by this kernel. 
Remove the mand\n" 186562306a36Sopenharmony_ci " option from the mount to silence this warning.\n" 186662306a36Sopenharmony_ci "=======================================================\n"); 186762306a36Sopenharmony_ci} 186862306a36Sopenharmony_ci 186962306a36Sopenharmony_cistatic int can_umount(const struct path *path, int flags) 187062306a36Sopenharmony_ci{ 187162306a36Sopenharmony_ci struct mount *mnt = real_mount(path->mnt); 187262306a36Sopenharmony_ci 187362306a36Sopenharmony_ci if (!may_mount()) 187462306a36Sopenharmony_ci return -EPERM; 187562306a36Sopenharmony_ci if (!path_mounted(path)) 187662306a36Sopenharmony_ci return -EINVAL; 187762306a36Sopenharmony_ci if (!check_mnt(mnt)) 187862306a36Sopenharmony_ci return -EINVAL; 187962306a36Sopenharmony_ci if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ 188062306a36Sopenharmony_ci return -EINVAL; 188162306a36Sopenharmony_ci if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) 188262306a36Sopenharmony_ci return -EPERM; 188362306a36Sopenharmony_ci return 0; 188462306a36Sopenharmony_ci} 188562306a36Sopenharmony_ci 188662306a36Sopenharmony_ci// caller is responsible for flags being sane 188762306a36Sopenharmony_ciint path_umount(struct path *path, int flags) 188862306a36Sopenharmony_ci{ 188962306a36Sopenharmony_ci struct mount *mnt = real_mount(path->mnt); 189062306a36Sopenharmony_ci int ret; 189162306a36Sopenharmony_ci 189262306a36Sopenharmony_ci ret = can_umount(path, flags); 189362306a36Sopenharmony_ci if (!ret) 189462306a36Sopenharmony_ci ret = do_umount(mnt, flags); 189562306a36Sopenharmony_ci 189662306a36Sopenharmony_ci /* we mustn't call path_put() as that would clear mnt_expiry_mark */ 189762306a36Sopenharmony_ci dput(path->dentry); 189862306a36Sopenharmony_ci mntput_no_expire(mnt); 189962306a36Sopenharmony_ci return ret; 190062306a36Sopenharmony_ci} 190162306a36Sopenharmony_ci 190262306a36Sopenharmony_cistatic int ksys_umount(char __user *name, int flags) 190362306a36Sopenharmony_ci{ 
190462306a36Sopenharmony_ci int lookup_flags = LOOKUP_MOUNTPOINT; 190562306a36Sopenharmony_ci struct path path; 190662306a36Sopenharmony_ci int ret; 190762306a36Sopenharmony_ci 190862306a36Sopenharmony_ci // basic validity checks done first 190962306a36Sopenharmony_ci if (flags & ~(MNT_FORCE | MNT_DETACH | MNT_EXPIRE | UMOUNT_NOFOLLOW)) 191062306a36Sopenharmony_ci return -EINVAL; 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci if (!(flags & UMOUNT_NOFOLLOW)) 191362306a36Sopenharmony_ci lookup_flags |= LOOKUP_FOLLOW; 191462306a36Sopenharmony_ci ret = user_path_at(AT_FDCWD, name, lookup_flags, &path); 191562306a36Sopenharmony_ci if (ret) 191662306a36Sopenharmony_ci return ret; 191762306a36Sopenharmony_ci return path_umount(&path, flags); 191862306a36Sopenharmony_ci} 191962306a36Sopenharmony_ci 192062306a36Sopenharmony_ciSYSCALL_DEFINE2(umount, char __user *, name, int, flags) 192162306a36Sopenharmony_ci{ 192262306a36Sopenharmony_ci return ksys_umount(name, flags); 192362306a36Sopenharmony_ci} 192462306a36Sopenharmony_ci 192562306a36Sopenharmony_ci#ifdef __ARCH_WANT_SYS_OLDUMOUNT 192662306a36Sopenharmony_ci 192762306a36Sopenharmony_ci/* 192862306a36Sopenharmony_ci * The 2.0 compatible umount. No flags. 192962306a36Sopenharmony_ci */ 193062306a36Sopenharmony_ciSYSCALL_DEFINE1(oldumount, char __user *, name) 193162306a36Sopenharmony_ci{ 193262306a36Sopenharmony_ci return ksys_umount(name, 0); 193362306a36Sopenharmony_ci} 193462306a36Sopenharmony_ci 193562306a36Sopenharmony_ci#endif 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_cistatic bool is_mnt_ns_file(struct dentry *dentry) 193862306a36Sopenharmony_ci{ 193962306a36Sopenharmony_ci /* Is this a proxy for a mount namespace? 
*/ 194062306a36Sopenharmony_ci return dentry->d_op == &ns_dentry_operations && 194162306a36Sopenharmony_ci dentry->d_fsdata == &mntns_operations; 194262306a36Sopenharmony_ci} 194362306a36Sopenharmony_ci 194462306a36Sopenharmony_cistatic struct mnt_namespace *to_mnt_ns(struct ns_common *ns) 194562306a36Sopenharmony_ci{ 194662306a36Sopenharmony_ci return container_of(ns, struct mnt_namespace, ns); 194762306a36Sopenharmony_ci} 194862306a36Sopenharmony_ci 194962306a36Sopenharmony_cistruct ns_common *from_mnt_ns(struct mnt_namespace *mnt) 195062306a36Sopenharmony_ci{ 195162306a36Sopenharmony_ci return &mnt->ns; 195262306a36Sopenharmony_ci} 195362306a36Sopenharmony_ci 195462306a36Sopenharmony_cistatic bool mnt_ns_loop(struct dentry *dentry) 195562306a36Sopenharmony_ci{ 195662306a36Sopenharmony_ci /* Could bind mounting the mount namespace inode cause a 195762306a36Sopenharmony_ci * mount namespace loop? 195862306a36Sopenharmony_ci */ 195962306a36Sopenharmony_ci struct mnt_namespace *mnt_ns; 196062306a36Sopenharmony_ci if (!is_mnt_ns_file(dentry)) 196162306a36Sopenharmony_ci return false; 196262306a36Sopenharmony_ci 196362306a36Sopenharmony_ci mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)); 196462306a36Sopenharmony_ci return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; 196562306a36Sopenharmony_ci} 196662306a36Sopenharmony_ci 196762306a36Sopenharmony_cistruct mount *copy_tree(struct mount *mnt, struct dentry *dentry, 196862306a36Sopenharmony_ci int flag) 196962306a36Sopenharmony_ci{ 197062306a36Sopenharmony_ci struct mount *res, *p, *q, *r, *parent; 197162306a36Sopenharmony_ci 197262306a36Sopenharmony_ci if (!(flag & CL_COPY_UNBINDABLE) && IS_MNT_UNBINDABLE(mnt)) 197362306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 197462306a36Sopenharmony_ci 197562306a36Sopenharmony_ci if (!(flag & CL_COPY_MNT_NS_FILE) && is_mnt_ns_file(dentry)) 197662306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 197762306a36Sopenharmony_ci 197862306a36Sopenharmony_ci res = q = clone_mnt(mnt, dentry, 
flag); 197962306a36Sopenharmony_ci if (IS_ERR(q)) 198062306a36Sopenharmony_ci return q; 198162306a36Sopenharmony_ci 198262306a36Sopenharmony_ci q->mnt_mountpoint = mnt->mnt_mountpoint; 198362306a36Sopenharmony_ci 198462306a36Sopenharmony_ci p = mnt; 198562306a36Sopenharmony_ci list_for_each_entry(r, &mnt->mnt_mounts, mnt_child) { 198662306a36Sopenharmony_ci struct mount *s; 198762306a36Sopenharmony_ci if (!is_subdir(r->mnt_mountpoint, dentry)) 198862306a36Sopenharmony_ci continue; 198962306a36Sopenharmony_ci 199062306a36Sopenharmony_ci for (s = r; s; s = next_mnt(s, r)) { 199162306a36Sopenharmony_ci if (!(flag & CL_COPY_UNBINDABLE) && 199262306a36Sopenharmony_ci IS_MNT_UNBINDABLE(s)) { 199362306a36Sopenharmony_ci if (s->mnt.mnt_flags & MNT_LOCKED) { 199462306a36Sopenharmony_ci /* Both unbindable and locked. */ 199562306a36Sopenharmony_ci q = ERR_PTR(-EPERM); 199662306a36Sopenharmony_ci goto out; 199762306a36Sopenharmony_ci } else { 199862306a36Sopenharmony_ci s = skip_mnt_tree(s); 199962306a36Sopenharmony_ci continue; 200062306a36Sopenharmony_ci } 200162306a36Sopenharmony_ci } 200262306a36Sopenharmony_ci if (!(flag & CL_COPY_MNT_NS_FILE) && 200362306a36Sopenharmony_ci is_mnt_ns_file(s->mnt.mnt_root)) { 200462306a36Sopenharmony_ci s = skip_mnt_tree(s); 200562306a36Sopenharmony_ci continue; 200662306a36Sopenharmony_ci } 200762306a36Sopenharmony_ci while (p != s->mnt_parent) { 200862306a36Sopenharmony_ci p = p->mnt_parent; 200962306a36Sopenharmony_ci q = q->mnt_parent; 201062306a36Sopenharmony_ci } 201162306a36Sopenharmony_ci p = s; 201262306a36Sopenharmony_ci parent = q; 201362306a36Sopenharmony_ci q = clone_mnt(p, p->mnt.mnt_root, flag); 201462306a36Sopenharmony_ci if (IS_ERR(q)) 201562306a36Sopenharmony_ci goto out; 201662306a36Sopenharmony_ci lock_mount_hash(); 201762306a36Sopenharmony_ci list_add_tail(&q->mnt_list, &res->mnt_list); 201862306a36Sopenharmony_ci attach_mnt(q, parent, p->mnt_mp, false); 201962306a36Sopenharmony_ci unlock_mount_hash(); 
202062306a36Sopenharmony_ci } 202162306a36Sopenharmony_ci } 202262306a36Sopenharmony_ci return res; 202362306a36Sopenharmony_ciout: 202462306a36Sopenharmony_ci if (res) { 202562306a36Sopenharmony_ci lock_mount_hash(); 202662306a36Sopenharmony_ci umount_tree(res, UMOUNT_SYNC); 202762306a36Sopenharmony_ci unlock_mount_hash(); 202862306a36Sopenharmony_ci } 202962306a36Sopenharmony_ci return q; 203062306a36Sopenharmony_ci} 203162306a36Sopenharmony_ci 203262306a36Sopenharmony_ci/* Caller should check returned pointer for errors */ 203362306a36Sopenharmony_ci 203462306a36Sopenharmony_cistruct vfsmount *collect_mounts(const struct path *path) 203562306a36Sopenharmony_ci{ 203662306a36Sopenharmony_ci struct mount *tree; 203762306a36Sopenharmony_ci namespace_lock(); 203862306a36Sopenharmony_ci if (!check_mnt(real_mount(path->mnt))) 203962306a36Sopenharmony_ci tree = ERR_PTR(-EINVAL); 204062306a36Sopenharmony_ci else 204162306a36Sopenharmony_ci tree = copy_tree(real_mount(path->mnt), path->dentry, 204262306a36Sopenharmony_ci CL_COPY_ALL | CL_PRIVATE); 204362306a36Sopenharmony_ci namespace_unlock(); 204462306a36Sopenharmony_ci if (IS_ERR(tree)) 204562306a36Sopenharmony_ci return ERR_CAST(tree); 204662306a36Sopenharmony_ci return &tree->mnt; 204762306a36Sopenharmony_ci} 204862306a36Sopenharmony_ci 204962306a36Sopenharmony_cistatic void free_mnt_ns(struct mnt_namespace *); 205062306a36Sopenharmony_cistatic struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool); 205162306a36Sopenharmony_ci 205262306a36Sopenharmony_civoid dissolve_on_fput(struct vfsmount *mnt) 205362306a36Sopenharmony_ci{ 205462306a36Sopenharmony_ci struct mnt_namespace *ns; 205562306a36Sopenharmony_ci namespace_lock(); 205662306a36Sopenharmony_ci lock_mount_hash(); 205762306a36Sopenharmony_ci ns = real_mount(mnt)->mnt_ns; 205862306a36Sopenharmony_ci if (ns) { 205962306a36Sopenharmony_ci if (is_anon_ns(ns)) 206062306a36Sopenharmony_ci umount_tree(real_mount(mnt), UMOUNT_CONNECTED); 
206162306a36Sopenharmony_ci else 206262306a36Sopenharmony_ci ns = NULL; 206362306a36Sopenharmony_ci } 206462306a36Sopenharmony_ci unlock_mount_hash(); 206562306a36Sopenharmony_ci namespace_unlock(); 206662306a36Sopenharmony_ci if (ns) 206762306a36Sopenharmony_ci free_mnt_ns(ns); 206862306a36Sopenharmony_ci} 206962306a36Sopenharmony_ci 207062306a36Sopenharmony_civoid drop_collected_mounts(struct vfsmount *mnt) 207162306a36Sopenharmony_ci{ 207262306a36Sopenharmony_ci namespace_lock(); 207362306a36Sopenharmony_ci lock_mount_hash(); 207462306a36Sopenharmony_ci umount_tree(real_mount(mnt), 0); 207562306a36Sopenharmony_ci unlock_mount_hash(); 207662306a36Sopenharmony_ci namespace_unlock(); 207762306a36Sopenharmony_ci} 207862306a36Sopenharmony_ci 207962306a36Sopenharmony_cistatic bool has_locked_children(struct mount *mnt, struct dentry *dentry) 208062306a36Sopenharmony_ci{ 208162306a36Sopenharmony_ci struct mount *child; 208262306a36Sopenharmony_ci 208362306a36Sopenharmony_ci list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { 208462306a36Sopenharmony_ci if (!is_subdir(child->mnt_mountpoint, dentry)) 208562306a36Sopenharmony_ci continue; 208662306a36Sopenharmony_ci 208762306a36Sopenharmony_ci if (child->mnt.mnt_flags & MNT_LOCKED) 208862306a36Sopenharmony_ci return true; 208962306a36Sopenharmony_ci } 209062306a36Sopenharmony_ci return false; 209162306a36Sopenharmony_ci} 209262306a36Sopenharmony_ci 209362306a36Sopenharmony_ci/** 209462306a36Sopenharmony_ci * clone_private_mount - create a private clone of a path 209562306a36Sopenharmony_ci * @path: path to clone 209662306a36Sopenharmony_ci * 209762306a36Sopenharmony_ci * This creates a new vfsmount, which will be the clone of @path. The new mount 209862306a36Sopenharmony_ci * will not be attached anywhere in the namespace and will be private (i.e. 209962306a36Sopenharmony_ci * changes to the originating mount won't be propagated into this). 
210062306a36Sopenharmony_ci * 210162306a36Sopenharmony_ci * Release with mntput(). 210262306a36Sopenharmony_ci */ 210362306a36Sopenharmony_cistruct vfsmount *clone_private_mount(const struct path *path) 210462306a36Sopenharmony_ci{ 210562306a36Sopenharmony_ci struct mount *old_mnt = real_mount(path->mnt); 210662306a36Sopenharmony_ci struct mount *new_mnt; 210762306a36Sopenharmony_ci 210862306a36Sopenharmony_ci down_read(&namespace_sem); 210962306a36Sopenharmony_ci if (IS_MNT_UNBINDABLE(old_mnt)) 211062306a36Sopenharmony_ci goto invalid; 211162306a36Sopenharmony_ci 211262306a36Sopenharmony_ci if (!check_mnt(old_mnt)) 211362306a36Sopenharmony_ci goto invalid; 211462306a36Sopenharmony_ci 211562306a36Sopenharmony_ci if (has_locked_children(old_mnt, path->dentry)) 211662306a36Sopenharmony_ci goto invalid; 211762306a36Sopenharmony_ci 211862306a36Sopenharmony_ci new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); 211962306a36Sopenharmony_ci up_read(&namespace_sem); 212062306a36Sopenharmony_ci 212162306a36Sopenharmony_ci if (IS_ERR(new_mnt)) 212262306a36Sopenharmony_ci return ERR_CAST(new_mnt); 212362306a36Sopenharmony_ci 212462306a36Sopenharmony_ci /* Longterm mount to be removed by kern_unmount*() */ 212562306a36Sopenharmony_ci new_mnt->mnt_ns = MNT_NS_INTERNAL; 212662306a36Sopenharmony_ci 212762306a36Sopenharmony_ci return &new_mnt->mnt; 212862306a36Sopenharmony_ci 212962306a36Sopenharmony_ciinvalid: 213062306a36Sopenharmony_ci up_read(&namespace_sem); 213162306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 213262306a36Sopenharmony_ci} 213362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(clone_private_mount); 213462306a36Sopenharmony_ci 213562306a36Sopenharmony_ciint iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, 213662306a36Sopenharmony_ci struct vfsmount *root) 213762306a36Sopenharmony_ci{ 213862306a36Sopenharmony_ci struct mount *mnt; 213962306a36Sopenharmony_ci int res = f(root, arg); 214062306a36Sopenharmony_ci if (res) 214162306a36Sopenharmony_ci return 
res; 214262306a36Sopenharmony_ci list_for_each_entry(mnt, &real_mount(root)->mnt_list, mnt_list) { 214362306a36Sopenharmony_ci res = f(&mnt->mnt, arg); 214462306a36Sopenharmony_ci if (res) 214562306a36Sopenharmony_ci return res; 214662306a36Sopenharmony_ci } 214762306a36Sopenharmony_ci return 0; 214862306a36Sopenharmony_ci} 214962306a36Sopenharmony_ci 215062306a36Sopenharmony_cistatic void lock_mnt_tree(struct mount *mnt) 215162306a36Sopenharmony_ci{ 215262306a36Sopenharmony_ci struct mount *p; 215362306a36Sopenharmony_ci 215462306a36Sopenharmony_ci for (p = mnt; p; p = next_mnt(p, mnt)) { 215562306a36Sopenharmony_ci int flags = p->mnt.mnt_flags; 215662306a36Sopenharmony_ci /* Don't allow unprivileged users to change mount flags */ 215762306a36Sopenharmony_ci flags |= MNT_LOCK_ATIME; 215862306a36Sopenharmony_ci 215962306a36Sopenharmony_ci if (flags & MNT_READONLY) 216062306a36Sopenharmony_ci flags |= MNT_LOCK_READONLY; 216162306a36Sopenharmony_ci 216262306a36Sopenharmony_ci if (flags & MNT_NODEV) 216362306a36Sopenharmony_ci flags |= MNT_LOCK_NODEV; 216462306a36Sopenharmony_ci 216562306a36Sopenharmony_ci if (flags & MNT_NOSUID) 216662306a36Sopenharmony_ci flags |= MNT_LOCK_NOSUID; 216762306a36Sopenharmony_ci 216862306a36Sopenharmony_ci if (flags & MNT_NOEXEC) 216962306a36Sopenharmony_ci flags |= MNT_LOCK_NOEXEC; 217062306a36Sopenharmony_ci /* Don't allow unprivileged users to reveal what is under a mount */ 217162306a36Sopenharmony_ci if (list_empty(&p->mnt_expire)) 217262306a36Sopenharmony_ci flags |= MNT_LOCKED; 217362306a36Sopenharmony_ci p->mnt.mnt_flags = flags; 217462306a36Sopenharmony_ci } 217562306a36Sopenharmony_ci} 217662306a36Sopenharmony_ci 217762306a36Sopenharmony_cistatic void cleanup_group_ids(struct mount *mnt, struct mount *end) 217862306a36Sopenharmony_ci{ 217962306a36Sopenharmony_ci struct mount *p; 218062306a36Sopenharmony_ci 218162306a36Sopenharmony_ci for (p = mnt; p != end; p = next_mnt(p, mnt)) { 218262306a36Sopenharmony_ci if 
(p->mnt_group_id && !IS_MNT_SHARED(p)) 218362306a36Sopenharmony_ci mnt_release_group_id(p); 218462306a36Sopenharmony_ci } 218562306a36Sopenharmony_ci} 218662306a36Sopenharmony_ci 218762306a36Sopenharmony_cistatic int invent_group_ids(struct mount *mnt, bool recurse) 218862306a36Sopenharmony_ci{ 218962306a36Sopenharmony_ci struct mount *p; 219062306a36Sopenharmony_ci 219162306a36Sopenharmony_ci for (p = mnt; p; p = recurse ? next_mnt(p, mnt) : NULL) { 219262306a36Sopenharmony_ci if (!p->mnt_group_id && !IS_MNT_SHARED(p)) { 219362306a36Sopenharmony_ci int err = mnt_alloc_group_id(p); 219462306a36Sopenharmony_ci if (err) { 219562306a36Sopenharmony_ci cleanup_group_ids(mnt, p); 219662306a36Sopenharmony_ci return err; 219762306a36Sopenharmony_ci } 219862306a36Sopenharmony_ci } 219962306a36Sopenharmony_ci } 220062306a36Sopenharmony_ci 220162306a36Sopenharmony_ci return 0; 220262306a36Sopenharmony_ci} 220362306a36Sopenharmony_ci 220462306a36Sopenharmony_ciint count_mounts(struct mnt_namespace *ns, struct mount *mnt) 220562306a36Sopenharmony_ci{ 220662306a36Sopenharmony_ci unsigned int max = READ_ONCE(sysctl_mount_max); 220762306a36Sopenharmony_ci unsigned int mounts = 0; 220862306a36Sopenharmony_ci struct mount *p; 220962306a36Sopenharmony_ci 221062306a36Sopenharmony_ci if (ns->mounts >= max) 221162306a36Sopenharmony_ci return -ENOSPC; 221262306a36Sopenharmony_ci max -= ns->mounts; 221362306a36Sopenharmony_ci if (ns->pending_mounts >= max) 221462306a36Sopenharmony_ci return -ENOSPC; 221562306a36Sopenharmony_ci max -= ns->pending_mounts; 221662306a36Sopenharmony_ci 221762306a36Sopenharmony_ci for (p = mnt; p; p = next_mnt(p, mnt)) 221862306a36Sopenharmony_ci mounts++; 221962306a36Sopenharmony_ci 222062306a36Sopenharmony_ci if (mounts > max) 222162306a36Sopenharmony_ci return -ENOSPC; 222262306a36Sopenharmony_ci 222362306a36Sopenharmony_ci ns->pending_mounts += mounts; 222462306a36Sopenharmony_ci return 0; 222562306a36Sopenharmony_ci} 222662306a36Sopenharmony_ci 
222762306a36Sopenharmony_cienum mnt_tree_flags_t { 222862306a36Sopenharmony_ci MNT_TREE_MOVE = BIT(0), 222962306a36Sopenharmony_ci MNT_TREE_BENEATH = BIT(1), 223062306a36Sopenharmony_ci}; 223162306a36Sopenharmony_ci 223262306a36Sopenharmony_ci/** 223362306a36Sopenharmony_ci * attach_recursive_mnt - attach a source mount tree 223462306a36Sopenharmony_ci * @source_mnt: mount tree to be attached 223562306a36Sopenharmony_ci * @top_mnt: mount that @source_mnt will be mounted on or mounted beneath 223662306a36Sopenharmony_ci * @dest_mp: the mountpoint @source_mnt will be mounted at 223762306a36Sopenharmony_ci * @flags: modify how @source_mnt is supposed to be attached 223862306a36Sopenharmony_ci * 223962306a36Sopenharmony_ci * NOTE: in the table below explains the semantics when a source mount 224062306a36Sopenharmony_ci * of a given type is attached to a destination mount of a given type. 224162306a36Sopenharmony_ci * --------------------------------------------------------------------------- 224262306a36Sopenharmony_ci * | BIND MOUNT OPERATION | 224362306a36Sopenharmony_ci * |************************************************************************** 224462306a36Sopenharmony_ci * | source-->| shared | private | slave | unbindable | 224562306a36Sopenharmony_ci * | dest | | | | | 224662306a36Sopenharmony_ci * | | | | | | | 224762306a36Sopenharmony_ci * | v | | | | | 224862306a36Sopenharmony_ci * |************************************************************************** 224962306a36Sopenharmony_ci * | shared | shared (++) | shared (+) | shared(+++)| invalid | 225062306a36Sopenharmony_ci * | | | | | | 225162306a36Sopenharmony_ci * |non-shared| shared (+) | private | slave (*) | invalid | 225262306a36Sopenharmony_ci * *************************************************************************** 225362306a36Sopenharmony_ci * A bind operation clones the source mount and mounts the clone on the 225462306a36Sopenharmony_ci * destination mount. 
225562306a36Sopenharmony_ci * 225662306a36Sopenharmony_ci * (++) the cloned mount is propagated to all the mounts in the propagation 225762306a36Sopenharmony_ci * tree of the destination mount and the cloned mount is added to 225862306a36Sopenharmony_ci * the peer group of the source mount. 225962306a36Sopenharmony_ci * (+) the cloned mount is created under the destination mount and is marked 226062306a36Sopenharmony_ci * as shared. The cloned mount is added to the peer group of the source 226162306a36Sopenharmony_ci * mount. 226262306a36Sopenharmony_ci * (+++) the mount is propagated to all the mounts in the propagation tree 226362306a36Sopenharmony_ci * of the destination mount and the cloned mount is made slave 226462306a36Sopenharmony_ci * of the same master as that of the source mount. The cloned mount 226562306a36Sopenharmony_ci * is marked as 'shared and slave'. 226662306a36Sopenharmony_ci * (*) the cloned mount is made a slave of the same master as that of the 226762306a36Sopenharmony_ci * source mount. 
226862306a36Sopenharmony_ci * 226962306a36Sopenharmony_ci * --------------------------------------------------------------------------- 227062306a36Sopenharmony_ci * | MOVE MOUNT OPERATION | 227162306a36Sopenharmony_ci * |************************************************************************** 227262306a36Sopenharmony_ci * | source-->| shared | private | slave | unbindable | 227362306a36Sopenharmony_ci * | dest | | | | | 227462306a36Sopenharmony_ci * | | | | | | | 227562306a36Sopenharmony_ci * | v | | | | | 227662306a36Sopenharmony_ci * |************************************************************************** 227762306a36Sopenharmony_ci * | shared | shared (+) | shared (+) | shared(+++) | invalid | 227862306a36Sopenharmony_ci * | | | | | | 227962306a36Sopenharmony_ci * |non-shared| shared (+*) | private | slave (*) | unbindable | 228062306a36Sopenharmony_ci * *************************************************************************** 228162306a36Sopenharmony_ci * 228262306a36Sopenharmony_ci * (+) the mount is moved to the destination. And is then propagated to 228362306a36Sopenharmony_ci * all the mounts in the propagation tree of the destination mount. 228462306a36Sopenharmony_ci * (+*) the mount is moved to the destination. 228562306a36Sopenharmony_ci * (+++) the mount is moved to the destination and is then propagated to 228662306a36Sopenharmony_ci * all the mounts belonging to the destination mount's propagation tree. 228762306a36Sopenharmony_ci * the mount is marked as 'shared and slave'. 228862306a36Sopenharmony_ci * (*) the mount continues to be a slave at the new location. 228962306a36Sopenharmony_ci * 229062306a36Sopenharmony_ci * if the source mount is a tree, the operations explained above is 229162306a36Sopenharmony_ci * applied to each mount in the tree. 229262306a36Sopenharmony_ci * Must be called without spinlocks held, since this function can sleep 229362306a36Sopenharmony_ci * in allocations. 
229462306a36Sopenharmony_ci * 229562306a36Sopenharmony_ci * Context: The function expects namespace_lock() to be held. 229662306a36Sopenharmony_ci * Return: If @source_mnt was successfully attached 0 is returned. 229762306a36Sopenharmony_ci * Otherwise a negative error code is returned. 229862306a36Sopenharmony_ci */ 229962306a36Sopenharmony_cistatic int attach_recursive_mnt(struct mount *source_mnt, 230062306a36Sopenharmony_ci struct mount *top_mnt, 230162306a36Sopenharmony_ci struct mountpoint *dest_mp, 230262306a36Sopenharmony_ci enum mnt_tree_flags_t flags) 230362306a36Sopenharmony_ci{ 230462306a36Sopenharmony_ci struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; 230562306a36Sopenharmony_ci HLIST_HEAD(tree_list); 230662306a36Sopenharmony_ci struct mnt_namespace *ns = top_mnt->mnt_ns; 230762306a36Sopenharmony_ci struct mountpoint *smp; 230862306a36Sopenharmony_ci struct mount *child, *dest_mnt, *p; 230962306a36Sopenharmony_ci struct hlist_node *n; 231062306a36Sopenharmony_ci int err = 0; 231162306a36Sopenharmony_ci bool moving = flags & MNT_TREE_MOVE, beneath = flags & MNT_TREE_BENEATH; 231262306a36Sopenharmony_ci 231362306a36Sopenharmony_ci /* 231462306a36Sopenharmony_ci * Preallocate a mountpoint in case the new mounts need to be 231562306a36Sopenharmony_ci * mounted beneath mounts on the same mountpoint. 231662306a36Sopenharmony_ci */ 231762306a36Sopenharmony_ci smp = get_mountpoint(source_mnt->mnt.mnt_root); 231862306a36Sopenharmony_ci if (IS_ERR(smp)) 231962306a36Sopenharmony_ci return PTR_ERR(smp); 232062306a36Sopenharmony_ci 232162306a36Sopenharmony_ci /* Is there space to add these mounts to the mount namespace? 
*/ 232262306a36Sopenharmony_ci if (!moving) { 232362306a36Sopenharmony_ci err = count_mounts(ns, source_mnt); 232462306a36Sopenharmony_ci if (err) 232562306a36Sopenharmony_ci goto out; 232662306a36Sopenharmony_ci } 232762306a36Sopenharmony_ci 232862306a36Sopenharmony_ci if (beneath) 232962306a36Sopenharmony_ci dest_mnt = top_mnt->mnt_parent; 233062306a36Sopenharmony_ci else 233162306a36Sopenharmony_ci dest_mnt = top_mnt; 233262306a36Sopenharmony_ci 233362306a36Sopenharmony_ci if (IS_MNT_SHARED(dest_mnt)) { 233462306a36Sopenharmony_ci err = invent_group_ids(source_mnt, true); 233562306a36Sopenharmony_ci if (err) 233662306a36Sopenharmony_ci goto out; 233762306a36Sopenharmony_ci err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); 233862306a36Sopenharmony_ci } 233962306a36Sopenharmony_ci lock_mount_hash(); 234062306a36Sopenharmony_ci if (err) 234162306a36Sopenharmony_ci goto out_cleanup_ids; 234262306a36Sopenharmony_ci 234362306a36Sopenharmony_ci if (IS_MNT_SHARED(dest_mnt)) { 234462306a36Sopenharmony_ci for (p = source_mnt; p; p = next_mnt(p, source_mnt)) 234562306a36Sopenharmony_ci set_mnt_shared(p); 234662306a36Sopenharmony_ci } 234762306a36Sopenharmony_ci 234862306a36Sopenharmony_ci if (moving) { 234962306a36Sopenharmony_ci if (beneath) 235062306a36Sopenharmony_ci dest_mp = smp; 235162306a36Sopenharmony_ci unhash_mnt(source_mnt); 235262306a36Sopenharmony_ci attach_mnt(source_mnt, top_mnt, dest_mp, beneath); 235362306a36Sopenharmony_ci touch_mnt_namespace(source_mnt->mnt_ns); 235462306a36Sopenharmony_ci } else { 235562306a36Sopenharmony_ci if (source_mnt->mnt_ns) { 235662306a36Sopenharmony_ci /* move from anon - the caller will destroy */ 235762306a36Sopenharmony_ci list_del_init(&source_mnt->mnt_ns->list); 235862306a36Sopenharmony_ci } 235962306a36Sopenharmony_ci if (beneath) 236062306a36Sopenharmony_ci mnt_set_mountpoint_beneath(source_mnt, top_mnt, smp); 236162306a36Sopenharmony_ci else 236262306a36Sopenharmony_ci mnt_set_mountpoint(dest_mnt, dest_mp, 
source_mnt); 236362306a36Sopenharmony_ci commit_tree(source_mnt); 236462306a36Sopenharmony_ci } 236562306a36Sopenharmony_ci 236662306a36Sopenharmony_ci hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { 236762306a36Sopenharmony_ci struct mount *q; 236862306a36Sopenharmony_ci hlist_del_init(&child->mnt_hash); 236962306a36Sopenharmony_ci q = __lookup_mnt(&child->mnt_parent->mnt, 237062306a36Sopenharmony_ci child->mnt_mountpoint); 237162306a36Sopenharmony_ci if (q) 237262306a36Sopenharmony_ci mnt_change_mountpoint(child, smp, q); 237362306a36Sopenharmony_ci /* Notice when we are propagating across user namespaces */ 237462306a36Sopenharmony_ci if (child->mnt_parent->mnt_ns->user_ns != user_ns) 237562306a36Sopenharmony_ci lock_mnt_tree(child); 237662306a36Sopenharmony_ci child->mnt.mnt_flags &= ~MNT_LOCKED; 237762306a36Sopenharmony_ci commit_tree(child); 237862306a36Sopenharmony_ci } 237962306a36Sopenharmony_ci put_mountpoint(smp); 238062306a36Sopenharmony_ci unlock_mount_hash(); 238162306a36Sopenharmony_ci 238262306a36Sopenharmony_ci return 0; 238362306a36Sopenharmony_ci 238462306a36Sopenharmony_ci out_cleanup_ids: 238562306a36Sopenharmony_ci while (!hlist_empty(&tree_list)) { 238662306a36Sopenharmony_ci child = hlist_entry(tree_list.first, struct mount, mnt_hash); 238762306a36Sopenharmony_ci child->mnt_parent->mnt_ns->pending_mounts = 0; 238862306a36Sopenharmony_ci umount_tree(child, UMOUNT_SYNC); 238962306a36Sopenharmony_ci } 239062306a36Sopenharmony_ci unlock_mount_hash(); 239162306a36Sopenharmony_ci cleanup_group_ids(source_mnt, NULL); 239262306a36Sopenharmony_ci out: 239362306a36Sopenharmony_ci ns->pending_mounts = 0; 239462306a36Sopenharmony_ci 239562306a36Sopenharmony_ci read_seqlock_excl(&mount_lock); 239662306a36Sopenharmony_ci put_mountpoint(smp); 239762306a36Sopenharmony_ci read_sequnlock_excl(&mount_lock); 239862306a36Sopenharmony_ci 239962306a36Sopenharmony_ci return err; 240062306a36Sopenharmony_ci} 240162306a36Sopenharmony_ci 
240262306a36Sopenharmony_ci/** 240362306a36Sopenharmony_ci * do_lock_mount - lock mount and mountpoint 240462306a36Sopenharmony_ci * @path: target path 240562306a36Sopenharmony_ci * @beneath: whether the intention is to mount beneath @path 240662306a36Sopenharmony_ci * 240762306a36Sopenharmony_ci * Follow the mount stack on @path until the top mount @mnt is found. If 240862306a36Sopenharmony_ci * the initial @path->{mnt,dentry} is a mountpoint lookup the first 240962306a36Sopenharmony_ci * mount stacked on top of it. Then simply follow @{mnt,mnt->mnt_root} 241062306a36Sopenharmony_ci * until nothing is stacked on top of it anymore. 241162306a36Sopenharmony_ci * 241262306a36Sopenharmony_ci * Acquire the inode_lock() on the top mount's ->mnt_root to protect 241362306a36Sopenharmony_ci * against concurrent removal of the new mountpoint from another mount 241462306a36Sopenharmony_ci * namespace. 241562306a36Sopenharmony_ci * 241662306a36Sopenharmony_ci * If @beneath is requested, acquire inode_lock() on @mnt's mountpoint 241762306a36Sopenharmony_ci * @mp on @mnt->mnt_parent must be acquired. This protects against a 241862306a36Sopenharmony_ci * concurrent unlink of @mp->mnt_dentry from another mount namespace 241962306a36Sopenharmony_ci * where @mnt doesn't have a child mount mounted @mp. A concurrent 242062306a36Sopenharmony_ci * removal of @mnt->mnt_root doesn't matter as nothing will be mounted 242162306a36Sopenharmony_ci * on top of it for @beneath. 242262306a36Sopenharmony_ci * 242362306a36Sopenharmony_ci * In addition, @beneath needs to make sure that @mnt hasn't been 242462306a36Sopenharmony_ci * unmounted or moved from its current mountpoint in between dropping 242562306a36Sopenharmony_ci * @mount_lock and acquiring @namespace_sem. For the !@beneath case @mnt 242662306a36Sopenharmony_ci * being unmounted would be detected later by e.g., calling 242762306a36Sopenharmony_ci * check_mnt(mnt) in the function it's called from. 
For the @beneath 242862306a36Sopenharmony_ci * case however, it's useful to detect it directly in do_lock_mount(). 242962306a36Sopenharmony_ci * If @mnt hasn't been unmounted then @mnt->mnt_mountpoint still points 243062306a36Sopenharmony_ci * to @mnt->mnt_mp->m_dentry. But if @mnt has been unmounted it will 243162306a36Sopenharmony_ci * point to @mnt->mnt_root and @mnt->mnt_mp will be NULL. 243262306a36Sopenharmony_ci * 243362306a36Sopenharmony_ci * Return: Either the target mountpoint on the top mount or the top 243462306a36Sopenharmony_ci * mount's mountpoint. 243562306a36Sopenharmony_ci */ 243662306a36Sopenharmony_cistatic struct mountpoint *do_lock_mount(struct path *path, bool beneath) 243762306a36Sopenharmony_ci{ 243862306a36Sopenharmony_ci struct vfsmount *mnt = path->mnt; 243962306a36Sopenharmony_ci struct dentry *dentry; 244062306a36Sopenharmony_ci struct mountpoint *mp = ERR_PTR(-ENOENT); 244162306a36Sopenharmony_ci 244262306a36Sopenharmony_ci for (;;) { 244362306a36Sopenharmony_ci struct mount *m; 244462306a36Sopenharmony_ci 244562306a36Sopenharmony_ci if (beneath) { 244662306a36Sopenharmony_ci m = real_mount(mnt); 244762306a36Sopenharmony_ci read_seqlock_excl(&mount_lock); 244862306a36Sopenharmony_ci dentry = dget(m->mnt_mountpoint); 244962306a36Sopenharmony_ci read_sequnlock_excl(&mount_lock); 245062306a36Sopenharmony_ci } else { 245162306a36Sopenharmony_ci dentry = path->dentry; 245262306a36Sopenharmony_ci } 245362306a36Sopenharmony_ci 245462306a36Sopenharmony_ci inode_lock(dentry->d_inode); 245562306a36Sopenharmony_ci if (unlikely(cant_mount(dentry))) { 245662306a36Sopenharmony_ci inode_unlock(dentry->d_inode); 245762306a36Sopenharmony_ci goto out; 245862306a36Sopenharmony_ci } 245962306a36Sopenharmony_ci 246062306a36Sopenharmony_ci namespace_lock(); 246162306a36Sopenharmony_ci 246262306a36Sopenharmony_ci if (beneath && (!is_mounted(mnt) || m->mnt_mountpoint != dentry)) { 246362306a36Sopenharmony_ci namespace_unlock(); 246462306a36Sopenharmony_ci 
inode_unlock(dentry->d_inode); 246562306a36Sopenharmony_ci goto out; 246662306a36Sopenharmony_ci } 246762306a36Sopenharmony_ci 246862306a36Sopenharmony_ci mnt = lookup_mnt(path); 246962306a36Sopenharmony_ci if (likely(!mnt)) 247062306a36Sopenharmony_ci break; 247162306a36Sopenharmony_ci 247262306a36Sopenharmony_ci namespace_unlock(); 247362306a36Sopenharmony_ci inode_unlock(dentry->d_inode); 247462306a36Sopenharmony_ci if (beneath) 247562306a36Sopenharmony_ci dput(dentry); 247662306a36Sopenharmony_ci path_put(path); 247762306a36Sopenharmony_ci path->mnt = mnt; 247862306a36Sopenharmony_ci path->dentry = dget(mnt->mnt_root); 247962306a36Sopenharmony_ci } 248062306a36Sopenharmony_ci 248162306a36Sopenharmony_ci mp = get_mountpoint(dentry); 248262306a36Sopenharmony_ci if (IS_ERR(mp)) { 248362306a36Sopenharmony_ci namespace_unlock(); 248462306a36Sopenharmony_ci inode_unlock(dentry->d_inode); 248562306a36Sopenharmony_ci } 248662306a36Sopenharmony_ci 248762306a36Sopenharmony_ciout: 248862306a36Sopenharmony_ci if (beneath) 248962306a36Sopenharmony_ci dput(dentry); 249062306a36Sopenharmony_ci 249162306a36Sopenharmony_ci return mp; 249262306a36Sopenharmony_ci} 249362306a36Sopenharmony_ci 249462306a36Sopenharmony_cistatic inline struct mountpoint *lock_mount(struct path *path) 249562306a36Sopenharmony_ci{ 249662306a36Sopenharmony_ci return do_lock_mount(path, false); 249762306a36Sopenharmony_ci} 249862306a36Sopenharmony_ci 249962306a36Sopenharmony_cistatic void unlock_mount(struct mountpoint *where) 250062306a36Sopenharmony_ci{ 250162306a36Sopenharmony_ci struct dentry *dentry = where->m_dentry; 250262306a36Sopenharmony_ci 250362306a36Sopenharmony_ci read_seqlock_excl(&mount_lock); 250462306a36Sopenharmony_ci put_mountpoint(where); 250562306a36Sopenharmony_ci read_sequnlock_excl(&mount_lock); 250662306a36Sopenharmony_ci 250762306a36Sopenharmony_ci namespace_unlock(); 250862306a36Sopenharmony_ci inode_unlock(dentry->d_inode); 250962306a36Sopenharmony_ci} 
251062306a36Sopenharmony_ci 251162306a36Sopenharmony_cistatic int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) 251262306a36Sopenharmony_ci{ 251362306a36Sopenharmony_ci if (mnt->mnt.mnt_sb->s_flags & SB_NOUSER) 251462306a36Sopenharmony_ci return -EINVAL; 251562306a36Sopenharmony_ci 251662306a36Sopenharmony_ci if (d_is_dir(mp->m_dentry) != 251762306a36Sopenharmony_ci d_is_dir(mnt->mnt.mnt_root)) 251862306a36Sopenharmony_ci return -ENOTDIR; 251962306a36Sopenharmony_ci 252062306a36Sopenharmony_ci return attach_recursive_mnt(mnt, p, mp, 0); 252162306a36Sopenharmony_ci} 252262306a36Sopenharmony_ci 252362306a36Sopenharmony_ci/* 252462306a36Sopenharmony_ci * Sanity check the flags to change_mnt_propagation. 252562306a36Sopenharmony_ci */ 252662306a36Sopenharmony_ci 252762306a36Sopenharmony_cistatic int flags_to_propagation_type(int ms_flags) 252862306a36Sopenharmony_ci{ 252962306a36Sopenharmony_ci int type = ms_flags & ~(MS_REC | MS_SILENT); 253062306a36Sopenharmony_ci 253162306a36Sopenharmony_ci /* Fail if any non-propagation flags are set */ 253262306a36Sopenharmony_ci if (type & ~(MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) 253362306a36Sopenharmony_ci return 0; 253462306a36Sopenharmony_ci /* Only one propagation flag should be set */ 253562306a36Sopenharmony_ci if (!is_power_of_2(type)) 253662306a36Sopenharmony_ci return 0; 253762306a36Sopenharmony_ci return type; 253862306a36Sopenharmony_ci} 253962306a36Sopenharmony_ci 254062306a36Sopenharmony_ci/* 254162306a36Sopenharmony_ci * recursively change the type of the mountpoint. 
254262306a36Sopenharmony_ci */ 254362306a36Sopenharmony_cistatic int do_change_type(struct path *path, int ms_flags) 254462306a36Sopenharmony_ci{ 254562306a36Sopenharmony_ci struct mount *m; 254662306a36Sopenharmony_ci struct mount *mnt = real_mount(path->mnt); 254762306a36Sopenharmony_ci int recurse = ms_flags & MS_REC; 254862306a36Sopenharmony_ci int type; 254962306a36Sopenharmony_ci int err = 0; 255062306a36Sopenharmony_ci 255162306a36Sopenharmony_ci if (!path_mounted(path)) 255262306a36Sopenharmony_ci return -EINVAL; 255362306a36Sopenharmony_ci 255462306a36Sopenharmony_ci type = flags_to_propagation_type(ms_flags); 255562306a36Sopenharmony_ci if (!type) 255662306a36Sopenharmony_ci return -EINVAL; 255762306a36Sopenharmony_ci 255862306a36Sopenharmony_ci namespace_lock(); 255962306a36Sopenharmony_ci if (type == MS_SHARED) { 256062306a36Sopenharmony_ci err = invent_group_ids(mnt, recurse); 256162306a36Sopenharmony_ci if (err) 256262306a36Sopenharmony_ci goto out_unlock; 256362306a36Sopenharmony_ci } 256462306a36Sopenharmony_ci 256562306a36Sopenharmony_ci lock_mount_hash(); 256662306a36Sopenharmony_ci for (m = mnt; m; m = (recurse ? 
next_mnt(m, mnt) : NULL)) 256762306a36Sopenharmony_ci change_mnt_propagation(m, type); 256862306a36Sopenharmony_ci unlock_mount_hash(); 256962306a36Sopenharmony_ci 257062306a36Sopenharmony_ci out_unlock: 257162306a36Sopenharmony_ci namespace_unlock(); 257262306a36Sopenharmony_ci return err; 257362306a36Sopenharmony_ci} 257462306a36Sopenharmony_ci 257562306a36Sopenharmony_cistatic struct mount *__do_loopback(struct path *old_path, int recurse) 257662306a36Sopenharmony_ci{ 257762306a36Sopenharmony_ci struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt); 257862306a36Sopenharmony_ci 257962306a36Sopenharmony_ci if (IS_MNT_UNBINDABLE(old)) 258062306a36Sopenharmony_ci return mnt; 258162306a36Sopenharmony_ci 258262306a36Sopenharmony_ci if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations) 258362306a36Sopenharmony_ci return mnt; 258462306a36Sopenharmony_ci 258562306a36Sopenharmony_ci if (!recurse && has_locked_children(old, old_path->dentry)) 258662306a36Sopenharmony_ci return mnt; 258762306a36Sopenharmony_ci 258862306a36Sopenharmony_ci if (recurse) 258962306a36Sopenharmony_ci mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE); 259062306a36Sopenharmony_ci else 259162306a36Sopenharmony_ci mnt = clone_mnt(old, old_path->dentry, 0); 259262306a36Sopenharmony_ci 259362306a36Sopenharmony_ci if (!IS_ERR(mnt)) 259462306a36Sopenharmony_ci mnt->mnt.mnt_flags &= ~MNT_LOCKED; 259562306a36Sopenharmony_ci 259662306a36Sopenharmony_ci return mnt; 259762306a36Sopenharmony_ci} 259862306a36Sopenharmony_ci 259962306a36Sopenharmony_ci/* 260062306a36Sopenharmony_ci * do loopback mount. 
260162306a36Sopenharmony_ci */ 260262306a36Sopenharmony_cistatic int do_loopback(struct path *path, const char *old_name, 260362306a36Sopenharmony_ci int recurse) 260462306a36Sopenharmony_ci{ 260562306a36Sopenharmony_ci struct path old_path; 260662306a36Sopenharmony_ci struct mount *mnt = NULL, *parent; 260762306a36Sopenharmony_ci struct mountpoint *mp; 260862306a36Sopenharmony_ci int err; 260962306a36Sopenharmony_ci if (!old_name || !*old_name) 261062306a36Sopenharmony_ci return -EINVAL; 261162306a36Sopenharmony_ci err = kern_path(old_name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &old_path); 261262306a36Sopenharmony_ci if (err) 261362306a36Sopenharmony_ci return err; 261462306a36Sopenharmony_ci 261562306a36Sopenharmony_ci err = -EINVAL; 261662306a36Sopenharmony_ci if (mnt_ns_loop(old_path.dentry)) 261762306a36Sopenharmony_ci goto out; 261862306a36Sopenharmony_ci 261962306a36Sopenharmony_ci mp = lock_mount(path); 262062306a36Sopenharmony_ci if (IS_ERR(mp)) { 262162306a36Sopenharmony_ci err = PTR_ERR(mp); 262262306a36Sopenharmony_ci goto out; 262362306a36Sopenharmony_ci } 262462306a36Sopenharmony_ci 262562306a36Sopenharmony_ci parent = real_mount(path->mnt); 262662306a36Sopenharmony_ci if (!check_mnt(parent)) 262762306a36Sopenharmony_ci goto out2; 262862306a36Sopenharmony_ci 262962306a36Sopenharmony_ci mnt = __do_loopback(&old_path, recurse); 263062306a36Sopenharmony_ci if (IS_ERR(mnt)) { 263162306a36Sopenharmony_ci err = PTR_ERR(mnt); 263262306a36Sopenharmony_ci goto out2; 263362306a36Sopenharmony_ci } 263462306a36Sopenharmony_ci 263562306a36Sopenharmony_ci err = graft_tree(mnt, parent, mp); 263662306a36Sopenharmony_ci if (err) { 263762306a36Sopenharmony_ci lock_mount_hash(); 263862306a36Sopenharmony_ci umount_tree(mnt, UMOUNT_SYNC); 263962306a36Sopenharmony_ci unlock_mount_hash(); 264062306a36Sopenharmony_ci } 264162306a36Sopenharmony_ciout2: 264262306a36Sopenharmony_ci unlock_mount(mp); 264362306a36Sopenharmony_ciout: 264462306a36Sopenharmony_ci path_put(&old_path); 
264562306a36Sopenharmony_ci return err; 264662306a36Sopenharmony_ci} 264762306a36Sopenharmony_ci 264862306a36Sopenharmony_cistatic struct file *open_detached_copy(struct path *path, bool recursive) 264962306a36Sopenharmony_ci{ 265062306a36Sopenharmony_ci struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; 265162306a36Sopenharmony_ci struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true); 265262306a36Sopenharmony_ci struct mount *mnt, *p; 265362306a36Sopenharmony_ci struct file *file; 265462306a36Sopenharmony_ci 265562306a36Sopenharmony_ci if (IS_ERR(ns)) 265662306a36Sopenharmony_ci return ERR_CAST(ns); 265762306a36Sopenharmony_ci 265862306a36Sopenharmony_ci namespace_lock(); 265962306a36Sopenharmony_ci mnt = __do_loopback(path, recursive); 266062306a36Sopenharmony_ci if (IS_ERR(mnt)) { 266162306a36Sopenharmony_ci namespace_unlock(); 266262306a36Sopenharmony_ci free_mnt_ns(ns); 266362306a36Sopenharmony_ci return ERR_CAST(mnt); 266462306a36Sopenharmony_ci } 266562306a36Sopenharmony_ci 266662306a36Sopenharmony_ci lock_mount_hash(); 266762306a36Sopenharmony_ci for (p = mnt; p; p = next_mnt(p, mnt)) { 266862306a36Sopenharmony_ci p->mnt_ns = ns; 266962306a36Sopenharmony_ci ns->mounts++; 267062306a36Sopenharmony_ci } 267162306a36Sopenharmony_ci ns->root = mnt; 267262306a36Sopenharmony_ci list_add_tail(&ns->list, &mnt->mnt_list); 267362306a36Sopenharmony_ci mntget(&mnt->mnt); 267462306a36Sopenharmony_ci unlock_mount_hash(); 267562306a36Sopenharmony_ci namespace_unlock(); 267662306a36Sopenharmony_ci 267762306a36Sopenharmony_ci mntput(path->mnt); 267862306a36Sopenharmony_ci path->mnt = &mnt->mnt; 267962306a36Sopenharmony_ci file = dentry_open(path, O_PATH, current_cred()); 268062306a36Sopenharmony_ci if (IS_ERR(file)) 268162306a36Sopenharmony_ci dissolve_on_fput(path->mnt); 268262306a36Sopenharmony_ci else 268362306a36Sopenharmony_ci file->f_mode |= FMODE_NEED_UNMOUNT; 268462306a36Sopenharmony_ci return file; 268562306a36Sopenharmony_ci} 
268662306a36Sopenharmony_ci 268762306a36Sopenharmony_ciSYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags) 268862306a36Sopenharmony_ci{ 268962306a36Sopenharmony_ci struct file *file; 269062306a36Sopenharmony_ci struct path path; 269162306a36Sopenharmony_ci int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; 269262306a36Sopenharmony_ci bool detached = flags & OPEN_TREE_CLONE; 269362306a36Sopenharmony_ci int error; 269462306a36Sopenharmony_ci int fd; 269562306a36Sopenharmony_ci 269662306a36Sopenharmony_ci BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC); 269762306a36Sopenharmony_ci 269862306a36Sopenharmony_ci if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | 269962306a36Sopenharmony_ci AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE | 270062306a36Sopenharmony_ci OPEN_TREE_CLOEXEC)) 270162306a36Sopenharmony_ci return -EINVAL; 270262306a36Sopenharmony_ci 270362306a36Sopenharmony_ci if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE) 270462306a36Sopenharmony_ci return -EINVAL; 270562306a36Sopenharmony_ci 270662306a36Sopenharmony_ci if (flags & AT_NO_AUTOMOUNT) 270762306a36Sopenharmony_ci lookup_flags &= ~LOOKUP_AUTOMOUNT; 270862306a36Sopenharmony_ci if (flags & AT_SYMLINK_NOFOLLOW) 270962306a36Sopenharmony_ci lookup_flags &= ~LOOKUP_FOLLOW; 271062306a36Sopenharmony_ci if (flags & AT_EMPTY_PATH) 271162306a36Sopenharmony_ci lookup_flags |= LOOKUP_EMPTY; 271262306a36Sopenharmony_ci 271362306a36Sopenharmony_ci if (detached && !may_mount()) 271462306a36Sopenharmony_ci return -EPERM; 271562306a36Sopenharmony_ci 271662306a36Sopenharmony_ci fd = get_unused_fd_flags(flags & O_CLOEXEC); 271762306a36Sopenharmony_ci if (fd < 0) 271862306a36Sopenharmony_ci return fd; 271962306a36Sopenharmony_ci 272062306a36Sopenharmony_ci error = user_path_at(dfd, filename, lookup_flags, &path); 272162306a36Sopenharmony_ci if (unlikely(error)) { 272262306a36Sopenharmony_ci file = ERR_PTR(error); 272362306a36Sopenharmony_ci } else { 
272462306a36Sopenharmony_ci if (detached) 272562306a36Sopenharmony_ci file = open_detached_copy(&path, flags & AT_RECURSIVE); 272662306a36Sopenharmony_ci else 272762306a36Sopenharmony_ci file = dentry_open(&path, O_PATH, current_cred()); 272862306a36Sopenharmony_ci path_put(&path); 272962306a36Sopenharmony_ci } 273062306a36Sopenharmony_ci if (IS_ERR(file)) { 273162306a36Sopenharmony_ci put_unused_fd(fd); 273262306a36Sopenharmony_ci return PTR_ERR(file); 273362306a36Sopenharmony_ci } 273462306a36Sopenharmony_ci fd_install(fd, file); 273562306a36Sopenharmony_ci return fd; 273662306a36Sopenharmony_ci} 273762306a36Sopenharmony_ci 273862306a36Sopenharmony_ci/* 273962306a36Sopenharmony_ci * Don't allow locked mount flags to be cleared. 274062306a36Sopenharmony_ci * 274162306a36Sopenharmony_ci * No locks need to be held here while testing the various MNT_LOCK 274262306a36Sopenharmony_ci * flags because those flags can never be cleared once they are set. 274362306a36Sopenharmony_ci */ 274462306a36Sopenharmony_cistatic bool can_change_locked_flags(struct mount *mnt, unsigned int mnt_flags) 274562306a36Sopenharmony_ci{ 274662306a36Sopenharmony_ci unsigned int fl = mnt->mnt.mnt_flags; 274762306a36Sopenharmony_ci 274862306a36Sopenharmony_ci if ((fl & MNT_LOCK_READONLY) && 274962306a36Sopenharmony_ci !(mnt_flags & MNT_READONLY)) 275062306a36Sopenharmony_ci return false; 275162306a36Sopenharmony_ci 275262306a36Sopenharmony_ci if ((fl & MNT_LOCK_NODEV) && 275362306a36Sopenharmony_ci !(mnt_flags & MNT_NODEV)) 275462306a36Sopenharmony_ci return false; 275562306a36Sopenharmony_ci 275662306a36Sopenharmony_ci if ((fl & MNT_LOCK_NOSUID) && 275762306a36Sopenharmony_ci !(mnt_flags & MNT_NOSUID)) 275862306a36Sopenharmony_ci return false; 275962306a36Sopenharmony_ci 276062306a36Sopenharmony_ci if ((fl & MNT_LOCK_NOEXEC) && 276162306a36Sopenharmony_ci !(mnt_flags & MNT_NOEXEC)) 276262306a36Sopenharmony_ci return false; 276362306a36Sopenharmony_ci 276462306a36Sopenharmony_ci if ((fl & 
MNT_LOCK_ATIME) && 276562306a36Sopenharmony_ci ((fl & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) 276662306a36Sopenharmony_ci return false; 276762306a36Sopenharmony_ci 276862306a36Sopenharmony_ci return true; 276962306a36Sopenharmony_ci} 277062306a36Sopenharmony_ci 277162306a36Sopenharmony_cistatic int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags) 277262306a36Sopenharmony_ci{ 277362306a36Sopenharmony_ci bool readonly_request = (mnt_flags & MNT_READONLY); 277462306a36Sopenharmony_ci 277562306a36Sopenharmony_ci if (readonly_request == __mnt_is_readonly(&mnt->mnt)) 277662306a36Sopenharmony_ci return 0; 277762306a36Sopenharmony_ci 277862306a36Sopenharmony_ci if (readonly_request) 277962306a36Sopenharmony_ci return mnt_make_readonly(mnt); 278062306a36Sopenharmony_ci 278162306a36Sopenharmony_ci mnt->mnt.mnt_flags &= ~MNT_READONLY; 278262306a36Sopenharmony_ci return 0; 278362306a36Sopenharmony_ci} 278462306a36Sopenharmony_ci 278562306a36Sopenharmony_cistatic void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags) 278662306a36Sopenharmony_ci{ 278762306a36Sopenharmony_ci mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK; 278862306a36Sopenharmony_ci mnt->mnt.mnt_flags = mnt_flags; 278962306a36Sopenharmony_ci touch_mnt_namespace(mnt->mnt_ns); 279062306a36Sopenharmony_ci} 279162306a36Sopenharmony_ci 279262306a36Sopenharmony_cistatic void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt) 279362306a36Sopenharmony_ci{ 279462306a36Sopenharmony_ci struct super_block *sb = mnt->mnt_sb; 279562306a36Sopenharmony_ci 279662306a36Sopenharmony_ci if (!__mnt_is_readonly(mnt) && 279762306a36Sopenharmony_ci (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) && 279862306a36Sopenharmony_ci (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) { 279962306a36Sopenharmony_ci char *buf = (char *)__get_free_page(GFP_KERNEL); 280062306a36Sopenharmony_ci char *mntpath = buf ? 
d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM); 280162306a36Sopenharmony_ci 280262306a36Sopenharmony_ci pr_warn("%s filesystem being %s at %s supports timestamps until %ptTd (0x%llx)\n", 280362306a36Sopenharmony_ci sb->s_type->name, 280462306a36Sopenharmony_ci is_mounted(mnt) ? "remounted" : "mounted", 280562306a36Sopenharmony_ci mntpath, &sb->s_time_max, 280662306a36Sopenharmony_ci (unsigned long long)sb->s_time_max); 280762306a36Sopenharmony_ci 280862306a36Sopenharmony_ci free_page((unsigned long)buf); 280962306a36Sopenharmony_ci sb->s_iflags |= SB_I_TS_EXPIRY_WARNED; 281062306a36Sopenharmony_ci } 281162306a36Sopenharmony_ci} 281262306a36Sopenharmony_ci 281362306a36Sopenharmony_ci/* 281462306a36Sopenharmony_ci * Handle reconfiguration of the mountpoint only without alteration of the 281562306a36Sopenharmony_ci * superblock it refers to. This is triggered by specifying MS_REMOUNT|MS_BIND 281662306a36Sopenharmony_ci * to mount(2). 281762306a36Sopenharmony_ci */ 281862306a36Sopenharmony_cistatic int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags) 281962306a36Sopenharmony_ci{ 282062306a36Sopenharmony_ci struct super_block *sb = path->mnt->mnt_sb; 282162306a36Sopenharmony_ci struct mount *mnt = real_mount(path->mnt); 282262306a36Sopenharmony_ci int ret; 282362306a36Sopenharmony_ci 282462306a36Sopenharmony_ci if (!check_mnt(mnt)) 282562306a36Sopenharmony_ci return -EINVAL; 282662306a36Sopenharmony_ci 282762306a36Sopenharmony_ci if (!path_mounted(path)) 282862306a36Sopenharmony_ci return -EINVAL; 282962306a36Sopenharmony_ci 283062306a36Sopenharmony_ci if (!can_change_locked_flags(mnt, mnt_flags)) 283162306a36Sopenharmony_ci return -EPERM; 283262306a36Sopenharmony_ci 283362306a36Sopenharmony_ci /* 283462306a36Sopenharmony_ci * We're only checking whether the superblock is read-only not 283562306a36Sopenharmony_ci * changing it, so only take down_read(&sb->s_umount). 
283662306a36Sopenharmony_ci */ 283762306a36Sopenharmony_ci down_read(&sb->s_umount); 283862306a36Sopenharmony_ci lock_mount_hash(); 283962306a36Sopenharmony_ci ret = change_mount_ro_state(mnt, mnt_flags); 284062306a36Sopenharmony_ci if (ret == 0) 284162306a36Sopenharmony_ci set_mount_attributes(mnt, mnt_flags); 284262306a36Sopenharmony_ci unlock_mount_hash(); 284362306a36Sopenharmony_ci up_read(&sb->s_umount); 284462306a36Sopenharmony_ci 284562306a36Sopenharmony_ci mnt_warn_timestamp_expiry(path, &mnt->mnt); 284662306a36Sopenharmony_ci 284762306a36Sopenharmony_ci return ret; 284862306a36Sopenharmony_ci} 284962306a36Sopenharmony_ci 285062306a36Sopenharmony_ci/* 285162306a36Sopenharmony_ci * change filesystem flags. dir should be a physical root of filesystem. 285262306a36Sopenharmony_ci * If you've mounted a non-root directory somewhere and want to do remount 285362306a36Sopenharmony_ci * on it - tough luck. 285462306a36Sopenharmony_ci */ 285562306a36Sopenharmony_cistatic int do_remount(struct path *path, int ms_flags, int sb_flags, 285662306a36Sopenharmony_ci int mnt_flags, void *data) 285762306a36Sopenharmony_ci{ 285862306a36Sopenharmony_ci int err; 285962306a36Sopenharmony_ci struct super_block *sb = path->mnt->mnt_sb; 286062306a36Sopenharmony_ci struct mount *mnt = real_mount(path->mnt); 286162306a36Sopenharmony_ci struct fs_context *fc; 286262306a36Sopenharmony_ci 286362306a36Sopenharmony_ci if (!check_mnt(mnt)) 286462306a36Sopenharmony_ci return -EINVAL; 286562306a36Sopenharmony_ci 286662306a36Sopenharmony_ci if (!path_mounted(path)) 286762306a36Sopenharmony_ci return -EINVAL; 286862306a36Sopenharmony_ci 286962306a36Sopenharmony_ci if (!can_change_locked_flags(mnt, mnt_flags)) 287062306a36Sopenharmony_ci return -EPERM; 287162306a36Sopenharmony_ci 287262306a36Sopenharmony_ci fc = fs_context_for_reconfigure(path->dentry, sb_flags, MS_RMT_MASK); 287362306a36Sopenharmony_ci if (IS_ERR(fc)) 287462306a36Sopenharmony_ci return PTR_ERR(fc); 287562306a36Sopenharmony_ci 
287662306a36Sopenharmony_ci /* 287762306a36Sopenharmony_ci * Indicate to the filesystem that the remount request is coming 287862306a36Sopenharmony_ci * from the legacy mount system call. 287962306a36Sopenharmony_ci */ 288062306a36Sopenharmony_ci fc->oldapi = true; 288162306a36Sopenharmony_ci 288262306a36Sopenharmony_ci err = parse_monolithic_mount_data(fc, data); 288362306a36Sopenharmony_ci if (!err) { 288462306a36Sopenharmony_ci down_write(&sb->s_umount); 288562306a36Sopenharmony_ci err = -EPERM; 288662306a36Sopenharmony_ci if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) { 288762306a36Sopenharmony_ci err = reconfigure_super(fc); 288862306a36Sopenharmony_ci if (!err) { 288962306a36Sopenharmony_ci lock_mount_hash(); 289062306a36Sopenharmony_ci set_mount_attributes(mnt, mnt_flags); 289162306a36Sopenharmony_ci unlock_mount_hash(); 289262306a36Sopenharmony_ci } 289362306a36Sopenharmony_ci } 289462306a36Sopenharmony_ci up_write(&sb->s_umount); 289562306a36Sopenharmony_ci } 289662306a36Sopenharmony_ci 289762306a36Sopenharmony_ci mnt_warn_timestamp_expiry(path, &mnt->mnt); 289862306a36Sopenharmony_ci 289962306a36Sopenharmony_ci put_fs_context(fc); 290062306a36Sopenharmony_ci return err; 290162306a36Sopenharmony_ci} 290262306a36Sopenharmony_ci 290362306a36Sopenharmony_cistatic inline int tree_contains_unbindable(struct mount *mnt) 290462306a36Sopenharmony_ci{ 290562306a36Sopenharmony_ci struct mount *p; 290662306a36Sopenharmony_ci for (p = mnt; p; p = next_mnt(p, mnt)) { 290762306a36Sopenharmony_ci if (IS_MNT_UNBINDABLE(p)) 290862306a36Sopenharmony_ci return 1; 290962306a36Sopenharmony_ci } 291062306a36Sopenharmony_ci return 0; 291162306a36Sopenharmony_ci} 291262306a36Sopenharmony_ci 291362306a36Sopenharmony_ci/* 291462306a36Sopenharmony_ci * Check that there aren't references to earlier/same mount namespaces in the 291562306a36Sopenharmony_ci * specified subtree. 
Such references can act as pins for mount namespaces 291662306a36Sopenharmony_ci * that aren't checked by the mount-cycle checking code, thereby allowing 291762306a36Sopenharmony_ci * cycles to be made. 291862306a36Sopenharmony_ci */ 291962306a36Sopenharmony_cistatic bool check_for_nsfs_mounts(struct mount *subtree) 292062306a36Sopenharmony_ci{ 292162306a36Sopenharmony_ci struct mount *p; 292262306a36Sopenharmony_ci bool ret = false; 292362306a36Sopenharmony_ci 292462306a36Sopenharmony_ci lock_mount_hash(); 292562306a36Sopenharmony_ci for (p = subtree; p; p = next_mnt(p, subtree)) 292662306a36Sopenharmony_ci if (mnt_ns_loop(p->mnt.mnt_root)) 292762306a36Sopenharmony_ci goto out; 292862306a36Sopenharmony_ci 292962306a36Sopenharmony_ci ret = true; 293062306a36Sopenharmony_ciout: 293162306a36Sopenharmony_ci unlock_mount_hash(); 293262306a36Sopenharmony_ci return ret; 293362306a36Sopenharmony_ci} 293462306a36Sopenharmony_ci 293562306a36Sopenharmony_cistatic int do_set_group(struct path *from_path, struct path *to_path) 293662306a36Sopenharmony_ci{ 293762306a36Sopenharmony_ci struct mount *from, *to; 293862306a36Sopenharmony_ci int err; 293962306a36Sopenharmony_ci 294062306a36Sopenharmony_ci from = real_mount(from_path->mnt); 294162306a36Sopenharmony_ci to = real_mount(to_path->mnt); 294262306a36Sopenharmony_ci 294362306a36Sopenharmony_ci namespace_lock(); 294462306a36Sopenharmony_ci 294562306a36Sopenharmony_ci err = -EINVAL; 294662306a36Sopenharmony_ci /* To and From must be mounted */ 294762306a36Sopenharmony_ci if (!is_mounted(&from->mnt)) 294862306a36Sopenharmony_ci goto out; 294962306a36Sopenharmony_ci if (!is_mounted(&to->mnt)) 295062306a36Sopenharmony_ci goto out; 295162306a36Sopenharmony_ci 295262306a36Sopenharmony_ci err = -EPERM; 295362306a36Sopenharmony_ci /* We should be allowed to modify mount namespaces of both mounts */ 295462306a36Sopenharmony_ci if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN)) 295562306a36Sopenharmony_ci goto out; 
295662306a36Sopenharmony_ci if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN)) 295762306a36Sopenharmony_ci goto out; 295862306a36Sopenharmony_ci 295962306a36Sopenharmony_ci err = -EINVAL; 296062306a36Sopenharmony_ci /* To and From paths should be mount roots */ 296162306a36Sopenharmony_ci if (!path_mounted(from_path)) 296262306a36Sopenharmony_ci goto out; 296362306a36Sopenharmony_ci if (!path_mounted(to_path)) 296462306a36Sopenharmony_ci goto out; 296562306a36Sopenharmony_ci 296662306a36Sopenharmony_ci /* Setting sharing groups is only allowed across same superblock */ 296762306a36Sopenharmony_ci if (from->mnt.mnt_sb != to->mnt.mnt_sb) 296862306a36Sopenharmony_ci goto out; 296962306a36Sopenharmony_ci 297062306a36Sopenharmony_ci /* From mount root should be wider than To mount root */ 297162306a36Sopenharmony_ci if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root)) 297262306a36Sopenharmony_ci goto out; 297362306a36Sopenharmony_ci 297462306a36Sopenharmony_ci /* From mount should not have locked children in place of To's root */ 297562306a36Sopenharmony_ci if (has_locked_children(from, to->mnt.mnt_root)) 297662306a36Sopenharmony_ci goto out; 297762306a36Sopenharmony_ci 297862306a36Sopenharmony_ci /* Setting sharing groups is only allowed on private mounts */ 297962306a36Sopenharmony_ci if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to)) 298062306a36Sopenharmony_ci goto out; 298162306a36Sopenharmony_ci 298262306a36Sopenharmony_ci /* From should not be private */ 298362306a36Sopenharmony_ci if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from)) 298462306a36Sopenharmony_ci goto out; 298562306a36Sopenharmony_ci 298662306a36Sopenharmony_ci if (IS_MNT_SLAVE(from)) { 298762306a36Sopenharmony_ci struct mount *m = from->mnt_master; 298862306a36Sopenharmony_ci 298962306a36Sopenharmony_ci list_add(&to->mnt_slave, &m->mnt_slave_list); 299062306a36Sopenharmony_ci to->mnt_master = m; 299162306a36Sopenharmony_ci } 299262306a36Sopenharmony_ci 299362306a36Sopenharmony_ci if (IS_MNT_SHARED(from)) { 
299462306a36Sopenharmony_ci to->mnt_group_id = from->mnt_group_id; 299562306a36Sopenharmony_ci list_add(&to->mnt_share, &from->mnt_share); 299662306a36Sopenharmony_ci lock_mount_hash(); 299762306a36Sopenharmony_ci set_mnt_shared(to); 299862306a36Sopenharmony_ci unlock_mount_hash(); 299962306a36Sopenharmony_ci } 300062306a36Sopenharmony_ci 300162306a36Sopenharmony_ci err = 0; 300262306a36Sopenharmony_ciout: 300362306a36Sopenharmony_ci namespace_unlock(); 300462306a36Sopenharmony_ci return err; 300562306a36Sopenharmony_ci} 300662306a36Sopenharmony_ci 300762306a36Sopenharmony_ci/** 300862306a36Sopenharmony_ci * path_overmounted - check if path is overmounted 300962306a36Sopenharmony_ci * @path: path to check 301062306a36Sopenharmony_ci * 301162306a36Sopenharmony_ci * Check if path is overmounted, i.e., if there's a mount on top of 301262306a36Sopenharmony_ci * @path->mnt with @path->dentry as mountpoint. 301362306a36Sopenharmony_ci * 301462306a36Sopenharmony_ci * Context: This function expects namespace_lock() to be held. 301562306a36Sopenharmony_ci * Return: If path is overmounted true is returned, false if not. 
301662306a36Sopenharmony_ci */ 301762306a36Sopenharmony_cistatic inline bool path_overmounted(const struct path *path) 301862306a36Sopenharmony_ci{ 301962306a36Sopenharmony_ci rcu_read_lock(); 302062306a36Sopenharmony_ci if (unlikely(__lookup_mnt(path->mnt, path->dentry))) { 302162306a36Sopenharmony_ci rcu_read_unlock(); 302262306a36Sopenharmony_ci return true; 302362306a36Sopenharmony_ci } 302462306a36Sopenharmony_ci rcu_read_unlock(); 302562306a36Sopenharmony_ci return false; 302662306a36Sopenharmony_ci} 302762306a36Sopenharmony_ci 302862306a36Sopenharmony_ci/** 302962306a36Sopenharmony_ci * can_move_mount_beneath - check that we can mount beneath the top mount 303062306a36Sopenharmony_ci * @from: mount to mount beneath 303162306a36Sopenharmony_ci * @to: mount under which to mount 303262306a36Sopenharmony_ci * 303362306a36Sopenharmony_ci * - Make sure that @to->dentry is actually the root of a mount under 303462306a36Sopenharmony_ci * which we can mount another mount. 303562306a36Sopenharmony_ci * - Make sure that nothing can be mounted beneath the caller's current 303662306a36Sopenharmony_ci * root or the rootfs of the namespace. 303762306a36Sopenharmony_ci * - Make sure that the caller can unmount the topmost mount ensuring 303862306a36Sopenharmony_ci * that the caller could reveal the underlying mountpoint. 303962306a36Sopenharmony_ci * - Ensure that nothing has been mounted on top of @from before we 304062306a36Sopenharmony_ci * grabbed @namespace_sem to avoid creating pointless shadow mounts. 304162306a36Sopenharmony_ci * - Prevent mounting beneath a mount if the propagation relationship 304262306a36Sopenharmony_ci * between the source mount, parent mount, and top mount would lead to 304362306a36Sopenharmony_ci * nonsensical mount trees. 304462306a36Sopenharmony_ci * 304562306a36Sopenharmony_ci * Context: This function expects namespace_lock() to be held. 304662306a36Sopenharmony_ci * Return: On success 0, and on error a negative error code is returned. 
304762306a36Sopenharmony_ci */ 304862306a36Sopenharmony_cistatic int can_move_mount_beneath(const struct path *from, 304962306a36Sopenharmony_ci const struct path *to, 305062306a36Sopenharmony_ci const struct mountpoint *mp) 305162306a36Sopenharmony_ci{ 305262306a36Sopenharmony_ci struct mount *mnt_from = real_mount(from->mnt), 305362306a36Sopenharmony_ci *mnt_to = real_mount(to->mnt), 305462306a36Sopenharmony_ci *parent_mnt_to = mnt_to->mnt_parent; 305562306a36Sopenharmony_ci 305662306a36Sopenharmony_ci if (!mnt_has_parent(mnt_to)) 305762306a36Sopenharmony_ci return -EINVAL; 305862306a36Sopenharmony_ci 305962306a36Sopenharmony_ci if (!path_mounted(to)) 306062306a36Sopenharmony_ci return -EINVAL; 306162306a36Sopenharmony_ci 306262306a36Sopenharmony_ci if (IS_MNT_LOCKED(mnt_to)) 306362306a36Sopenharmony_ci return -EINVAL; 306462306a36Sopenharmony_ci 306562306a36Sopenharmony_ci /* Avoid creating shadow mounts during mount propagation. */ 306662306a36Sopenharmony_ci if (path_overmounted(from)) 306762306a36Sopenharmony_ci return -EINVAL; 306862306a36Sopenharmony_ci 306962306a36Sopenharmony_ci /* 307062306a36Sopenharmony_ci * Mounting beneath the rootfs only makes sense when the 307162306a36Sopenharmony_ci * semantics of pivot_root(".", ".") are used. 
307262306a36Sopenharmony_ci */ 307362306a36Sopenharmony_ci if (&mnt_to->mnt == current->fs->root.mnt) 307462306a36Sopenharmony_ci return -EINVAL; 307562306a36Sopenharmony_ci if (parent_mnt_to == current->nsproxy->mnt_ns->root) 307662306a36Sopenharmony_ci return -EINVAL; 307762306a36Sopenharmony_ci 307862306a36Sopenharmony_ci for (struct mount *p = mnt_from; mnt_has_parent(p); p = p->mnt_parent) 307962306a36Sopenharmony_ci if (p == mnt_to) 308062306a36Sopenharmony_ci return -EINVAL; 308162306a36Sopenharmony_ci 308262306a36Sopenharmony_ci /* 308362306a36Sopenharmony_ci * If the parent mount propagates to the child mount this would 308462306a36Sopenharmony_ci * mean mounting @mnt_from on @mnt_to->mnt_parent and then 308562306a36Sopenharmony_ci * propagating a copy @c of @mnt_from on top of @mnt_to. This 308662306a36Sopenharmony_ci * defeats the whole purpose of mounting beneath another mount. 308762306a36Sopenharmony_ci */ 308862306a36Sopenharmony_ci if (propagation_would_overmount(parent_mnt_to, mnt_to, mp)) 308962306a36Sopenharmony_ci return -EINVAL; 309062306a36Sopenharmony_ci 309162306a36Sopenharmony_ci /* 309262306a36Sopenharmony_ci * If @mnt_to->mnt_parent propagates to @mnt_from this would 309362306a36Sopenharmony_ci * mean propagating a copy @c of @mnt_from on top of @mnt_from. 309462306a36Sopenharmony_ci * Afterwards @mnt_from would be mounted on top of 309562306a36Sopenharmony_ci * @mnt_to->mnt_parent and @mnt_to would be unmounted from 309662306a36Sopenharmony_ci * @mnt->mnt_parent and remounted on @mnt_from. But since @c is 309762306a36Sopenharmony_ci * already mounted on @mnt_from, @mnt_to would ultimately be 309862306a36Sopenharmony_ci * remounted on top of @c. Afterwards, @mnt_from would be 309962306a36Sopenharmony_ci * covered by a copy @c of @mnt_from and @c would be covered by 310062306a36Sopenharmony_ci * @mnt_from itself. This defeats the whole purpose of mounting 310162306a36Sopenharmony_ci * @mnt_from beneath @mnt_to. 
310262306a36Sopenharmony_ci */ 310362306a36Sopenharmony_ci if (propagation_would_overmount(parent_mnt_to, mnt_from, mp)) 310462306a36Sopenharmony_ci return -EINVAL; 310562306a36Sopenharmony_ci 310662306a36Sopenharmony_ci return 0; 310762306a36Sopenharmony_ci} 310862306a36Sopenharmony_ci 310962306a36Sopenharmony_cistatic int do_move_mount(struct path *old_path, struct path *new_path, 311062306a36Sopenharmony_ci bool beneath) 311162306a36Sopenharmony_ci{ 311262306a36Sopenharmony_ci struct mnt_namespace *ns; 311362306a36Sopenharmony_ci struct mount *p; 311462306a36Sopenharmony_ci struct mount *old; 311562306a36Sopenharmony_ci struct mount *parent; 311662306a36Sopenharmony_ci struct mountpoint *mp, *old_mp; 311762306a36Sopenharmony_ci int err; 311862306a36Sopenharmony_ci bool attached; 311962306a36Sopenharmony_ci enum mnt_tree_flags_t flags = 0; 312062306a36Sopenharmony_ci 312162306a36Sopenharmony_ci mp = do_lock_mount(new_path, beneath); 312262306a36Sopenharmony_ci if (IS_ERR(mp)) 312362306a36Sopenharmony_ci return PTR_ERR(mp); 312462306a36Sopenharmony_ci 312562306a36Sopenharmony_ci old = real_mount(old_path->mnt); 312662306a36Sopenharmony_ci p = real_mount(new_path->mnt); 312762306a36Sopenharmony_ci parent = old->mnt_parent; 312862306a36Sopenharmony_ci attached = mnt_has_parent(old); 312962306a36Sopenharmony_ci if (attached) 313062306a36Sopenharmony_ci flags |= MNT_TREE_MOVE; 313162306a36Sopenharmony_ci old_mp = old->mnt_mp; 313262306a36Sopenharmony_ci ns = old->mnt_ns; 313362306a36Sopenharmony_ci 313462306a36Sopenharmony_ci err = -EINVAL; 313562306a36Sopenharmony_ci /* The mountpoint must be in our namespace. */ 313662306a36Sopenharmony_ci if (!check_mnt(p)) 313762306a36Sopenharmony_ci goto out; 313862306a36Sopenharmony_ci 313962306a36Sopenharmony_ci /* The thing moved must be mounted... */ 314062306a36Sopenharmony_ci if (!is_mounted(&old->mnt)) 314162306a36Sopenharmony_ci goto out; 314262306a36Sopenharmony_ci 314362306a36Sopenharmony_ci /* ... 
and either ours or the root of anon namespace */ 314462306a36Sopenharmony_ci if (!(attached ? check_mnt(old) : is_anon_ns(ns))) 314562306a36Sopenharmony_ci goto out; 314662306a36Sopenharmony_ci 314762306a36Sopenharmony_ci if (old->mnt.mnt_flags & MNT_LOCKED) 314862306a36Sopenharmony_ci goto out; 314962306a36Sopenharmony_ci 315062306a36Sopenharmony_ci if (!path_mounted(old_path)) 315162306a36Sopenharmony_ci goto out; 315262306a36Sopenharmony_ci 315362306a36Sopenharmony_ci if (d_is_dir(new_path->dentry) != 315462306a36Sopenharmony_ci d_is_dir(old_path->dentry)) 315562306a36Sopenharmony_ci goto out; 315662306a36Sopenharmony_ci /* 315762306a36Sopenharmony_ci * Don't move a mount residing in a shared parent. 315862306a36Sopenharmony_ci */ 315962306a36Sopenharmony_ci if (attached && IS_MNT_SHARED(parent)) 316062306a36Sopenharmony_ci goto out; 316162306a36Sopenharmony_ci 316262306a36Sopenharmony_ci if (beneath) { 316362306a36Sopenharmony_ci err = can_move_mount_beneath(old_path, new_path, mp); 316462306a36Sopenharmony_ci if (err) 316562306a36Sopenharmony_ci goto out; 316662306a36Sopenharmony_ci 316762306a36Sopenharmony_ci err = -EINVAL; 316862306a36Sopenharmony_ci p = p->mnt_parent; 316962306a36Sopenharmony_ci flags |= MNT_TREE_BENEATH; 317062306a36Sopenharmony_ci } 317162306a36Sopenharmony_ci 317262306a36Sopenharmony_ci /* 317362306a36Sopenharmony_ci * Don't move a mount tree containing unbindable mounts to a destination 317462306a36Sopenharmony_ci * mount which is shared. 
317562306a36Sopenharmony_ci */ 317662306a36Sopenharmony_ci if (IS_MNT_SHARED(p) && tree_contains_unbindable(old)) 317762306a36Sopenharmony_ci goto out; 317862306a36Sopenharmony_ci err = -ELOOP; 317962306a36Sopenharmony_ci if (!check_for_nsfs_mounts(old)) 318062306a36Sopenharmony_ci goto out; 318162306a36Sopenharmony_ci for (; mnt_has_parent(p); p = p->mnt_parent) 318262306a36Sopenharmony_ci if (p == old) 318362306a36Sopenharmony_ci goto out; 318462306a36Sopenharmony_ci 318562306a36Sopenharmony_ci err = attach_recursive_mnt(old, real_mount(new_path->mnt), mp, flags); 318662306a36Sopenharmony_ci if (err) 318762306a36Sopenharmony_ci goto out; 318862306a36Sopenharmony_ci 318962306a36Sopenharmony_ci /* if the mount is moved, it should no longer be expire 319062306a36Sopenharmony_ci * automatically */ 319162306a36Sopenharmony_ci list_del_init(&old->mnt_expire); 319262306a36Sopenharmony_ci if (attached) 319362306a36Sopenharmony_ci put_mountpoint(old_mp); 319462306a36Sopenharmony_ciout: 319562306a36Sopenharmony_ci unlock_mount(mp); 319662306a36Sopenharmony_ci if (!err) { 319762306a36Sopenharmony_ci if (attached) 319862306a36Sopenharmony_ci mntput_no_expire(parent); 319962306a36Sopenharmony_ci else 320062306a36Sopenharmony_ci free_mnt_ns(ns); 320162306a36Sopenharmony_ci } 320262306a36Sopenharmony_ci return err; 320362306a36Sopenharmony_ci} 320462306a36Sopenharmony_ci 320562306a36Sopenharmony_cistatic int do_move_mount_old(struct path *path, const char *old_name) 320662306a36Sopenharmony_ci{ 320762306a36Sopenharmony_ci struct path old_path; 320862306a36Sopenharmony_ci int err; 320962306a36Sopenharmony_ci 321062306a36Sopenharmony_ci if (!old_name || !*old_name) 321162306a36Sopenharmony_ci return -EINVAL; 321262306a36Sopenharmony_ci 321362306a36Sopenharmony_ci err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); 321462306a36Sopenharmony_ci if (err) 321562306a36Sopenharmony_ci return err; 321662306a36Sopenharmony_ci 321762306a36Sopenharmony_ci err = do_move_mount(&old_path, 
path, false); 321862306a36Sopenharmony_ci path_put(&old_path); 321962306a36Sopenharmony_ci return err; 322062306a36Sopenharmony_ci} 322162306a36Sopenharmony_ci 322262306a36Sopenharmony_ci/* 322362306a36Sopenharmony_ci * add a mount into a namespace's mount tree 322462306a36Sopenharmony_ci */ 322562306a36Sopenharmony_cistatic int do_add_mount(struct mount *newmnt, struct mountpoint *mp, 322662306a36Sopenharmony_ci const struct path *path, int mnt_flags) 322762306a36Sopenharmony_ci{ 322862306a36Sopenharmony_ci struct mount *parent = real_mount(path->mnt); 322962306a36Sopenharmony_ci 323062306a36Sopenharmony_ci mnt_flags &= ~MNT_INTERNAL_FLAGS; 323162306a36Sopenharmony_ci 323262306a36Sopenharmony_ci if (unlikely(!check_mnt(parent))) { 323362306a36Sopenharmony_ci /* that's acceptable only for automounts done in private ns */ 323462306a36Sopenharmony_ci if (!(mnt_flags & MNT_SHRINKABLE)) 323562306a36Sopenharmony_ci return -EINVAL; 323662306a36Sopenharmony_ci /* ... and for those we'd better have mountpoint still alive */ 323762306a36Sopenharmony_ci if (!parent->mnt_ns) 323862306a36Sopenharmony_ci return -EINVAL; 323962306a36Sopenharmony_ci } 324062306a36Sopenharmony_ci 324162306a36Sopenharmony_ci /* Refuse the same filesystem on the same mount point */ 324262306a36Sopenharmony_ci if (path->mnt->mnt_sb == newmnt->mnt.mnt_sb && path_mounted(path)) 324362306a36Sopenharmony_ci return -EBUSY; 324462306a36Sopenharmony_ci 324562306a36Sopenharmony_ci if (d_is_symlink(newmnt->mnt.mnt_root)) 324662306a36Sopenharmony_ci return -EINVAL; 324762306a36Sopenharmony_ci 324862306a36Sopenharmony_ci newmnt->mnt.mnt_flags = mnt_flags; 324962306a36Sopenharmony_ci return graft_tree(newmnt, parent, mp); 325062306a36Sopenharmony_ci} 325162306a36Sopenharmony_ci 325262306a36Sopenharmony_cistatic bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags); 325362306a36Sopenharmony_ci 325462306a36Sopenharmony_ci/* 325562306a36Sopenharmony_ci * Create a new mount using a superblock 
configuration and request it 325662306a36Sopenharmony_ci * be added to the namespace tree. 325762306a36Sopenharmony_ci */ 325862306a36Sopenharmony_cistatic int do_new_mount_fc(struct fs_context *fc, struct path *mountpoint, 325962306a36Sopenharmony_ci unsigned int mnt_flags) 326062306a36Sopenharmony_ci{ 326162306a36Sopenharmony_ci struct vfsmount *mnt; 326262306a36Sopenharmony_ci struct mountpoint *mp; 326362306a36Sopenharmony_ci struct super_block *sb = fc->root->d_sb; 326462306a36Sopenharmony_ci int error; 326562306a36Sopenharmony_ci 326662306a36Sopenharmony_ci error = security_sb_kern_mount(sb); 326762306a36Sopenharmony_ci if (!error && mount_too_revealing(sb, &mnt_flags)) 326862306a36Sopenharmony_ci error = -EPERM; 326962306a36Sopenharmony_ci 327062306a36Sopenharmony_ci if (unlikely(error)) { 327162306a36Sopenharmony_ci fc_drop_locked(fc); 327262306a36Sopenharmony_ci return error; 327362306a36Sopenharmony_ci } 327462306a36Sopenharmony_ci 327562306a36Sopenharmony_ci up_write(&sb->s_umount); 327662306a36Sopenharmony_ci 327762306a36Sopenharmony_ci mnt = vfs_create_mount(fc); 327862306a36Sopenharmony_ci if (IS_ERR(mnt)) 327962306a36Sopenharmony_ci return PTR_ERR(mnt); 328062306a36Sopenharmony_ci 328162306a36Sopenharmony_ci mnt_warn_timestamp_expiry(mountpoint, mnt); 328262306a36Sopenharmony_ci 328362306a36Sopenharmony_ci mp = lock_mount(mountpoint); 328462306a36Sopenharmony_ci if (IS_ERR(mp)) { 328562306a36Sopenharmony_ci mntput(mnt); 328662306a36Sopenharmony_ci return PTR_ERR(mp); 328762306a36Sopenharmony_ci } 328862306a36Sopenharmony_ci error = do_add_mount(real_mount(mnt), mp, mountpoint, mnt_flags); 328962306a36Sopenharmony_ci unlock_mount(mp); 329062306a36Sopenharmony_ci if (error < 0) 329162306a36Sopenharmony_ci mntput(mnt); 329262306a36Sopenharmony_ci return error; 329362306a36Sopenharmony_ci} 329462306a36Sopenharmony_ci 329562306a36Sopenharmony_ci/* 329662306a36Sopenharmony_ci * create a new mount for userspace and request it to be added into the 
329762306a36Sopenharmony_ci * namespace's tree 329862306a36Sopenharmony_ci */ 329962306a36Sopenharmony_cistatic int do_new_mount(struct path *path, const char *fstype, int sb_flags, 330062306a36Sopenharmony_ci int mnt_flags, const char *name, void *data) 330162306a36Sopenharmony_ci{ 330262306a36Sopenharmony_ci struct file_system_type *type; 330362306a36Sopenharmony_ci struct fs_context *fc; 330462306a36Sopenharmony_ci const char *subtype = NULL; 330562306a36Sopenharmony_ci int err = 0; 330662306a36Sopenharmony_ci 330762306a36Sopenharmony_ci if (!fstype) 330862306a36Sopenharmony_ci return -EINVAL; 330962306a36Sopenharmony_ci 331062306a36Sopenharmony_ci type = get_fs_type(fstype); 331162306a36Sopenharmony_ci if (!type) 331262306a36Sopenharmony_ci return -ENODEV; 331362306a36Sopenharmony_ci 331462306a36Sopenharmony_ci if (type->fs_flags & FS_HAS_SUBTYPE) { 331562306a36Sopenharmony_ci subtype = strchr(fstype, '.'); 331662306a36Sopenharmony_ci if (subtype) { 331762306a36Sopenharmony_ci subtype++; 331862306a36Sopenharmony_ci if (!*subtype) { 331962306a36Sopenharmony_ci put_filesystem(type); 332062306a36Sopenharmony_ci return -EINVAL; 332162306a36Sopenharmony_ci } 332262306a36Sopenharmony_ci } 332362306a36Sopenharmony_ci } 332462306a36Sopenharmony_ci 332562306a36Sopenharmony_ci fc = fs_context_for_mount(type, sb_flags); 332662306a36Sopenharmony_ci put_filesystem(type); 332762306a36Sopenharmony_ci if (IS_ERR(fc)) 332862306a36Sopenharmony_ci return PTR_ERR(fc); 332962306a36Sopenharmony_ci 333062306a36Sopenharmony_ci /* 333162306a36Sopenharmony_ci * Indicate to the filesystem that the mount request is coming 333262306a36Sopenharmony_ci * from the legacy mount system call. 
333362306a36Sopenharmony_ci */ 333462306a36Sopenharmony_ci fc->oldapi = true; 333562306a36Sopenharmony_ci 333662306a36Sopenharmony_ci if (subtype) 333762306a36Sopenharmony_ci err = vfs_parse_fs_string(fc, "subtype", 333862306a36Sopenharmony_ci subtype, strlen(subtype)); 333962306a36Sopenharmony_ci if (!err && name) 334062306a36Sopenharmony_ci err = vfs_parse_fs_string(fc, "source", name, strlen(name)); 334162306a36Sopenharmony_ci if (!err) 334262306a36Sopenharmony_ci err = parse_monolithic_mount_data(fc, data); 334362306a36Sopenharmony_ci if (!err && !mount_capable(fc)) 334462306a36Sopenharmony_ci err = -EPERM; 334562306a36Sopenharmony_ci if (!err) 334662306a36Sopenharmony_ci err = vfs_get_tree(fc); 334762306a36Sopenharmony_ci if (!err) 334862306a36Sopenharmony_ci err = do_new_mount_fc(fc, path, mnt_flags); 334962306a36Sopenharmony_ci 335062306a36Sopenharmony_ci put_fs_context(fc); 335162306a36Sopenharmony_ci return err; 335262306a36Sopenharmony_ci} 335362306a36Sopenharmony_ci 335462306a36Sopenharmony_ciint finish_automount(struct vfsmount *m, const struct path *path) 335562306a36Sopenharmony_ci{ 335662306a36Sopenharmony_ci struct dentry *dentry = path->dentry; 335762306a36Sopenharmony_ci struct mountpoint *mp; 335862306a36Sopenharmony_ci struct mount *mnt; 335962306a36Sopenharmony_ci int err; 336062306a36Sopenharmony_ci 336162306a36Sopenharmony_ci if (!m) 336262306a36Sopenharmony_ci return 0; 336362306a36Sopenharmony_ci if (IS_ERR(m)) 336462306a36Sopenharmony_ci return PTR_ERR(m); 336562306a36Sopenharmony_ci 336662306a36Sopenharmony_ci mnt = real_mount(m); 336762306a36Sopenharmony_ci /* The new mount record should have at least 2 refs to prevent it being 336862306a36Sopenharmony_ci * expired before we get a chance to add it 336962306a36Sopenharmony_ci */ 337062306a36Sopenharmony_ci BUG_ON(mnt_get_count(mnt) < 2); 337162306a36Sopenharmony_ci 337262306a36Sopenharmony_ci if (m->mnt_sb == path->mnt->mnt_sb && 337362306a36Sopenharmony_ci m->mnt_root == dentry) { 
337462306a36Sopenharmony_ci err = -ELOOP; 337562306a36Sopenharmony_ci goto discard; 337662306a36Sopenharmony_ci } 337762306a36Sopenharmony_ci 337862306a36Sopenharmony_ci /* 337962306a36Sopenharmony_ci * we don't want to use lock_mount() - in this case finding something 338062306a36Sopenharmony_ci * that overmounts our mountpoint to be means "quitely drop what we've 338162306a36Sopenharmony_ci * got", not "try to mount it on top". 338262306a36Sopenharmony_ci */ 338362306a36Sopenharmony_ci inode_lock(dentry->d_inode); 338462306a36Sopenharmony_ci namespace_lock(); 338562306a36Sopenharmony_ci if (unlikely(cant_mount(dentry))) { 338662306a36Sopenharmony_ci err = -ENOENT; 338762306a36Sopenharmony_ci goto discard_locked; 338862306a36Sopenharmony_ci } 338962306a36Sopenharmony_ci if (path_overmounted(path)) { 339062306a36Sopenharmony_ci err = 0; 339162306a36Sopenharmony_ci goto discard_locked; 339262306a36Sopenharmony_ci } 339362306a36Sopenharmony_ci mp = get_mountpoint(dentry); 339462306a36Sopenharmony_ci if (IS_ERR(mp)) { 339562306a36Sopenharmony_ci err = PTR_ERR(mp); 339662306a36Sopenharmony_ci goto discard_locked; 339762306a36Sopenharmony_ci } 339862306a36Sopenharmony_ci 339962306a36Sopenharmony_ci err = do_add_mount(mnt, mp, path, path->mnt->mnt_flags | MNT_SHRINKABLE); 340062306a36Sopenharmony_ci unlock_mount(mp); 340162306a36Sopenharmony_ci if (unlikely(err)) 340262306a36Sopenharmony_ci goto discard; 340362306a36Sopenharmony_ci mntput(m); 340462306a36Sopenharmony_ci return 0; 340562306a36Sopenharmony_ci 340662306a36Sopenharmony_cidiscard_locked: 340762306a36Sopenharmony_ci namespace_unlock(); 340862306a36Sopenharmony_ci inode_unlock(dentry->d_inode); 340962306a36Sopenharmony_cidiscard: 341062306a36Sopenharmony_ci /* remove m from any expiration list it may be on */ 341162306a36Sopenharmony_ci if (!list_empty(&mnt->mnt_expire)) { 341262306a36Sopenharmony_ci namespace_lock(); 341362306a36Sopenharmony_ci list_del_init(&mnt->mnt_expire); 341462306a36Sopenharmony_ci 
namespace_unlock(); 341562306a36Sopenharmony_ci } 341662306a36Sopenharmony_ci mntput(m); 341762306a36Sopenharmony_ci mntput(m); 341862306a36Sopenharmony_ci return err; 341962306a36Sopenharmony_ci} 342062306a36Sopenharmony_ci 342162306a36Sopenharmony_ci/** 342262306a36Sopenharmony_ci * mnt_set_expiry - Put a mount on an expiration list 342362306a36Sopenharmony_ci * @mnt: The mount to list. 342462306a36Sopenharmony_ci * @expiry_list: The list to add the mount to. 342562306a36Sopenharmony_ci */ 342662306a36Sopenharmony_civoid mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) 342762306a36Sopenharmony_ci{ 342862306a36Sopenharmony_ci namespace_lock(); 342962306a36Sopenharmony_ci 343062306a36Sopenharmony_ci list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); 343162306a36Sopenharmony_ci 343262306a36Sopenharmony_ci namespace_unlock(); 343362306a36Sopenharmony_ci} 343462306a36Sopenharmony_ciEXPORT_SYMBOL(mnt_set_expiry); 343562306a36Sopenharmony_ci 343662306a36Sopenharmony_ci/* 343762306a36Sopenharmony_ci * process a list of expirable mountpoints with the intent of discarding any 343862306a36Sopenharmony_ci * mountpoints that aren't in use and haven't been touched since last we came 343962306a36Sopenharmony_ci * here 344062306a36Sopenharmony_ci */ 344162306a36Sopenharmony_civoid mark_mounts_for_expiry(struct list_head *mounts) 344262306a36Sopenharmony_ci{ 344362306a36Sopenharmony_ci struct mount *mnt, *next; 344462306a36Sopenharmony_ci LIST_HEAD(graveyard); 344562306a36Sopenharmony_ci 344662306a36Sopenharmony_ci if (list_empty(mounts)) 344762306a36Sopenharmony_ci return; 344862306a36Sopenharmony_ci 344962306a36Sopenharmony_ci namespace_lock(); 345062306a36Sopenharmony_ci lock_mount_hash(); 345162306a36Sopenharmony_ci 345262306a36Sopenharmony_ci /* extract from the expiration list every vfsmount that matches the 345362306a36Sopenharmony_ci * following criteria: 345462306a36Sopenharmony_ci * - only referenced by its parent vfsmount 
345562306a36Sopenharmony_ci * - still marked for expiry (marked on the last call here; marks are 345662306a36Sopenharmony_ci * cleared by mntput()) 345762306a36Sopenharmony_ci */ 345862306a36Sopenharmony_ci list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { 345962306a36Sopenharmony_ci if (!xchg(&mnt->mnt_expiry_mark, 1) || 346062306a36Sopenharmony_ci propagate_mount_busy(mnt, 1)) 346162306a36Sopenharmony_ci continue; 346262306a36Sopenharmony_ci list_move(&mnt->mnt_expire, &graveyard); 346362306a36Sopenharmony_ci } 346462306a36Sopenharmony_ci while (!list_empty(&graveyard)) { 346562306a36Sopenharmony_ci mnt = list_first_entry(&graveyard, struct mount, mnt_expire); 346662306a36Sopenharmony_ci touch_mnt_namespace(mnt->mnt_ns); 346762306a36Sopenharmony_ci umount_tree(mnt, UMOUNT_PROPAGATE|UMOUNT_SYNC); 346862306a36Sopenharmony_ci } 346962306a36Sopenharmony_ci unlock_mount_hash(); 347062306a36Sopenharmony_ci namespace_unlock(); 347162306a36Sopenharmony_ci} 347262306a36Sopenharmony_ci 347362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(mark_mounts_for_expiry); 347462306a36Sopenharmony_ci 347562306a36Sopenharmony_ci/* 347662306a36Sopenharmony_ci * Ripoff of 'select_parent()' 347762306a36Sopenharmony_ci * 347862306a36Sopenharmony_ci * search the list of submounts for a given mountpoint, and move any 347962306a36Sopenharmony_ci * shrinkable submounts to the 'graveyard' list. 
348062306a36Sopenharmony_ci */ 348162306a36Sopenharmony_cistatic int select_submounts(struct mount *parent, struct list_head *graveyard) 348262306a36Sopenharmony_ci{ 348362306a36Sopenharmony_ci struct mount *this_parent = parent; 348462306a36Sopenharmony_ci struct list_head *next; 348562306a36Sopenharmony_ci int found = 0; 348662306a36Sopenharmony_ci 348762306a36Sopenharmony_cirepeat: 348862306a36Sopenharmony_ci next = this_parent->mnt_mounts.next; 348962306a36Sopenharmony_ciresume: 349062306a36Sopenharmony_ci while (next != &this_parent->mnt_mounts) { 349162306a36Sopenharmony_ci struct list_head *tmp = next; 349262306a36Sopenharmony_ci struct mount *mnt = list_entry(tmp, struct mount, mnt_child); 349362306a36Sopenharmony_ci 349462306a36Sopenharmony_ci next = tmp->next; 349562306a36Sopenharmony_ci if (!(mnt->mnt.mnt_flags & MNT_SHRINKABLE)) 349662306a36Sopenharmony_ci continue; 349762306a36Sopenharmony_ci /* 349862306a36Sopenharmony_ci * Descend a level if the d_mounts list is non-empty. 349962306a36Sopenharmony_ci */ 350062306a36Sopenharmony_ci if (!list_empty(&mnt->mnt_mounts)) { 350162306a36Sopenharmony_ci this_parent = mnt; 350262306a36Sopenharmony_ci goto repeat; 350362306a36Sopenharmony_ci } 350462306a36Sopenharmony_ci 350562306a36Sopenharmony_ci if (!propagate_mount_busy(mnt, 1)) { 350662306a36Sopenharmony_ci list_move_tail(&mnt->mnt_expire, graveyard); 350762306a36Sopenharmony_ci found++; 350862306a36Sopenharmony_ci } 350962306a36Sopenharmony_ci } 351062306a36Sopenharmony_ci /* 351162306a36Sopenharmony_ci * All done at this level ... 
ascend and resume the search 351262306a36Sopenharmony_ci */ 351362306a36Sopenharmony_ci if (this_parent != parent) { 351462306a36Sopenharmony_ci next = this_parent->mnt_child.next; 351562306a36Sopenharmony_ci this_parent = this_parent->mnt_parent; 351662306a36Sopenharmony_ci goto resume; 351762306a36Sopenharmony_ci } 351862306a36Sopenharmony_ci return found; 351962306a36Sopenharmony_ci} 352062306a36Sopenharmony_ci 352162306a36Sopenharmony_ci/* 352262306a36Sopenharmony_ci * process a list of expirable mountpoints with the intent of discarding any 352362306a36Sopenharmony_ci * submounts of a specific parent mountpoint 352462306a36Sopenharmony_ci * 352562306a36Sopenharmony_ci * mount_lock must be held for write 352662306a36Sopenharmony_ci */ 352762306a36Sopenharmony_cistatic void shrink_submounts(struct mount *mnt) 352862306a36Sopenharmony_ci{ 352962306a36Sopenharmony_ci LIST_HEAD(graveyard); 353062306a36Sopenharmony_ci struct mount *m; 353162306a36Sopenharmony_ci 353262306a36Sopenharmony_ci /* extract submounts of 'mountpoint' from the expiration list */ 353362306a36Sopenharmony_ci while (select_submounts(mnt, &graveyard)) { 353462306a36Sopenharmony_ci while (!list_empty(&graveyard)) { 353562306a36Sopenharmony_ci m = list_first_entry(&graveyard, struct mount, 353662306a36Sopenharmony_ci mnt_expire); 353762306a36Sopenharmony_ci touch_mnt_namespace(m->mnt_ns); 353862306a36Sopenharmony_ci umount_tree(m, UMOUNT_PROPAGATE|UMOUNT_SYNC); 353962306a36Sopenharmony_ci } 354062306a36Sopenharmony_ci } 354162306a36Sopenharmony_ci} 354262306a36Sopenharmony_ci 354362306a36Sopenharmony_cistatic void *copy_mount_options(const void __user * data) 354462306a36Sopenharmony_ci{ 354562306a36Sopenharmony_ci char *copy; 354662306a36Sopenharmony_ci unsigned left, offset; 354762306a36Sopenharmony_ci 354862306a36Sopenharmony_ci if (!data) 354962306a36Sopenharmony_ci return NULL; 355062306a36Sopenharmony_ci 355162306a36Sopenharmony_ci copy = kmalloc(PAGE_SIZE, GFP_KERNEL); 
355262306a36Sopenharmony_ci if (!copy) 355362306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 355462306a36Sopenharmony_ci 355562306a36Sopenharmony_ci left = copy_from_user(copy, data, PAGE_SIZE); 355662306a36Sopenharmony_ci 355762306a36Sopenharmony_ci /* 355862306a36Sopenharmony_ci * Not all architectures have an exact copy_from_user(). Resort to 355962306a36Sopenharmony_ci * byte at a time. 356062306a36Sopenharmony_ci */ 356162306a36Sopenharmony_ci offset = PAGE_SIZE - left; 356262306a36Sopenharmony_ci while (left) { 356362306a36Sopenharmony_ci char c; 356462306a36Sopenharmony_ci if (get_user(c, (const char __user *)data + offset)) 356562306a36Sopenharmony_ci break; 356662306a36Sopenharmony_ci copy[offset] = c; 356762306a36Sopenharmony_ci left--; 356862306a36Sopenharmony_ci offset++; 356962306a36Sopenharmony_ci } 357062306a36Sopenharmony_ci 357162306a36Sopenharmony_ci if (left == PAGE_SIZE) { 357262306a36Sopenharmony_ci kfree(copy); 357362306a36Sopenharmony_ci return ERR_PTR(-EFAULT); 357462306a36Sopenharmony_ci } 357562306a36Sopenharmony_ci 357662306a36Sopenharmony_ci return copy; 357762306a36Sopenharmony_ci} 357862306a36Sopenharmony_ci 357962306a36Sopenharmony_cistatic char *copy_mount_string(const void __user *data) 358062306a36Sopenharmony_ci{ 358162306a36Sopenharmony_ci return data ? strndup_user(data, PATH_MAX) : NULL; 358262306a36Sopenharmony_ci} 358362306a36Sopenharmony_ci 358462306a36Sopenharmony_ci/* 358562306a36Sopenharmony_ci * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to 358662306a36Sopenharmony_ci * be given to the mount() call (ie: read-only, no-dev, no-suid etc). 358762306a36Sopenharmony_ci * 358862306a36Sopenharmony_ci * data is a (void *) that can point to any structure up to 358962306a36Sopenharmony_ci * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent 359062306a36Sopenharmony_ci * information (or be NULL). 
359162306a36Sopenharmony_ci * 359262306a36Sopenharmony_ci * Pre-0.97 versions of mount() didn't have a flags word. 359362306a36Sopenharmony_ci * When the flags word was introduced its top half was required 359462306a36Sopenharmony_ci * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9. 359562306a36Sopenharmony_ci * Therefore, if this magic number is present, it carries no information 359662306a36Sopenharmony_ci * and must be discarded. 359762306a36Sopenharmony_ci */ 359862306a36Sopenharmony_ciint path_mount(const char *dev_name, struct path *path, 359962306a36Sopenharmony_ci const char *type_page, unsigned long flags, void *data_page) 360062306a36Sopenharmony_ci{ 360162306a36Sopenharmony_ci unsigned int mnt_flags = 0, sb_flags; 360262306a36Sopenharmony_ci int ret; 360362306a36Sopenharmony_ci 360462306a36Sopenharmony_ci /* Discard magic */ 360562306a36Sopenharmony_ci if ((flags & MS_MGC_MSK) == MS_MGC_VAL) 360662306a36Sopenharmony_ci flags &= ~MS_MGC_MSK; 360762306a36Sopenharmony_ci 360862306a36Sopenharmony_ci /* Basic sanity checks */ 360962306a36Sopenharmony_ci if (data_page) 361062306a36Sopenharmony_ci ((char *)data_page)[PAGE_SIZE - 1] = 0; 361162306a36Sopenharmony_ci 361262306a36Sopenharmony_ci if (flags & MS_NOUSER) 361362306a36Sopenharmony_ci return -EINVAL; 361462306a36Sopenharmony_ci 361562306a36Sopenharmony_ci ret = security_sb_mount(dev_name, path, type_page, flags, data_page); 361662306a36Sopenharmony_ci if (ret) 361762306a36Sopenharmony_ci return ret; 361862306a36Sopenharmony_ci if (!may_mount()) 361962306a36Sopenharmony_ci return -EPERM; 362062306a36Sopenharmony_ci if (flags & SB_MANDLOCK) 362162306a36Sopenharmony_ci warn_mandlock(); 362262306a36Sopenharmony_ci 362362306a36Sopenharmony_ci /* Default to relatime unless overriden */ 362462306a36Sopenharmony_ci if (!(flags & MS_NOATIME)) 362562306a36Sopenharmony_ci mnt_flags |= MNT_RELATIME; 362662306a36Sopenharmony_ci 362762306a36Sopenharmony_ci /* Separate the per-mountpoint flags 
*/ 362862306a36Sopenharmony_ci if (flags & MS_NOSUID) 362962306a36Sopenharmony_ci mnt_flags |= MNT_NOSUID; 363062306a36Sopenharmony_ci if (flags & MS_NODEV) 363162306a36Sopenharmony_ci mnt_flags |= MNT_NODEV; 363262306a36Sopenharmony_ci if (flags & MS_NOEXEC) 363362306a36Sopenharmony_ci mnt_flags |= MNT_NOEXEC; 363462306a36Sopenharmony_ci if (flags & MS_NOATIME) 363562306a36Sopenharmony_ci mnt_flags |= MNT_NOATIME; 363662306a36Sopenharmony_ci if (flags & MS_NODIRATIME) 363762306a36Sopenharmony_ci mnt_flags |= MNT_NODIRATIME; 363862306a36Sopenharmony_ci if (flags & MS_STRICTATIME) 363962306a36Sopenharmony_ci mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME); 364062306a36Sopenharmony_ci if (flags & MS_RDONLY) 364162306a36Sopenharmony_ci mnt_flags |= MNT_READONLY; 364262306a36Sopenharmony_ci if (flags & MS_NOSYMFOLLOW) 364362306a36Sopenharmony_ci mnt_flags |= MNT_NOSYMFOLLOW; 364462306a36Sopenharmony_ci 364562306a36Sopenharmony_ci /* The default atime for remount is preservation */ 364662306a36Sopenharmony_ci if ((flags & MS_REMOUNT) && 364762306a36Sopenharmony_ci ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME | 364862306a36Sopenharmony_ci MS_STRICTATIME)) == 0)) { 364962306a36Sopenharmony_ci mnt_flags &= ~MNT_ATIME_MASK; 365062306a36Sopenharmony_ci mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK; 365162306a36Sopenharmony_ci } 365262306a36Sopenharmony_ci 365362306a36Sopenharmony_ci sb_flags = flags & (SB_RDONLY | 365462306a36Sopenharmony_ci SB_SYNCHRONOUS | 365562306a36Sopenharmony_ci SB_MANDLOCK | 365662306a36Sopenharmony_ci SB_DIRSYNC | 365762306a36Sopenharmony_ci SB_SILENT | 365862306a36Sopenharmony_ci SB_POSIXACL | 365962306a36Sopenharmony_ci SB_LAZYTIME | 366062306a36Sopenharmony_ci SB_I_VERSION); 366162306a36Sopenharmony_ci 366262306a36Sopenharmony_ci if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND)) 366362306a36Sopenharmony_ci return do_reconfigure_mnt(path, mnt_flags); 366462306a36Sopenharmony_ci if (flags & MS_REMOUNT) 
366562306a36Sopenharmony_ci return do_remount(path, flags, sb_flags, mnt_flags, data_page); 366662306a36Sopenharmony_ci if (flags & MS_BIND) 366762306a36Sopenharmony_ci return do_loopback(path, dev_name, flags & MS_REC); 366862306a36Sopenharmony_ci if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) 366962306a36Sopenharmony_ci return do_change_type(path, flags); 367062306a36Sopenharmony_ci if (flags & MS_MOVE) 367162306a36Sopenharmony_ci return do_move_mount_old(path, dev_name); 367262306a36Sopenharmony_ci 367362306a36Sopenharmony_ci return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name, 367462306a36Sopenharmony_ci data_page); 367562306a36Sopenharmony_ci} 367662306a36Sopenharmony_ci 367762306a36Sopenharmony_cilong do_mount(const char *dev_name, const char __user *dir_name, 367862306a36Sopenharmony_ci const char *type_page, unsigned long flags, void *data_page) 367962306a36Sopenharmony_ci{ 368062306a36Sopenharmony_ci struct path path; 368162306a36Sopenharmony_ci int ret; 368262306a36Sopenharmony_ci 368362306a36Sopenharmony_ci ret = user_path_at(AT_FDCWD, dir_name, LOOKUP_FOLLOW, &path); 368462306a36Sopenharmony_ci if (ret) 368562306a36Sopenharmony_ci return ret; 368662306a36Sopenharmony_ci ret = path_mount(dev_name, &path, type_page, flags, data_page); 368762306a36Sopenharmony_ci path_put(&path); 368862306a36Sopenharmony_ci return ret; 368962306a36Sopenharmony_ci} 369062306a36Sopenharmony_ci 369162306a36Sopenharmony_cistatic struct ucounts *inc_mnt_namespaces(struct user_namespace *ns) 369262306a36Sopenharmony_ci{ 369362306a36Sopenharmony_ci return inc_ucount(ns, current_euid(), UCOUNT_MNT_NAMESPACES); 369462306a36Sopenharmony_ci} 369562306a36Sopenharmony_ci 369662306a36Sopenharmony_cistatic void dec_mnt_namespaces(struct ucounts *ucounts) 369762306a36Sopenharmony_ci{ 369862306a36Sopenharmony_ci dec_ucount(ucounts, UCOUNT_MNT_NAMESPACES); 369962306a36Sopenharmony_ci} 370062306a36Sopenharmony_ci 370162306a36Sopenharmony_cistatic void 
free_mnt_ns(struct mnt_namespace *ns) 370262306a36Sopenharmony_ci{ 370362306a36Sopenharmony_ci if (!is_anon_ns(ns)) 370462306a36Sopenharmony_ci ns_free_inum(&ns->ns); 370562306a36Sopenharmony_ci dec_mnt_namespaces(ns->ucounts); 370662306a36Sopenharmony_ci put_user_ns(ns->user_ns); 370762306a36Sopenharmony_ci kfree(ns); 370862306a36Sopenharmony_ci} 370962306a36Sopenharmony_ci 371062306a36Sopenharmony_ci/* 371162306a36Sopenharmony_ci * Assign a sequence number so we can detect when we attempt to bind 371262306a36Sopenharmony_ci * mount a reference to an older mount namespace into the current 371362306a36Sopenharmony_ci * mount namespace, preventing reference counting loops. A 64bit 371462306a36Sopenharmony_ci * number incrementing at 10Ghz will take 12,427 years to wrap which 371562306a36Sopenharmony_ci * is effectively never, so we can ignore the possibility. 371662306a36Sopenharmony_ci */ 371762306a36Sopenharmony_cistatic atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); 371862306a36Sopenharmony_ci 371962306a36Sopenharmony_cistatic struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon) 372062306a36Sopenharmony_ci{ 372162306a36Sopenharmony_ci struct mnt_namespace *new_ns; 372262306a36Sopenharmony_ci struct ucounts *ucounts; 372362306a36Sopenharmony_ci int ret; 372462306a36Sopenharmony_ci 372562306a36Sopenharmony_ci ucounts = inc_mnt_namespaces(user_ns); 372662306a36Sopenharmony_ci if (!ucounts) 372762306a36Sopenharmony_ci return ERR_PTR(-ENOSPC); 372862306a36Sopenharmony_ci 372962306a36Sopenharmony_ci new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL_ACCOUNT); 373062306a36Sopenharmony_ci if (!new_ns) { 373162306a36Sopenharmony_ci dec_mnt_namespaces(ucounts); 373262306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 373362306a36Sopenharmony_ci } 373462306a36Sopenharmony_ci if (!anon) { 373562306a36Sopenharmony_ci ret = ns_alloc_inum(&new_ns->ns); 373662306a36Sopenharmony_ci if (ret) { 373762306a36Sopenharmony_ci kfree(new_ns); 
373862306a36Sopenharmony_ci dec_mnt_namespaces(ucounts); 373962306a36Sopenharmony_ci return ERR_PTR(ret); 374062306a36Sopenharmony_ci } 374162306a36Sopenharmony_ci } 374262306a36Sopenharmony_ci new_ns->ns.ops = &mntns_operations; 374362306a36Sopenharmony_ci if (!anon) 374462306a36Sopenharmony_ci new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); 374562306a36Sopenharmony_ci refcount_set(&new_ns->ns.count, 1); 374662306a36Sopenharmony_ci INIT_LIST_HEAD(&new_ns->list); 374762306a36Sopenharmony_ci init_waitqueue_head(&new_ns->poll); 374862306a36Sopenharmony_ci spin_lock_init(&new_ns->ns_lock); 374962306a36Sopenharmony_ci new_ns->user_ns = get_user_ns(user_ns); 375062306a36Sopenharmony_ci new_ns->ucounts = ucounts; 375162306a36Sopenharmony_ci return new_ns; 375262306a36Sopenharmony_ci} 375362306a36Sopenharmony_ci 375462306a36Sopenharmony_ci__latent_entropy 375562306a36Sopenharmony_cistruct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, 375662306a36Sopenharmony_ci struct user_namespace *user_ns, struct fs_struct *new_fs) 375762306a36Sopenharmony_ci{ 375862306a36Sopenharmony_ci struct mnt_namespace *new_ns; 375962306a36Sopenharmony_ci struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; 376062306a36Sopenharmony_ci struct mount *p, *q; 376162306a36Sopenharmony_ci struct mount *old; 376262306a36Sopenharmony_ci struct mount *new; 376362306a36Sopenharmony_ci int copy_flags; 376462306a36Sopenharmony_ci 376562306a36Sopenharmony_ci BUG_ON(!ns); 376662306a36Sopenharmony_ci 376762306a36Sopenharmony_ci if (likely(!(flags & CLONE_NEWNS))) { 376862306a36Sopenharmony_ci get_mnt_ns(ns); 376962306a36Sopenharmony_ci return ns; 377062306a36Sopenharmony_ci } 377162306a36Sopenharmony_ci 377262306a36Sopenharmony_ci old = ns->root; 377362306a36Sopenharmony_ci 377462306a36Sopenharmony_ci new_ns = alloc_mnt_ns(user_ns, false); 377562306a36Sopenharmony_ci if (IS_ERR(new_ns)) 377662306a36Sopenharmony_ci return new_ns; 377762306a36Sopenharmony_ci 377862306a36Sopenharmony_ci 
namespace_lock(); 377962306a36Sopenharmony_ci /* First pass: copy the tree topology */ 378062306a36Sopenharmony_ci copy_flags = CL_COPY_UNBINDABLE | CL_EXPIRE; 378162306a36Sopenharmony_ci if (user_ns != ns->user_ns) 378262306a36Sopenharmony_ci copy_flags |= CL_SHARED_TO_SLAVE; 378362306a36Sopenharmony_ci new = copy_tree(old, old->mnt.mnt_root, copy_flags); 378462306a36Sopenharmony_ci if (IS_ERR(new)) { 378562306a36Sopenharmony_ci namespace_unlock(); 378662306a36Sopenharmony_ci free_mnt_ns(new_ns); 378762306a36Sopenharmony_ci return ERR_CAST(new); 378862306a36Sopenharmony_ci } 378962306a36Sopenharmony_ci if (user_ns != ns->user_ns) { 379062306a36Sopenharmony_ci lock_mount_hash(); 379162306a36Sopenharmony_ci lock_mnt_tree(new); 379262306a36Sopenharmony_ci unlock_mount_hash(); 379362306a36Sopenharmony_ci } 379462306a36Sopenharmony_ci new_ns->root = new; 379562306a36Sopenharmony_ci list_add_tail(&new_ns->list, &new->mnt_list); 379662306a36Sopenharmony_ci 379762306a36Sopenharmony_ci /* 379862306a36Sopenharmony_ci * Second pass: switch the tsk->fs->* elements and mark new vfsmounts 379962306a36Sopenharmony_ci * as belonging to new namespace. We have already acquired a private 380062306a36Sopenharmony_ci * fs_struct, so tsk->fs->lock is not needed. 
380162306a36Sopenharmony_ci */ 380262306a36Sopenharmony_ci p = old; 380362306a36Sopenharmony_ci q = new; 380462306a36Sopenharmony_ci while (p) { 380562306a36Sopenharmony_ci q->mnt_ns = new_ns; 380662306a36Sopenharmony_ci new_ns->mounts++; 380762306a36Sopenharmony_ci if (new_fs) { 380862306a36Sopenharmony_ci if (&p->mnt == new_fs->root.mnt) { 380962306a36Sopenharmony_ci new_fs->root.mnt = mntget(&q->mnt); 381062306a36Sopenharmony_ci rootmnt = &p->mnt; 381162306a36Sopenharmony_ci } 381262306a36Sopenharmony_ci if (&p->mnt == new_fs->pwd.mnt) { 381362306a36Sopenharmony_ci new_fs->pwd.mnt = mntget(&q->mnt); 381462306a36Sopenharmony_ci pwdmnt = &p->mnt; 381562306a36Sopenharmony_ci } 381662306a36Sopenharmony_ci } 381762306a36Sopenharmony_ci p = next_mnt(p, old); 381862306a36Sopenharmony_ci q = next_mnt(q, new); 381962306a36Sopenharmony_ci if (!q) 382062306a36Sopenharmony_ci break; 382162306a36Sopenharmony_ci // an mntns binding we'd skipped? 382262306a36Sopenharmony_ci while (p->mnt.mnt_root != q->mnt.mnt_root) 382362306a36Sopenharmony_ci p = next_mnt(skip_mnt_tree(p), old); 382462306a36Sopenharmony_ci } 382562306a36Sopenharmony_ci namespace_unlock(); 382662306a36Sopenharmony_ci 382762306a36Sopenharmony_ci if (rootmnt) 382862306a36Sopenharmony_ci mntput(rootmnt); 382962306a36Sopenharmony_ci if (pwdmnt) 383062306a36Sopenharmony_ci mntput(pwdmnt); 383162306a36Sopenharmony_ci 383262306a36Sopenharmony_ci return new_ns; 383362306a36Sopenharmony_ci} 383462306a36Sopenharmony_ci 383562306a36Sopenharmony_cistruct dentry *mount_subtree(struct vfsmount *m, const char *name) 383662306a36Sopenharmony_ci{ 383762306a36Sopenharmony_ci struct mount *mnt = real_mount(m); 383862306a36Sopenharmony_ci struct mnt_namespace *ns; 383962306a36Sopenharmony_ci struct super_block *s; 384062306a36Sopenharmony_ci struct path path; 384162306a36Sopenharmony_ci int err; 384262306a36Sopenharmony_ci 384362306a36Sopenharmony_ci ns = alloc_mnt_ns(&init_user_ns, true); 384462306a36Sopenharmony_ci if 
(IS_ERR(ns)) { 384562306a36Sopenharmony_ci mntput(m); 384662306a36Sopenharmony_ci return ERR_CAST(ns); 384762306a36Sopenharmony_ci } 384862306a36Sopenharmony_ci mnt->mnt_ns = ns; 384962306a36Sopenharmony_ci ns->root = mnt; 385062306a36Sopenharmony_ci ns->mounts++; 385162306a36Sopenharmony_ci list_add(&mnt->mnt_list, &ns->list); 385262306a36Sopenharmony_ci 385362306a36Sopenharmony_ci err = vfs_path_lookup(m->mnt_root, m, 385462306a36Sopenharmony_ci name, LOOKUP_FOLLOW|LOOKUP_AUTOMOUNT, &path); 385562306a36Sopenharmony_ci 385662306a36Sopenharmony_ci put_mnt_ns(ns); 385762306a36Sopenharmony_ci 385862306a36Sopenharmony_ci if (err) 385962306a36Sopenharmony_ci return ERR_PTR(err); 386062306a36Sopenharmony_ci 386162306a36Sopenharmony_ci /* trade a vfsmount reference for active sb one */ 386262306a36Sopenharmony_ci s = path.mnt->mnt_sb; 386362306a36Sopenharmony_ci atomic_inc(&s->s_active); 386462306a36Sopenharmony_ci mntput(path.mnt); 386562306a36Sopenharmony_ci /* lock the sucker */ 386662306a36Sopenharmony_ci down_write(&s->s_umount); 386762306a36Sopenharmony_ci /* ... 
and return the root of (sub)tree on it */ 386862306a36Sopenharmony_ci return path.dentry; 386962306a36Sopenharmony_ci} 387062306a36Sopenharmony_ciEXPORT_SYMBOL(mount_subtree); 387162306a36Sopenharmony_ci 387262306a36Sopenharmony_ciSYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, 387362306a36Sopenharmony_ci char __user *, type, unsigned long, flags, void __user *, data) 387462306a36Sopenharmony_ci{ 387562306a36Sopenharmony_ci int ret; 387662306a36Sopenharmony_ci char *kernel_type; 387762306a36Sopenharmony_ci char *kernel_dev; 387862306a36Sopenharmony_ci void *options; 387962306a36Sopenharmony_ci 388062306a36Sopenharmony_ci kernel_type = copy_mount_string(type); 388162306a36Sopenharmony_ci ret = PTR_ERR(kernel_type); 388262306a36Sopenharmony_ci if (IS_ERR(kernel_type)) 388362306a36Sopenharmony_ci goto out_type; 388462306a36Sopenharmony_ci 388562306a36Sopenharmony_ci kernel_dev = copy_mount_string(dev_name); 388662306a36Sopenharmony_ci ret = PTR_ERR(kernel_dev); 388762306a36Sopenharmony_ci if (IS_ERR(kernel_dev)) 388862306a36Sopenharmony_ci goto out_dev; 388962306a36Sopenharmony_ci 389062306a36Sopenharmony_ci options = copy_mount_options(data); 389162306a36Sopenharmony_ci ret = PTR_ERR(options); 389262306a36Sopenharmony_ci if (IS_ERR(options)) 389362306a36Sopenharmony_ci goto out_data; 389462306a36Sopenharmony_ci 389562306a36Sopenharmony_ci ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options); 389662306a36Sopenharmony_ci 389762306a36Sopenharmony_ci kfree(options); 389862306a36Sopenharmony_ciout_data: 389962306a36Sopenharmony_ci kfree(kernel_dev); 390062306a36Sopenharmony_ciout_dev: 390162306a36Sopenharmony_ci kfree(kernel_type); 390262306a36Sopenharmony_ciout_type: 390362306a36Sopenharmony_ci return ret; 390462306a36Sopenharmony_ci} 390562306a36Sopenharmony_ci 390662306a36Sopenharmony_ci#define FSMOUNT_VALID_FLAGS \ 390762306a36Sopenharmony_ci (MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \ 390862306a36Sopenharmony_ci 
MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME | \ 390962306a36Sopenharmony_ci MOUNT_ATTR_NOSYMFOLLOW) 391062306a36Sopenharmony_ci 391162306a36Sopenharmony_ci#define MOUNT_SETATTR_VALID_FLAGS (FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP) 391262306a36Sopenharmony_ci 391362306a36Sopenharmony_ci#define MOUNT_SETATTR_PROPAGATION_FLAGS \ 391462306a36Sopenharmony_ci (MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED) 391562306a36Sopenharmony_ci 391662306a36Sopenharmony_cistatic unsigned int attr_flags_to_mnt_flags(u64 attr_flags) 391762306a36Sopenharmony_ci{ 391862306a36Sopenharmony_ci unsigned int mnt_flags = 0; 391962306a36Sopenharmony_ci 392062306a36Sopenharmony_ci if (attr_flags & MOUNT_ATTR_RDONLY) 392162306a36Sopenharmony_ci mnt_flags |= MNT_READONLY; 392262306a36Sopenharmony_ci if (attr_flags & MOUNT_ATTR_NOSUID) 392362306a36Sopenharmony_ci mnt_flags |= MNT_NOSUID; 392462306a36Sopenharmony_ci if (attr_flags & MOUNT_ATTR_NODEV) 392562306a36Sopenharmony_ci mnt_flags |= MNT_NODEV; 392662306a36Sopenharmony_ci if (attr_flags & MOUNT_ATTR_NOEXEC) 392762306a36Sopenharmony_ci mnt_flags |= MNT_NOEXEC; 392862306a36Sopenharmony_ci if (attr_flags & MOUNT_ATTR_NODIRATIME) 392962306a36Sopenharmony_ci mnt_flags |= MNT_NODIRATIME; 393062306a36Sopenharmony_ci if (attr_flags & MOUNT_ATTR_NOSYMFOLLOW) 393162306a36Sopenharmony_ci mnt_flags |= MNT_NOSYMFOLLOW; 393262306a36Sopenharmony_ci 393362306a36Sopenharmony_ci return mnt_flags; 393462306a36Sopenharmony_ci} 393562306a36Sopenharmony_ci 393662306a36Sopenharmony_ci/* 393762306a36Sopenharmony_ci * Create a kernel mount representation for a new, prepared superblock 393862306a36Sopenharmony_ci * (specified by fs_fd) and attach to an open_tree-like file descriptor. 
393962306a36Sopenharmony_ci */ 394062306a36Sopenharmony_ciSYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags, 394162306a36Sopenharmony_ci unsigned int, attr_flags) 394262306a36Sopenharmony_ci{ 394362306a36Sopenharmony_ci struct mnt_namespace *ns; 394462306a36Sopenharmony_ci struct fs_context *fc; 394562306a36Sopenharmony_ci struct file *file; 394662306a36Sopenharmony_ci struct path newmount; 394762306a36Sopenharmony_ci struct mount *mnt; 394862306a36Sopenharmony_ci struct fd f; 394962306a36Sopenharmony_ci unsigned int mnt_flags = 0; 395062306a36Sopenharmony_ci long ret; 395162306a36Sopenharmony_ci 395262306a36Sopenharmony_ci if (!may_mount()) 395362306a36Sopenharmony_ci return -EPERM; 395462306a36Sopenharmony_ci 395562306a36Sopenharmony_ci if ((flags & ~(FSMOUNT_CLOEXEC)) != 0) 395662306a36Sopenharmony_ci return -EINVAL; 395762306a36Sopenharmony_ci 395862306a36Sopenharmony_ci if (attr_flags & ~FSMOUNT_VALID_FLAGS) 395962306a36Sopenharmony_ci return -EINVAL; 396062306a36Sopenharmony_ci 396162306a36Sopenharmony_ci mnt_flags = attr_flags_to_mnt_flags(attr_flags); 396262306a36Sopenharmony_ci 396362306a36Sopenharmony_ci switch (attr_flags & MOUNT_ATTR__ATIME) { 396462306a36Sopenharmony_ci case MOUNT_ATTR_STRICTATIME: 396562306a36Sopenharmony_ci break; 396662306a36Sopenharmony_ci case MOUNT_ATTR_NOATIME: 396762306a36Sopenharmony_ci mnt_flags |= MNT_NOATIME; 396862306a36Sopenharmony_ci break; 396962306a36Sopenharmony_ci case MOUNT_ATTR_RELATIME: 397062306a36Sopenharmony_ci mnt_flags |= MNT_RELATIME; 397162306a36Sopenharmony_ci break; 397262306a36Sopenharmony_ci default: 397362306a36Sopenharmony_ci return -EINVAL; 397462306a36Sopenharmony_ci } 397562306a36Sopenharmony_ci 397662306a36Sopenharmony_ci f = fdget(fs_fd); 397762306a36Sopenharmony_ci if (!f.file) 397862306a36Sopenharmony_ci return -EBADF; 397962306a36Sopenharmony_ci 398062306a36Sopenharmony_ci ret = -EINVAL; 398162306a36Sopenharmony_ci if (f.file->f_op != &fscontext_fops) 398262306a36Sopenharmony_ci goto 
err_fsfd; 398362306a36Sopenharmony_ci 398462306a36Sopenharmony_ci fc = f.file->private_data; 398562306a36Sopenharmony_ci 398662306a36Sopenharmony_ci ret = mutex_lock_interruptible(&fc->uapi_mutex); 398762306a36Sopenharmony_ci if (ret < 0) 398862306a36Sopenharmony_ci goto err_fsfd; 398962306a36Sopenharmony_ci 399062306a36Sopenharmony_ci /* There must be a valid superblock or we can't mount it */ 399162306a36Sopenharmony_ci ret = -EINVAL; 399262306a36Sopenharmony_ci if (!fc->root) 399362306a36Sopenharmony_ci goto err_unlock; 399462306a36Sopenharmony_ci 399562306a36Sopenharmony_ci ret = -EPERM; 399662306a36Sopenharmony_ci if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) { 399762306a36Sopenharmony_ci pr_warn("VFS: Mount too revealing\n"); 399862306a36Sopenharmony_ci goto err_unlock; 399962306a36Sopenharmony_ci } 400062306a36Sopenharmony_ci 400162306a36Sopenharmony_ci ret = -EBUSY; 400262306a36Sopenharmony_ci if (fc->phase != FS_CONTEXT_AWAITING_MOUNT) 400362306a36Sopenharmony_ci goto err_unlock; 400462306a36Sopenharmony_ci 400562306a36Sopenharmony_ci if (fc->sb_flags & SB_MANDLOCK) 400662306a36Sopenharmony_ci warn_mandlock(); 400762306a36Sopenharmony_ci 400862306a36Sopenharmony_ci newmount.mnt = vfs_create_mount(fc); 400962306a36Sopenharmony_ci if (IS_ERR(newmount.mnt)) { 401062306a36Sopenharmony_ci ret = PTR_ERR(newmount.mnt); 401162306a36Sopenharmony_ci goto err_unlock; 401262306a36Sopenharmony_ci } 401362306a36Sopenharmony_ci newmount.dentry = dget(fc->root); 401462306a36Sopenharmony_ci newmount.mnt->mnt_flags = mnt_flags; 401562306a36Sopenharmony_ci 401662306a36Sopenharmony_ci /* We've done the mount bit - now move the file context into more or 401762306a36Sopenharmony_ci * less the same state as if we'd done an fspick(). We don't want to 401862306a36Sopenharmony_ci * do any memory allocation or anything like that at this point as we 401962306a36Sopenharmony_ci * don't want to have to handle any errors incurred. 
402062306a36Sopenharmony_ci */ 402162306a36Sopenharmony_ci vfs_clean_context(fc); 402262306a36Sopenharmony_ci 402362306a36Sopenharmony_ci ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true); 402462306a36Sopenharmony_ci if (IS_ERR(ns)) { 402562306a36Sopenharmony_ci ret = PTR_ERR(ns); 402662306a36Sopenharmony_ci goto err_path; 402762306a36Sopenharmony_ci } 402862306a36Sopenharmony_ci mnt = real_mount(newmount.mnt); 402962306a36Sopenharmony_ci mnt->mnt_ns = ns; 403062306a36Sopenharmony_ci ns->root = mnt; 403162306a36Sopenharmony_ci ns->mounts = 1; 403262306a36Sopenharmony_ci list_add(&mnt->mnt_list, &ns->list); 403362306a36Sopenharmony_ci mntget(newmount.mnt); 403462306a36Sopenharmony_ci 403562306a36Sopenharmony_ci /* Attach to an apparent O_PATH fd with a note that we need to unmount 403662306a36Sopenharmony_ci * it, not just simply put it. 403762306a36Sopenharmony_ci */ 403862306a36Sopenharmony_ci file = dentry_open(&newmount, O_PATH, fc->cred); 403962306a36Sopenharmony_ci if (IS_ERR(file)) { 404062306a36Sopenharmony_ci dissolve_on_fput(newmount.mnt); 404162306a36Sopenharmony_ci ret = PTR_ERR(file); 404262306a36Sopenharmony_ci goto err_path; 404362306a36Sopenharmony_ci } 404462306a36Sopenharmony_ci file->f_mode |= FMODE_NEED_UNMOUNT; 404562306a36Sopenharmony_ci 404662306a36Sopenharmony_ci ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? 
O_CLOEXEC : 0); 404762306a36Sopenharmony_ci if (ret >= 0) 404862306a36Sopenharmony_ci fd_install(ret, file); 404962306a36Sopenharmony_ci else 405062306a36Sopenharmony_ci fput(file); 405162306a36Sopenharmony_ci 405262306a36Sopenharmony_cierr_path: 405362306a36Sopenharmony_ci path_put(&newmount); 405462306a36Sopenharmony_cierr_unlock: 405562306a36Sopenharmony_ci mutex_unlock(&fc->uapi_mutex); 405662306a36Sopenharmony_cierr_fsfd: 405762306a36Sopenharmony_ci fdput(f); 405862306a36Sopenharmony_ci return ret; 405962306a36Sopenharmony_ci} 406062306a36Sopenharmony_ci 406162306a36Sopenharmony_ci/* 406262306a36Sopenharmony_ci * Move a mount from one place to another. In combination with 406362306a36Sopenharmony_ci * fsopen()/fsmount() this is used to install a new mount and in combination 406462306a36Sopenharmony_ci * with open_tree(OPEN_TREE_CLONE [| AT_RECURSIVE]) it can be used to copy 406562306a36Sopenharmony_ci * a mount subtree. 406662306a36Sopenharmony_ci * 406762306a36Sopenharmony_ci * Note the flags value is a combination of MOVE_MOUNT_* flags. 
406862306a36Sopenharmony_ci */ 406962306a36Sopenharmony_ciSYSCALL_DEFINE5(move_mount, 407062306a36Sopenharmony_ci int, from_dfd, const char __user *, from_pathname, 407162306a36Sopenharmony_ci int, to_dfd, const char __user *, to_pathname, 407262306a36Sopenharmony_ci unsigned int, flags) 407362306a36Sopenharmony_ci{ 407462306a36Sopenharmony_ci struct path from_path, to_path; 407562306a36Sopenharmony_ci unsigned int lflags; 407662306a36Sopenharmony_ci int ret = 0; 407762306a36Sopenharmony_ci 407862306a36Sopenharmony_ci if (!may_mount()) 407962306a36Sopenharmony_ci return -EPERM; 408062306a36Sopenharmony_ci 408162306a36Sopenharmony_ci if (flags & ~MOVE_MOUNT__MASK) 408262306a36Sopenharmony_ci return -EINVAL; 408362306a36Sopenharmony_ci 408462306a36Sopenharmony_ci if ((flags & (MOVE_MOUNT_BENEATH | MOVE_MOUNT_SET_GROUP)) == 408562306a36Sopenharmony_ci (MOVE_MOUNT_BENEATH | MOVE_MOUNT_SET_GROUP)) 408662306a36Sopenharmony_ci return -EINVAL; 408762306a36Sopenharmony_ci 408862306a36Sopenharmony_ci /* If someone gives a pathname, they aren't permitted to move 408962306a36Sopenharmony_ci * from an fd that requires unmount as we can't get at the flag 409062306a36Sopenharmony_ci * to clear it afterwards. 
409162306a36Sopenharmony_ci */ 409262306a36Sopenharmony_ci lflags = 0; 409362306a36Sopenharmony_ci if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW; 409462306a36Sopenharmony_ci if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; 409562306a36Sopenharmony_ci if (flags & MOVE_MOUNT_F_EMPTY_PATH) lflags |= LOOKUP_EMPTY; 409662306a36Sopenharmony_ci 409762306a36Sopenharmony_ci ret = user_path_at(from_dfd, from_pathname, lflags, &from_path); 409862306a36Sopenharmony_ci if (ret < 0) 409962306a36Sopenharmony_ci return ret; 410062306a36Sopenharmony_ci 410162306a36Sopenharmony_ci lflags = 0; 410262306a36Sopenharmony_ci if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW; 410362306a36Sopenharmony_ci if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; 410462306a36Sopenharmony_ci if (flags & MOVE_MOUNT_T_EMPTY_PATH) lflags |= LOOKUP_EMPTY; 410562306a36Sopenharmony_ci 410662306a36Sopenharmony_ci ret = user_path_at(to_dfd, to_pathname, lflags, &to_path); 410762306a36Sopenharmony_ci if (ret < 0) 410862306a36Sopenharmony_ci goto out_from; 410962306a36Sopenharmony_ci 411062306a36Sopenharmony_ci ret = security_move_mount(&from_path, &to_path); 411162306a36Sopenharmony_ci if (ret < 0) 411262306a36Sopenharmony_ci goto out_to; 411362306a36Sopenharmony_ci 411462306a36Sopenharmony_ci if (flags & MOVE_MOUNT_SET_GROUP) 411562306a36Sopenharmony_ci ret = do_set_group(&from_path, &to_path); 411662306a36Sopenharmony_ci else 411762306a36Sopenharmony_ci ret = do_move_mount(&from_path, &to_path, 411862306a36Sopenharmony_ci (flags & MOVE_MOUNT_BENEATH)); 411962306a36Sopenharmony_ci 412062306a36Sopenharmony_ciout_to: 412162306a36Sopenharmony_ci path_put(&to_path); 412262306a36Sopenharmony_ciout_from: 412362306a36Sopenharmony_ci path_put(&from_path); 412462306a36Sopenharmony_ci return ret; 412562306a36Sopenharmony_ci} 412662306a36Sopenharmony_ci 412762306a36Sopenharmony_ci/* 412862306a36Sopenharmony_ci * Return true if path is reachable from root 
412962306a36Sopenharmony_ci * 413062306a36Sopenharmony_ci * namespace_sem or mount_lock is held 413162306a36Sopenharmony_ci */ 413262306a36Sopenharmony_cibool is_path_reachable(struct mount *mnt, struct dentry *dentry, 413362306a36Sopenharmony_ci const struct path *root) 413462306a36Sopenharmony_ci{ 413562306a36Sopenharmony_ci while (&mnt->mnt != root->mnt && mnt_has_parent(mnt)) { 413662306a36Sopenharmony_ci dentry = mnt->mnt_mountpoint; 413762306a36Sopenharmony_ci mnt = mnt->mnt_parent; 413862306a36Sopenharmony_ci } 413962306a36Sopenharmony_ci return &mnt->mnt == root->mnt && is_subdir(dentry, root->dentry); 414062306a36Sopenharmony_ci} 414162306a36Sopenharmony_ci 414262306a36Sopenharmony_cibool path_is_under(const struct path *path1, const struct path *path2) 414362306a36Sopenharmony_ci{ 414462306a36Sopenharmony_ci bool res; 414562306a36Sopenharmony_ci read_seqlock_excl(&mount_lock); 414662306a36Sopenharmony_ci res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); 414762306a36Sopenharmony_ci read_sequnlock_excl(&mount_lock); 414862306a36Sopenharmony_ci return res; 414962306a36Sopenharmony_ci} 415062306a36Sopenharmony_ciEXPORT_SYMBOL(path_is_under); 415162306a36Sopenharmony_ci 415262306a36Sopenharmony_ci/* 415362306a36Sopenharmony_ci * pivot_root Semantics: 415462306a36Sopenharmony_ci * Moves the root file system of the current process to the directory put_old, 415562306a36Sopenharmony_ci * makes new_root as the new root file system of the current process, and sets 415662306a36Sopenharmony_ci * root/cwd of all processes which had them on the current root to new_root. 415762306a36Sopenharmony_ci * 415862306a36Sopenharmony_ci * Restrictions: 415962306a36Sopenharmony_ci * The new_root and put_old must be directories, and must not be on the 416062306a36Sopenharmony_ci * same file system as the current process root. The put_old must be 416162306a36Sopenharmony_ci * underneath new_root, i.e. adding a non-zero number of /.. 
to the string 416262306a36Sopenharmony_ci * pointed to by put_old must yield the same directory as new_root. No other 416362306a36Sopenharmony_ci * file system may be mounted on put_old. After all, new_root is a mountpoint. 416462306a36Sopenharmony_ci * 416562306a36Sopenharmony_ci * Also, the current root cannot be on the 'rootfs' (initial ramfs) filesystem. 416662306a36Sopenharmony_ci * See Documentation/filesystems/ramfs-rootfs-initramfs.rst for alternatives 416762306a36Sopenharmony_ci * in this situation. 416862306a36Sopenharmony_ci * 416962306a36Sopenharmony_ci * Notes: 417062306a36Sopenharmony_ci * - we don't move root/cwd if they are not at the root (reason: if something 417162306a36Sopenharmony_ci * cared enough to change them, it's probably wrong to force them elsewhere) 417262306a36Sopenharmony_ci * - it's okay to pick a root that isn't the root of a file system, e.g. 417362306a36Sopenharmony_ci * /nfs/my_root where /nfs is the mount point. It must be a mountpoint, 417462306a36Sopenharmony_ci * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root 417562306a36Sopenharmony_ci * first. 
417662306a36Sopenharmony_ci */ 417762306a36Sopenharmony_ciSYSCALL_DEFINE2(pivot_root, const char __user *, new_root, 417862306a36Sopenharmony_ci const char __user *, put_old) 417962306a36Sopenharmony_ci{ 418062306a36Sopenharmony_ci struct path new, old, root; 418162306a36Sopenharmony_ci struct mount *new_mnt, *root_mnt, *old_mnt, *root_parent, *ex_parent; 418262306a36Sopenharmony_ci struct mountpoint *old_mp, *root_mp; 418362306a36Sopenharmony_ci int error; 418462306a36Sopenharmony_ci 418562306a36Sopenharmony_ci if (!may_mount()) 418662306a36Sopenharmony_ci return -EPERM; 418762306a36Sopenharmony_ci 418862306a36Sopenharmony_ci error = user_path_at(AT_FDCWD, new_root, 418962306a36Sopenharmony_ci LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &new); 419062306a36Sopenharmony_ci if (error) 419162306a36Sopenharmony_ci goto out0; 419262306a36Sopenharmony_ci 419362306a36Sopenharmony_ci error = user_path_at(AT_FDCWD, put_old, 419462306a36Sopenharmony_ci LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old); 419562306a36Sopenharmony_ci if (error) 419662306a36Sopenharmony_ci goto out1; 419762306a36Sopenharmony_ci 419862306a36Sopenharmony_ci error = security_sb_pivotroot(&old, &new); 419962306a36Sopenharmony_ci if (error) 420062306a36Sopenharmony_ci goto out2; 420162306a36Sopenharmony_ci 420262306a36Sopenharmony_ci get_fs_root(current->fs, &root); 420362306a36Sopenharmony_ci old_mp = lock_mount(&old); 420462306a36Sopenharmony_ci error = PTR_ERR(old_mp); 420562306a36Sopenharmony_ci if (IS_ERR(old_mp)) 420662306a36Sopenharmony_ci goto out3; 420762306a36Sopenharmony_ci 420862306a36Sopenharmony_ci error = -EINVAL; 420962306a36Sopenharmony_ci new_mnt = real_mount(new.mnt); 421062306a36Sopenharmony_ci root_mnt = real_mount(root.mnt); 421162306a36Sopenharmony_ci old_mnt = real_mount(old.mnt); 421262306a36Sopenharmony_ci ex_parent = new_mnt->mnt_parent; 421362306a36Sopenharmony_ci root_parent = root_mnt->mnt_parent; 421462306a36Sopenharmony_ci if (IS_MNT_SHARED(old_mnt) || 421562306a36Sopenharmony_ci 
IS_MNT_SHARED(ex_parent) || 421662306a36Sopenharmony_ci IS_MNT_SHARED(root_parent)) 421762306a36Sopenharmony_ci goto out4; 421862306a36Sopenharmony_ci if (!check_mnt(root_mnt) || !check_mnt(new_mnt)) 421962306a36Sopenharmony_ci goto out4; 422062306a36Sopenharmony_ci if (new_mnt->mnt.mnt_flags & MNT_LOCKED) 422162306a36Sopenharmony_ci goto out4; 422262306a36Sopenharmony_ci error = -ENOENT; 422362306a36Sopenharmony_ci if (d_unlinked(new.dentry)) 422462306a36Sopenharmony_ci goto out4; 422562306a36Sopenharmony_ci error = -EBUSY; 422662306a36Sopenharmony_ci if (new_mnt == root_mnt || old_mnt == root_mnt) 422762306a36Sopenharmony_ci goto out4; /* loop, on the same file system */ 422862306a36Sopenharmony_ci error = -EINVAL; 422962306a36Sopenharmony_ci if (!path_mounted(&root)) 423062306a36Sopenharmony_ci goto out4; /* not a mountpoint */ 423162306a36Sopenharmony_ci if (!mnt_has_parent(root_mnt)) 423262306a36Sopenharmony_ci goto out4; /* not attached */ 423362306a36Sopenharmony_ci if (!path_mounted(&new)) 423462306a36Sopenharmony_ci goto out4; /* not a mountpoint */ 423562306a36Sopenharmony_ci if (!mnt_has_parent(new_mnt)) 423662306a36Sopenharmony_ci goto out4; /* not attached */ 423762306a36Sopenharmony_ci /* make sure we can reach put_old from new_root */ 423862306a36Sopenharmony_ci if (!is_path_reachable(old_mnt, old.dentry, &new)) 423962306a36Sopenharmony_ci goto out4; 424062306a36Sopenharmony_ci /* make certain new is below the root */ 424162306a36Sopenharmony_ci if (!is_path_reachable(new_mnt, new.dentry, &root)) 424262306a36Sopenharmony_ci goto out4; 424362306a36Sopenharmony_ci lock_mount_hash(); 424462306a36Sopenharmony_ci umount_mnt(new_mnt); 424562306a36Sopenharmony_ci root_mp = unhash_mnt(root_mnt); /* we'll need its mountpoint */ 424662306a36Sopenharmony_ci if (root_mnt->mnt.mnt_flags & MNT_LOCKED) { 424762306a36Sopenharmony_ci new_mnt->mnt.mnt_flags |= MNT_LOCKED; 424862306a36Sopenharmony_ci root_mnt->mnt.mnt_flags &= ~MNT_LOCKED; 424962306a36Sopenharmony_ci } 
425062306a36Sopenharmony_ci /* mount old root on put_old */ 425162306a36Sopenharmony_ci attach_mnt(root_mnt, old_mnt, old_mp, false); 425262306a36Sopenharmony_ci /* mount new_root on / */ 425362306a36Sopenharmony_ci attach_mnt(new_mnt, root_parent, root_mp, false); 425462306a36Sopenharmony_ci mnt_add_count(root_parent, -1); 425562306a36Sopenharmony_ci touch_mnt_namespace(current->nsproxy->mnt_ns); 425662306a36Sopenharmony_ci /* A moved mount should not expire automatically */ 425762306a36Sopenharmony_ci list_del_init(&new_mnt->mnt_expire); 425862306a36Sopenharmony_ci put_mountpoint(root_mp); 425962306a36Sopenharmony_ci unlock_mount_hash(); 426062306a36Sopenharmony_ci chroot_fs_refs(&root, &new); 426162306a36Sopenharmony_ci error = 0; 426262306a36Sopenharmony_ciout4: 426362306a36Sopenharmony_ci unlock_mount(old_mp); 426462306a36Sopenharmony_ci if (!error) 426562306a36Sopenharmony_ci mntput_no_expire(ex_parent); 426662306a36Sopenharmony_ciout3: 426762306a36Sopenharmony_ci path_put(&root); 426862306a36Sopenharmony_ciout2: 426962306a36Sopenharmony_ci path_put(&old); 427062306a36Sopenharmony_ciout1: 427162306a36Sopenharmony_ci path_put(&new); 427262306a36Sopenharmony_ciout0: 427362306a36Sopenharmony_ci return error; 427462306a36Sopenharmony_ci} 427562306a36Sopenharmony_ci 427662306a36Sopenharmony_cistatic unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt) 427762306a36Sopenharmony_ci{ 427862306a36Sopenharmony_ci unsigned int flags = mnt->mnt.mnt_flags; 427962306a36Sopenharmony_ci 428062306a36Sopenharmony_ci /* flags to clear */ 428162306a36Sopenharmony_ci flags &= ~kattr->attr_clr; 428262306a36Sopenharmony_ci /* flags to raise */ 428362306a36Sopenharmony_ci flags |= kattr->attr_set; 428462306a36Sopenharmony_ci 428562306a36Sopenharmony_ci return flags; 428662306a36Sopenharmony_ci} 428762306a36Sopenharmony_ci 428862306a36Sopenharmony_cistatic int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) 428962306a36Sopenharmony_ci{ 
429062306a36Sopenharmony_ci struct vfsmount *m = &mnt->mnt; 429162306a36Sopenharmony_ci struct user_namespace *fs_userns = m->mnt_sb->s_user_ns; 429262306a36Sopenharmony_ci 429362306a36Sopenharmony_ci if (!kattr->mnt_idmap) 429462306a36Sopenharmony_ci return 0; 429562306a36Sopenharmony_ci 429662306a36Sopenharmony_ci /* 429762306a36Sopenharmony_ci * Creating an idmapped mount with the filesystem wide idmapping 429862306a36Sopenharmony_ci * doesn't make sense so block that. We don't allow mushy semantics. 429962306a36Sopenharmony_ci */ 430062306a36Sopenharmony_ci if (!check_fsmapping(kattr->mnt_idmap, m->mnt_sb)) 430162306a36Sopenharmony_ci return -EINVAL; 430262306a36Sopenharmony_ci 430362306a36Sopenharmony_ci /* 430462306a36Sopenharmony_ci * Once a mount has been idmapped we don't allow it to change its 430562306a36Sopenharmony_ci * mapping. It makes things simpler and callers can just create 430662306a36Sopenharmony_ci * another bind-mount they can idmap if they want to. 430762306a36Sopenharmony_ci */ 430862306a36Sopenharmony_ci if (is_idmapped_mnt(m)) 430962306a36Sopenharmony_ci return -EPERM; 431062306a36Sopenharmony_ci 431162306a36Sopenharmony_ci /* The underlying filesystem doesn't support idmapped mounts yet. */ 431262306a36Sopenharmony_ci if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) 431362306a36Sopenharmony_ci return -EINVAL; 431462306a36Sopenharmony_ci 431562306a36Sopenharmony_ci /* We're not controlling the superblock. */ 431662306a36Sopenharmony_ci if (!ns_capable(fs_userns, CAP_SYS_ADMIN)) 431762306a36Sopenharmony_ci return -EPERM; 431862306a36Sopenharmony_ci 431962306a36Sopenharmony_ci /* Mount has already been visible in the filesystem hierarchy. 
*/ 432062306a36Sopenharmony_ci if (!is_anon_ns(mnt->mnt_ns)) 432162306a36Sopenharmony_ci return -EINVAL; 432262306a36Sopenharmony_ci 432362306a36Sopenharmony_ci return 0; 432462306a36Sopenharmony_ci} 432562306a36Sopenharmony_ci 432662306a36Sopenharmony_ci/** 432762306a36Sopenharmony_ci * mnt_allow_writers() - check whether the attribute change allows writers 432862306a36Sopenharmony_ci * @kattr: the new mount attributes 432962306a36Sopenharmony_ci * @mnt: the mount to which @kattr will be applied 433062306a36Sopenharmony_ci * 433162306a36Sopenharmony_ci * Check whether thew new mount attributes in @kattr allow concurrent writers. 433262306a36Sopenharmony_ci * 433362306a36Sopenharmony_ci * Return: true if writers need to be held, false if not 433462306a36Sopenharmony_ci */ 433562306a36Sopenharmony_cistatic inline bool mnt_allow_writers(const struct mount_kattr *kattr, 433662306a36Sopenharmony_ci const struct mount *mnt) 433762306a36Sopenharmony_ci{ 433862306a36Sopenharmony_ci return (!(kattr->attr_set & MNT_READONLY) || 433962306a36Sopenharmony_ci (mnt->mnt.mnt_flags & MNT_READONLY)) && 434062306a36Sopenharmony_ci !kattr->mnt_idmap; 434162306a36Sopenharmony_ci} 434262306a36Sopenharmony_ci 434362306a36Sopenharmony_cistatic int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt) 434462306a36Sopenharmony_ci{ 434562306a36Sopenharmony_ci struct mount *m; 434662306a36Sopenharmony_ci int err; 434762306a36Sopenharmony_ci 434862306a36Sopenharmony_ci for (m = mnt; m; m = next_mnt(m, mnt)) { 434962306a36Sopenharmony_ci if (!can_change_locked_flags(m, recalc_flags(kattr, m))) { 435062306a36Sopenharmony_ci err = -EPERM; 435162306a36Sopenharmony_ci break; 435262306a36Sopenharmony_ci } 435362306a36Sopenharmony_ci 435462306a36Sopenharmony_ci err = can_idmap_mount(kattr, m); 435562306a36Sopenharmony_ci if (err) 435662306a36Sopenharmony_ci break; 435762306a36Sopenharmony_ci 435862306a36Sopenharmony_ci if (!mnt_allow_writers(kattr, m)) { 435962306a36Sopenharmony_ci err 
= mnt_hold_writers(m); 436062306a36Sopenharmony_ci if (err) 436162306a36Sopenharmony_ci break; 436262306a36Sopenharmony_ci } 436362306a36Sopenharmony_ci 436462306a36Sopenharmony_ci if (!kattr->recurse) 436562306a36Sopenharmony_ci return 0; 436662306a36Sopenharmony_ci } 436762306a36Sopenharmony_ci 436862306a36Sopenharmony_ci if (err) { 436962306a36Sopenharmony_ci struct mount *p; 437062306a36Sopenharmony_ci 437162306a36Sopenharmony_ci /* 437262306a36Sopenharmony_ci * If we had to call mnt_hold_writers() MNT_WRITE_HOLD will 437362306a36Sopenharmony_ci * be set in @mnt_flags. The loop unsets MNT_WRITE_HOLD for all 437462306a36Sopenharmony_ci * mounts and needs to take care to include the first mount. 437562306a36Sopenharmony_ci */ 437662306a36Sopenharmony_ci for (p = mnt; p; p = next_mnt(p, mnt)) { 437762306a36Sopenharmony_ci /* If we had to hold writers unblock them. */ 437862306a36Sopenharmony_ci if (p->mnt.mnt_flags & MNT_WRITE_HOLD) 437962306a36Sopenharmony_ci mnt_unhold_writers(p); 438062306a36Sopenharmony_ci 438162306a36Sopenharmony_ci /* 438262306a36Sopenharmony_ci * We're done once the first mount we changed got 438362306a36Sopenharmony_ci * MNT_WRITE_HOLD unset. 438462306a36Sopenharmony_ci */ 438562306a36Sopenharmony_ci if (p == m) 438662306a36Sopenharmony_ci break; 438762306a36Sopenharmony_ci } 438862306a36Sopenharmony_ci } 438962306a36Sopenharmony_ci return err; 439062306a36Sopenharmony_ci} 439162306a36Sopenharmony_ci 439262306a36Sopenharmony_cistatic void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) 439362306a36Sopenharmony_ci{ 439462306a36Sopenharmony_ci if (!kattr->mnt_idmap) 439562306a36Sopenharmony_ci return; 439662306a36Sopenharmony_ci 439762306a36Sopenharmony_ci /* 439862306a36Sopenharmony_ci * Pairs with smp_load_acquire() in mnt_idmap(). 
439962306a36Sopenharmony_ci * 440062306a36Sopenharmony_ci * Since we only allow a mount to change the idmapping once and 440162306a36Sopenharmony_ci * verified this in can_idmap_mount() we know that the mount has 440262306a36Sopenharmony_ci * @nop_mnt_idmap attached to it. So there's no need to drop any 440362306a36Sopenharmony_ci * references. 440462306a36Sopenharmony_ci */ 440562306a36Sopenharmony_ci smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap)); 440662306a36Sopenharmony_ci} 440762306a36Sopenharmony_ci 440862306a36Sopenharmony_cistatic void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt) 440962306a36Sopenharmony_ci{ 441062306a36Sopenharmony_ci struct mount *m; 441162306a36Sopenharmony_ci 441262306a36Sopenharmony_ci for (m = mnt; m; m = next_mnt(m, mnt)) { 441362306a36Sopenharmony_ci unsigned int flags; 441462306a36Sopenharmony_ci 441562306a36Sopenharmony_ci do_idmap_mount(kattr, m); 441662306a36Sopenharmony_ci flags = recalc_flags(kattr, m); 441762306a36Sopenharmony_ci WRITE_ONCE(m->mnt.mnt_flags, flags); 441862306a36Sopenharmony_ci 441962306a36Sopenharmony_ci /* If we had to hold writers unblock them. 
*/ 442062306a36Sopenharmony_ci if (m->mnt.mnt_flags & MNT_WRITE_HOLD) 442162306a36Sopenharmony_ci mnt_unhold_writers(m); 442262306a36Sopenharmony_ci 442362306a36Sopenharmony_ci if (kattr->propagation) 442462306a36Sopenharmony_ci change_mnt_propagation(m, kattr->propagation); 442562306a36Sopenharmony_ci if (!kattr->recurse) 442662306a36Sopenharmony_ci break; 442762306a36Sopenharmony_ci } 442862306a36Sopenharmony_ci touch_mnt_namespace(mnt->mnt_ns); 442962306a36Sopenharmony_ci} 443062306a36Sopenharmony_ci 443162306a36Sopenharmony_cistatic int do_mount_setattr(struct path *path, struct mount_kattr *kattr) 443262306a36Sopenharmony_ci{ 443362306a36Sopenharmony_ci struct mount *mnt = real_mount(path->mnt); 443462306a36Sopenharmony_ci int err = 0; 443562306a36Sopenharmony_ci 443662306a36Sopenharmony_ci if (!path_mounted(path)) 443762306a36Sopenharmony_ci return -EINVAL; 443862306a36Sopenharmony_ci 443962306a36Sopenharmony_ci if (kattr->mnt_userns) { 444062306a36Sopenharmony_ci struct mnt_idmap *mnt_idmap; 444162306a36Sopenharmony_ci 444262306a36Sopenharmony_ci mnt_idmap = alloc_mnt_idmap(kattr->mnt_userns); 444362306a36Sopenharmony_ci if (IS_ERR(mnt_idmap)) 444462306a36Sopenharmony_ci return PTR_ERR(mnt_idmap); 444562306a36Sopenharmony_ci kattr->mnt_idmap = mnt_idmap; 444662306a36Sopenharmony_ci } 444762306a36Sopenharmony_ci 444862306a36Sopenharmony_ci if (kattr->propagation) { 444962306a36Sopenharmony_ci /* 445062306a36Sopenharmony_ci * Only take namespace_lock() if we're actually changing 445162306a36Sopenharmony_ci * propagation. 
445262306a36Sopenharmony_ci */ 445362306a36Sopenharmony_ci namespace_lock(); 445462306a36Sopenharmony_ci if (kattr->propagation == MS_SHARED) { 445562306a36Sopenharmony_ci err = invent_group_ids(mnt, kattr->recurse); 445662306a36Sopenharmony_ci if (err) { 445762306a36Sopenharmony_ci namespace_unlock(); 445862306a36Sopenharmony_ci return err; 445962306a36Sopenharmony_ci } 446062306a36Sopenharmony_ci } 446162306a36Sopenharmony_ci } 446262306a36Sopenharmony_ci 446362306a36Sopenharmony_ci err = -EINVAL; 446462306a36Sopenharmony_ci lock_mount_hash(); 446562306a36Sopenharmony_ci 446662306a36Sopenharmony_ci /* Ensure that this isn't anything purely vfs internal. */ 446762306a36Sopenharmony_ci if (!is_mounted(&mnt->mnt)) 446862306a36Sopenharmony_ci goto out; 446962306a36Sopenharmony_ci 447062306a36Sopenharmony_ci /* 447162306a36Sopenharmony_ci * If this is an attached mount make sure it's located in the callers 447262306a36Sopenharmony_ci * mount namespace. If it's not don't let the caller interact with it. 447362306a36Sopenharmony_ci * 447462306a36Sopenharmony_ci * If this mount doesn't have a parent it's most often simply a 447562306a36Sopenharmony_ci * detached mount with an anonymous mount namespace. IOW, something 447662306a36Sopenharmony_ci * that's simply not attached yet. But there are apparently also users 447762306a36Sopenharmony_ci * that do change mount properties on the rootfs itself. That obviously 447862306a36Sopenharmony_ci * neither has a parent nor is it a detached mount so we cannot 447962306a36Sopenharmony_ci * unconditionally check for detached mounts. 448062306a36Sopenharmony_ci */ 448162306a36Sopenharmony_ci if ((mnt_has_parent(mnt) || !is_anon_ns(mnt->mnt_ns)) && !check_mnt(mnt)) 448262306a36Sopenharmony_ci goto out; 448362306a36Sopenharmony_ci 448462306a36Sopenharmony_ci /* 448562306a36Sopenharmony_ci * First, we get the mount tree in a shape where we can change mount 448662306a36Sopenharmony_ci * properties without failure. 
If we succeeded to do so we commit all 448762306a36Sopenharmony_ci * changes and if we failed we clean up. 448862306a36Sopenharmony_ci */ 448962306a36Sopenharmony_ci err = mount_setattr_prepare(kattr, mnt); 449062306a36Sopenharmony_ci if (!err) 449162306a36Sopenharmony_ci mount_setattr_commit(kattr, mnt); 449262306a36Sopenharmony_ci 449362306a36Sopenharmony_ciout: 449462306a36Sopenharmony_ci unlock_mount_hash(); 449562306a36Sopenharmony_ci 449662306a36Sopenharmony_ci if (kattr->propagation) { 449762306a36Sopenharmony_ci if (err) 449862306a36Sopenharmony_ci cleanup_group_ids(mnt, NULL); 449962306a36Sopenharmony_ci namespace_unlock(); 450062306a36Sopenharmony_ci } 450162306a36Sopenharmony_ci 450262306a36Sopenharmony_ci return err; 450362306a36Sopenharmony_ci} 450462306a36Sopenharmony_ci 450562306a36Sopenharmony_cistatic int build_mount_idmapped(const struct mount_attr *attr, size_t usize, 450662306a36Sopenharmony_ci struct mount_kattr *kattr, unsigned int flags) 450762306a36Sopenharmony_ci{ 450862306a36Sopenharmony_ci int err = 0; 450962306a36Sopenharmony_ci struct ns_common *ns; 451062306a36Sopenharmony_ci struct user_namespace *mnt_userns; 451162306a36Sopenharmony_ci struct fd f; 451262306a36Sopenharmony_ci 451362306a36Sopenharmony_ci if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP)) 451462306a36Sopenharmony_ci return 0; 451562306a36Sopenharmony_ci 451662306a36Sopenharmony_ci /* 451762306a36Sopenharmony_ci * We currently do not support clearing an idmapped mount. If this ever 451862306a36Sopenharmony_ci * is a use-case we can revisit this but for now let's keep it simple 451962306a36Sopenharmony_ci * and not allow it. 
452062306a36Sopenharmony_ci */ 452162306a36Sopenharmony_ci if (attr->attr_clr & MOUNT_ATTR_IDMAP) 452262306a36Sopenharmony_ci return -EINVAL; 452362306a36Sopenharmony_ci 452462306a36Sopenharmony_ci if (attr->userns_fd > INT_MAX) 452562306a36Sopenharmony_ci return -EINVAL; 452662306a36Sopenharmony_ci 452762306a36Sopenharmony_ci f = fdget(attr->userns_fd); 452862306a36Sopenharmony_ci if (!f.file) 452962306a36Sopenharmony_ci return -EBADF; 453062306a36Sopenharmony_ci 453162306a36Sopenharmony_ci if (!proc_ns_file(f.file)) { 453262306a36Sopenharmony_ci err = -EINVAL; 453362306a36Sopenharmony_ci goto out_fput; 453462306a36Sopenharmony_ci } 453562306a36Sopenharmony_ci 453662306a36Sopenharmony_ci ns = get_proc_ns(file_inode(f.file)); 453762306a36Sopenharmony_ci if (ns->ops->type != CLONE_NEWUSER) { 453862306a36Sopenharmony_ci err = -EINVAL; 453962306a36Sopenharmony_ci goto out_fput; 454062306a36Sopenharmony_ci } 454162306a36Sopenharmony_ci 454262306a36Sopenharmony_ci /* 454362306a36Sopenharmony_ci * The initial idmapping cannot be used to create an idmapped 454462306a36Sopenharmony_ci * mount. We use the initial idmapping as an indicator of a mount 454562306a36Sopenharmony_ci * that is not idmapped. It can simply be passed into helpers that 454662306a36Sopenharmony_ci * are aware of idmapped mounts as a convenient shortcut. A user 454762306a36Sopenharmony_ci * can just create a dedicated identity mapping to achieve the same 454862306a36Sopenharmony_ci * result. 454962306a36Sopenharmony_ci */ 455062306a36Sopenharmony_ci mnt_userns = container_of(ns, struct user_namespace, ns); 455162306a36Sopenharmony_ci if (mnt_userns == &init_user_ns) { 455262306a36Sopenharmony_ci err = -EPERM; 455362306a36Sopenharmony_ci goto out_fput; 455462306a36Sopenharmony_ci } 455562306a36Sopenharmony_ci 455662306a36Sopenharmony_ci /* We're not controlling the target namespace. 
*/ 455762306a36Sopenharmony_ci if (!ns_capable(mnt_userns, CAP_SYS_ADMIN)) { 455862306a36Sopenharmony_ci err = -EPERM; 455962306a36Sopenharmony_ci goto out_fput; 456062306a36Sopenharmony_ci } 456162306a36Sopenharmony_ci 456262306a36Sopenharmony_ci kattr->mnt_userns = get_user_ns(mnt_userns); 456362306a36Sopenharmony_ci 456462306a36Sopenharmony_ciout_fput: 456562306a36Sopenharmony_ci fdput(f); 456662306a36Sopenharmony_ci return err; 456762306a36Sopenharmony_ci} 456862306a36Sopenharmony_ci 456962306a36Sopenharmony_cistatic int build_mount_kattr(const struct mount_attr *attr, size_t usize, 457062306a36Sopenharmony_ci struct mount_kattr *kattr, unsigned int flags) 457162306a36Sopenharmony_ci{ 457262306a36Sopenharmony_ci unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; 457362306a36Sopenharmony_ci 457462306a36Sopenharmony_ci if (flags & AT_NO_AUTOMOUNT) 457562306a36Sopenharmony_ci lookup_flags &= ~LOOKUP_AUTOMOUNT; 457662306a36Sopenharmony_ci if (flags & AT_SYMLINK_NOFOLLOW) 457762306a36Sopenharmony_ci lookup_flags &= ~LOOKUP_FOLLOW; 457862306a36Sopenharmony_ci if (flags & AT_EMPTY_PATH) 457962306a36Sopenharmony_ci lookup_flags |= LOOKUP_EMPTY; 458062306a36Sopenharmony_ci 458162306a36Sopenharmony_ci *kattr = (struct mount_kattr) { 458262306a36Sopenharmony_ci .lookup_flags = lookup_flags, 458362306a36Sopenharmony_ci .recurse = !!(flags & AT_RECURSIVE), 458462306a36Sopenharmony_ci }; 458562306a36Sopenharmony_ci 458662306a36Sopenharmony_ci if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS) 458762306a36Sopenharmony_ci return -EINVAL; 458862306a36Sopenharmony_ci if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1) 458962306a36Sopenharmony_ci return -EINVAL; 459062306a36Sopenharmony_ci kattr->propagation = attr->propagation; 459162306a36Sopenharmony_ci 459262306a36Sopenharmony_ci if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS) 459362306a36Sopenharmony_ci return -EINVAL; 459462306a36Sopenharmony_ci 
459562306a36Sopenharmony_ci kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set); 459662306a36Sopenharmony_ci kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr); 459762306a36Sopenharmony_ci 459862306a36Sopenharmony_ci /* 459962306a36Sopenharmony_ci * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap, 460062306a36Sopenharmony_ci * users wanting to transition to a different atime setting cannot 460162306a36Sopenharmony_ci * simply specify the atime setting in @attr_set, but must also 460262306a36Sopenharmony_ci * specify MOUNT_ATTR__ATIME in the @attr_clr field. 460362306a36Sopenharmony_ci * So ensure that MOUNT_ATTR__ATIME can't be partially set in 460462306a36Sopenharmony_ci * @attr_clr and that @attr_set can't have any atime bits set if 460562306a36Sopenharmony_ci * MOUNT_ATTR__ATIME isn't set in @attr_clr. 460662306a36Sopenharmony_ci */ 460762306a36Sopenharmony_ci if (attr->attr_clr & MOUNT_ATTR__ATIME) { 460862306a36Sopenharmony_ci if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME) 460962306a36Sopenharmony_ci return -EINVAL; 461062306a36Sopenharmony_ci 461162306a36Sopenharmony_ci /* 461262306a36Sopenharmony_ci * Clear all previous time settings as they are mutually 461362306a36Sopenharmony_ci * exclusive. 
461462306a36Sopenharmony_ci */ 461562306a36Sopenharmony_ci kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME; 461662306a36Sopenharmony_ci switch (attr->attr_set & MOUNT_ATTR__ATIME) { 461762306a36Sopenharmony_ci case MOUNT_ATTR_RELATIME: 461862306a36Sopenharmony_ci kattr->attr_set |= MNT_RELATIME; 461962306a36Sopenharmony_ci break; 462062306a36Sopenharmony_ci case MOUNT_ATTR_NOATIME: 462162306a36Sopenharmony_ci kattr->attr_set |= MNT_NOATIME; 462262306a36Sopenharmony_ci break; 462362306a36Sopenharmony_ci case MOUNT_ATTR_STRICTATIME: 462462306a36Sopenharmony_ci break; 462562306a36Sopenharmony_ci default: 462662306a36Sopenharmony_ci return -EINVAL; 462762306a36Sopenharmony_ci } 462862306a36Sopenharmony_ci } else { 462962306a36Sopenharmony_ci if (attr->attr_set & MOUNT_ATTR__ATIME) 463062306a36Sopenharmony_ci return -EINVAL; 463162306a36Sopenharmony_ci } 463262306a36Sopenharmony_ci 463362306a36Sopenharmony_ci return build_mount_idmapped(attr, usize, kattr, flags); 463462306a36Sopenharmony_ci} 463562306a36Sopenharmony_ci 463662306a36Sopenharmony_cistatic void finish_mount_kattr(struct mount_kattr *kattr) 463762306a36Sopenharmony_ci{ 463862306a36Sopenharmony_ci put_user_ns(kattr->mnt_userns); 463962306a36Sopenharmony_ci kattr->mnt_userns = NULL; 464062306a36Sopenharmony_ci 464162306a36Sopenharmony_ci if (kattr->mnt_idmap) 464262306a36Sopenharmony_ci mnt_idmap_put(kattr->mnt_idmap); 464362306a36Sopenharmony_ci} 464462306a36Sopenharmony_ci 464562306a36Sopenharmony_ciSYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, 464662306a36Sopenharmony_ci unsigned int, flags, struct mount_attr __user *, uattr, 464762306a36Sopenharmony_ci size_t, usize) 464862306a36Sopenharmony_ci{ 464962306a36Sopenharmony_ci int err; 465062306a36Sopenharmony_ci struct path target; 465162306a36Sopenharmony_ci struct mount_attr attr; 465262306a36Sopenharmony_ci struct mount_kattr kattr; 465362306a36Sopenharmony_ci 465462306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct mount_attr) != 
MOUNT_ATTR_SIZE_VER0); 465562306a36Sopenharmony_ci 465662306a36Sopenharmony_ci if (flags & ~(AT_EMPTY_PATH | 465762306a36Sopenharmony_ci AT_RECURSIVE | 465862306a36Sopenharmony_ci AT_SYMLINK_NOFOLLOW | 465962306a36Sopenharmony_ci AT_NO_AUTOMOUNT)) 466062306a36Sopenharmony_ci return -EINVAL; 466162306a36Sopenharmony_ci 466262306a36Sopenharmony_ci if (unlikely(usize > PAGE_SIZE)) 466362306a36Sopenharmony_ci return -E2BIG; 466462306a36Sopenharmony_ci if (unlikely(usize < MOUNT_ATTR_SIZE_VER0)) 466562306a36Sopenharmony_ci return -EINVAL; 466662306a36Sopenharmony_ci 466762306a36Sopenharmony_ci if (!may_mount()) 466862306a36Sopenharmony_ci return -EPERM; 466962306a36Sopenharmony_ci 467062306a36Sopenharmony_ci err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize); 467162306a36Sopenharmony_ci if (err) 467262306a36Sopenharmony_ci return err; 467362306a36Sopenharmony_ci 467462306a36Sopenharmony_ci /* Don't bother walking through the mounts if this is a nop. */ 467562306a36Sopenharmony_ci if (attr.attr_set == 0 && 467662306a36Sopenharmony_ci attr.attr_clr == 0 && 467762306a36Sopenharmony_ci attr.propagation == 0) 467862306a36Sopenharmony_ci return 0; 467962306a36Sopenharmony_ci 468062306a36Sopenharmony_ci err = build_mount_kattr(&attr, usize, &kattr, flags); 468162306a36Sopenharmony_ci if (err) 468262306a36Sopenharmony_ci return err; 468362306a36Sopenharmony_ci 468462306a36Sopenharmony_ci err = user_path_at(dfd, path, kattr.lookup_flags, &target); 468562306a36Sopenharmony_ci if (!err) { 468662306a36Sopenharmony_ci err = do_mount_setattr(&target, &kattr); 468762306a36Sopenharmony_ci path_put(&target); 468862306a36Sopenharmony_ci } 468962306a36Sopenharmony_ci finish_mount_kattr(&kattr); 469062306a36Sopenharmony_ci return err; 469162306a36Sopenharmony_ci} 469262306a36Sopenharmony_ci 469362306a36Sopenharmony_cistatic void __init init_mount_tree(void) 469462306a36Sopenharmony_ci{ 469562306a36Sopenharmony_ci struct vfsmount *mnt; 469662306a36Sopenharmony_ci struct mount 
*m; 469762306a36Sopenharmony_ci struct mnt_namespace *ns; 469862306a36Sopenharmony_ci struct path root; 469962306a36Sopenharmony_ci 470062306a36Sopenharmony_ci mnt = vfs_kern_mount(&rootfs_fs_type, 0, "rootfs", NULL); 470162306a36Sopenharmony_ci if (IS_ERR(mnt)) 470262306a36Sopenharmony_ci panic("Can't create rootfs"); 470362306a36Sopenharmony_ci 470462306a36Sopenharmony_ci ns = alloc_mnt_ns(&init_user_ns, false); 470562306a36Sopenharmony_ci if (IS_ERR(ns)) 470662306a36Sopenharmony_ci panic("Can't allocate initial namespace"); 470762306a36Sopenharmony_ci m = real_mount(mnt); 470862306a36Sopenharmony_ci m->mnt_ns = ns; 470962306a36Sopenharmony_ci ns->root = m; 471062306a36Sopenharmony_ci ns->mounts = 1; 471162306a36Sopenharmony_ci list_add(&m->mnt_list, &ns->list); 471262306a36Sopenharmony_ci init_task.nsproxy->mnt_ns = ns; 471362306a36Sopenharmony_ci get_mnt_ns(ns); 471462306a36Sopenharmony_ci 471562306a36Sopenharmony_ci root.mnt = mnt; 471662306a36Sopenharmony_ci root.dentry = mnt->mnt_root; 471762306a36Sopenharmony_ci mnt->mnt_flags |= MNT_LOCKED; 471862306a36Sopenharmony_ci 471962306a36Sopenharmony_ci set_fs_pwd(current->fs, &root); 472062306a36Sopenharmony_ci set_fs_root(current->fs, &root); 472162306a36Sopenharmony_ci} 472262306a36Sopenharmony_ci 472362306a36Sopenharmony_civoid __init mnt_init(void) 472462306a36Sopenharmony_ci{ 472562306a36Sopenharmony_ci int err; 472662306a36Sopenharmony_ci 472762306a36Sopenharmony_ci mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), 472862306a36Sopenharmony_ci 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL); 472962306a36Sopenharmony_ci 473062306a36Sopenharmony_ci mount_hashtable = alloc_large_system_hash("Mount-cache", 473162306a36Sopenharmony_ci sizeof(struct hlist_head), 473262306a36Sopenharmony_ci mhash_entries, 19, 473362306a36Sopenharmony_ci HASH_ZERO, 473462306a36Sopenharmony_ci &m_hash_shift, &m_hash_mask, 0, 0); 473562306a36Sopenharmony_ci mountpoint_hashtable = 
alloc_large_system_hash("Mountpoint-cache", 473662306a36Sopenharmony_ci sizeof(struct hlist_head), 473762306a36Sopenharmony_ci mphash_entries, 19, 473862306a36Sopenharmony_ci HASH_ZERO, 473962306a36Sopenharmony_ci &mp_hash_shift, &mp_hash_mask, 0, 0); 474062306a36Sopenharmony_ci 474162306a36Sopenharmony_ci if (!mount_hashtable || !mountpoint_hashtable) 474262306a36Sopenharmony_ci panic("Failed to allocate mount hash table\n"); 474362306a36Sopenharmony_ci 474462306a36Sopenharmony_ci kernfs_init(); 474562306a36Sopenharmony_ci 474662306a36Sopenharmony_ci err = sysfs_init(); 474762306a36Sopenharmony_ci if (err) 474862306a36Sopenharmony_ci printk(KERN_WARNING "%s: sysfs_init error: %d\n", 474962306a36Sopenharmony_ci __func__, err); 475062306a36Sopenharmony_ci fs_kobj = kobject_create_and_add("fs", NULL); 475162306a36Sopenharmony_ci if (!fs_kobj) 475262306a36Sopenharmony_ci printk(KERN_WARNING "%s: kobj create error\n", __func__); 475362306a36Sopenharmony_ci shmem_init(); 475462306a36Sopenharmony_ci init_rootfs(); 475562306a36Sopenharmony_ci init_mount_tree(); 475662306a36Sopenharmony_ci} 475762306a36Sopenharmony_ci 475862306a36Sopenharmony_civoid put_mnt_ns(struct mnt_namespace *ns) 475962306a36Sopenharmony_ci{ 476062306a36Sopenharmony_ci if (!refcount_dec_and_test(&ns->ns.count)) 476162306a36Sopenharmony_ci return; 476262306a36Sopenharmony_ci drop_collected_mounts(&ns->root->mnt); 476362306a36Sopenharmony_ci free_mnt_ns(ns); 476462306a36Sopenharmony_ci} 476562306a36Sopenharmony_ci 476662306a36Sopenharmony_cistruct vfsmount *kern_mount(struct file_system_type *type) 476762306a36Sopenharmony_ci{ 476862306a36Sopenharmony_ci struct vfsmount *mnt; 476962306a36Sopenharmony_ci mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); 477062306a36Sopenharmony_ci if (!IS_ERR(mnt)) { 477162306a36Sopenharmony_ci /* 477262306a36Sopenharmony_ci * it is a longterm mount, don't release mnt until 477362306a36Sopenharmony_ci * we unmount before file sys is unregistered 
477462306a36Sopenharmony_ci */ 477562306a36Sopenharmony_ci real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; 477662306a36Sopenharmony_ci } 477762306a36Sopenharmony_ci return mnt; 477862306a36Sopenharmony_ci} 477962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(kern_mount); 478062306a36Sopenharmony_ci 478162306a36Sopenharmony_civoid kern_unmount(struct vfsmount *mnt) 478262306a36Sopenharmony_ci{ 478362306a36Sopenharmony_ci /* release long term mount so mount point can be released */ 478462306a36Sopenharmony_ci if (!IS_ERR(mnt)) { 478562306a36Sopenharmony_ci mnt_make_shortterm(mnt); 478662306a36Sopenharmony_ci synchronize_rcu(); /* yecchhh... */ 478762306a36Sopenharmony_ci mntput(mnt); 478862306a36Sopenharmony_ci } 478962306a36Sopenharmony_ci} 479062306a36Sopenharmony_ciEXPORT_SYMBOL(kern_unmount); 479162306a36Sopenharmony_ci 479262306a36Sopenharmony_civoid kern_unmount_array(struct vfsmount *mnt[], unsigned int num) 479362306a36Sopenharmony_ci{ 479462306a36Sopenharmony_ci unsigned int i; 479562306a36Sopenharmony_ci 479662306a36Sopenharmony_ci for (i = 0; i < num; i++) 479762306a36Sopenharmony_ci mnt_make_shortterm(mnt[i]); 479862306a36Sopenharmony_ci synchronize_rcu_expedited(); 479962306a36Sopenharmony_ci for (i = 0; i < num; i++) 480062306a36Sopenharmony_ci mntput(mnt[i]); 480162306a36Sopenharmony_ci} 480262306a36Sopenharmony_ciEXPORT_SYMBOL(kern_unmount_array); 480362306a36Sopenharmony_ci 480462306a36Sopenharmony_cibool our_mnt(struct vfsmount *mnt) 480562306a36Sopenharmony_ci{ 480662306a36Sopenharmony_ci return check_mnt(real_mount(mnt)); 480762306a36Sopenharmony_ci} 480862306a36Sopenharmony_ci 480962306a36Sopenharmony_cibool current_chrooted(void) 481062306a36Sopenharmony_ci{ 481162306a36Sopenharmony_ci /* Does the current process have a non-standard root */ 481262306a36Sopenharmony_ci struct path ns_root; 481362306a36Sopenharmony_ci struct path fs_root; 481462306a36Sopenharmony_ci bool chrooted; 481562306a36Sopenharmony_ci 481662306a36Sopenharmony_ci /* Find the namespace 
root */ 481762306a36Sopenharmony_ci ns_root.mnt = ¤t->nsproxy->mnt_ns->root->mnt; 481862306a36Sopenharmony_ci ns_root.dentry = ns_root.mnt->mnt_root; 481962306a36Sopenharmony_ci path_get(&ns_root); 482062306a36Sopenharmony_ci while (d_mountpoint(ns_root.dentry) && follow_down_one(&ns_root)) 482162306a36Sopenharmony_ci ; 482262306a36Sopenharmony_ci 482362306a36Sopenharmony_ci get_fs_root(current->fs, &fs_root); 482462306a36Sopenharmony_ci 482562306a36Sopenharmony_ci chrooted = !path_equal(&fs_root, &ns_root); 482662306a36Sopenharmony_ci 482762306a36Sopenharmony_ci path_put(&fs_root); 482862306a36Sopenharmony_ci path_put(&ns_root); 482962306a36Sopenharmony_ci 483062306a36Sopenharmony_ci return chrooted; 483162306a36Sopenharmony_ci} 483262306a36Sopenharmony_ci 483362306a36Sopenharmony_cistatic bool mnt_already_visible(struct mnt_namespace *ns, 483462306a36Sopenharmony_ci const struct super_block *sb, 483562306a36Sopenharmony_ci int *new_mnt_flags) 483662306a36Sopenharmony_ci{ 483762306a36Sopenharmony_ci int new_flags = *new_mnt_flags; 483862306a36Sopenharmony_ci struct mount *mnt; 483962306a36Sopenharmony_ci bool visible = false; 484062306a36Sopenharmony_ci 484162306a36Sopenharmony_ci down_read(&namespace_sem); 484262306a36Sopenharmony_ci lock_ns_list(ns); 484362306a36Sopenharmony_ci list_for_each_entry(mnt, &ns->list, mnt_list) { 484462306a36Sopenharmony_ci struct mount *child; 484562306a36Sopenharmony_ci int mnt_flags; 484662306a36Sopenharmony_ci 484762306a36Sopenharmony_ci if (mnt_is_cursor(mnt)) 484862306a36Sopenharmony_ci continue; 484962306a36Sopenharmony_ci 485062306a36Sopenharmony_ci if (mnt->mnt.mnt_sb->s_type != sb->s_type) 485162306a36Sopenharmony_ci continue; 485262306a36Sopenharmony_ci 485362306a36Sopenharmony_ci /* This mount is not fully visible if it's root directory 485462306a36Sopenharmony_ci * is not the root directory of the filesystem. 
485562306a36Sopenharmony_ci */ 485662306a36Sopenharmony_ci if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) 485762306a36Sopenharmony_ci continue; 485862306a36Sopenharmony_ci 485962306a36Sopenharmony_ci /* A local view of the mount flags */ 486062306a36Sopenharmony_ci mnt_flags = mnt->mnt.mnt_flags; 486162306a36Sopenharmony_ci 486262306a36Sopenharmony_ci /* Don't miss readonly hidden in the superblock flags */ 486362306a36Sopenharmony_ci if (sb_rdonly(mnt->mnt.mnt_sb)) 486462306a36Sopenharmony_ci mnt_flags |= MNT_LOCK_READONLY; 486562306a36Sopenharmony_ci 486662306a36Sopenharmony_ci /* Verify the mount flags are equal to or more permissive 486762306a36Sopenharmony_ci * than the proposed new mount. 486862306a36Sopenharmony_ci */ 486962306a36Sopenharmony_ci if ((mnt_flags & MNT_LOCK_READONLY) && 487062306a36Sopenharmony_ci !(new_flags & MNT_READONLY)) 487162306a36Sopenharmony_ci continue; 487262306a36Sopenharmony_ci if ((mnt_flags & MNT_LOCK_ATIME) && 487362306a36Sopenharmony_ci ((mnt_flags & MNT_ATIME_MASK) != (new_flags & MNT_ATIME_MASK))) 487462306a36Sopenharmony_ci continue; 487562306a36Sopenharmony_ci 487662306a36Sopenharmony_ci /* This mount is not fully visible if there are any 487762306a36Sopenharmony_ci * locked child mounts that cover anything except for 487862306a36Sopenharmony_ci * empty directories. 487962306a36Sopenharmony_ci */ 488062306a36Sopenharmony_ci list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { 488162306a36Sopenharmony_ci struct inode *inode = child->mnt_mountpoint->d_inode; 488262306a36Sopenharmony_ci /* Only worry about locked mounts */ 488362306a36Sopenharmony_ci if (!(child->mnt.mnt_flags & MNT_LOCKED)) 488462306a36Sopenharmony_ci continue; 488562306a36Sopenharmony_ci /* Is the directory permanetly empty? 
*/ 488662306a36Sopenharmony_ci if (!is_empty_dir_inode(inode)) 488762306a36Sopenharmony_ci goto next; 488862306a36Sopenharmony_ci } 488962306a36Sopenharmony_ci /* Preserve the locked attributes */ 489062306a36Sopenharmony_ci *new_mnt_flags |= mnt_flags & (MNT_LOCK_READONLY | \ 489162306a36Sopenharmony_ci MNT_LOCK_ATIME); 489262306a36Sopenharmony_ci visible = true; 489362306a36Sopenharmony_ci goto found; 489462306a36Sopenharmony_ci next: ; 489562306a36Sopenharmony_ci } 489662306a36Sopenharmony_cifound: 489762306a36Sopenharmony_ci unlock_ns_list(ns); 489862306a36Sopenharmony_ci up_read(&namespace_sem); 489962306a36Sopenharmony_ci return visible; 490062306a36Sopenharmony_ci} 490162306a36Sopenharmony_ci 490262306a36Sopenharmony_cistatic bool mount_too_revealing(const struct super_block *sb, int *new_mnt_flags) 490362306a36Sopenharmony_ci{ 490462306a36Sopenharmony_ci const unsigned long required_iflags = SB_I_NOEXEC | SB_I_NODEV; 490562306a36Sopenharmony_ci struct mnt_namespace *ns = current->nsproxy->mnt_ns; 490662306a36Sopenharmony_ci unsigned long s_iflags; 490762306a36Sopenharmony_ci 490862306a36Sopenharmony_ci if (ns->user_ns == &init_user_ns) 490962306a36Sopenharmony_ci return false; 491062306a36Sopenharmony_ci 491162306a36Sopenharmony_ci /* Can this filesystem be too revealing? 
*/ 491262306a36Sopenharmony_ci s_iflags = sb->s_iflags; 491362306a36Sopenharmony_ci if (!(s_iflags & SB_I_USERNS_VISIBLE)) 491462306a36Sopenharmony_ci return false; 491562306a36Sopenharmony_ci 491662306a36Sopenharmony_ci if ((s_iflags & required_iflags) != required_iflags) { 491762306a36Sopenharmony_ci WARN_ONCE(1, "Expected s_iflags to contain 0x%lx\n", 491862306a36Sopenharmony_ci required_iflags); 491962306a36Sopenharmony_ci return true; 492062306a36Sopenharmony_ci } 492162306a36Sopenharmony_ci 492262306a36Sopenharmony_ci return !mnt_already_visible(ns, sb, new_mnt_flags); 492362306a36Sopenharmony_ci} 492462306a36Sopenharmony_ci 492562306a36Sopenharmony_cibool mnt_may_suid(struct vfsmount *mnt) 492662306a36Sopenharmony_ci{ 492762306a36Sopenharmony_ci /* 492862306a36Sopenharmony_ci * Foreign mounts (accessed via fchdir or through /proc 492962306a36Sopenharmony_ci * symlinks) are always treated as if they are nosuid. This 493062306a36Sopenharmony_ci * prevents namespaces from trusting potentially unsafe 493162306a36Sopenharmony_ci * suid/sgid bits, file caps, or security labels that originate 493262306a36Sopenharmony_ci * in other namespaces. 
493362306a36Sopenharmony_ci */ 493462306a36Sopenharmony_ci return !(mnt->mnt_flags & MNT_NOSUID) && check_mnt(real_mount(mnt)) && 493562306a36Sopenharmony_ci current_in_userns(mnt->mnt_sb->s_user_ns); 493662306a36Sopenharmony_ci} 493762306a36Sopenharmony_ci 493862306a36Sopenharmony_cistatic struct ns_common *mntns_get(struct task_struct *task) 493962306a36Sopenharmony_ci{ 494062306a36Sopenharmony_ci struct ns_common *ns = NULL; 494162306a36Sopenharmony_ci struct nsproxy *nsproxy; 494262306a36Sopenharmony_ci 494362306a36Sopenharmony_ci task_lock(task); 494462306a36Sopenharmony_ci nsproxy = task->nsproxy; 494562306a36Sopenharmony_ci if (nsproxy) { 494662306a36Sopenharmony_ci ns = &nsproxy->mnt_ns->ns; 494762306a36Sopenharmony_ci get_mnt_ns(to_mnt_ns(ns)); 494862306a36Sopenharmony_ci } 494962306a36Sopenharmony_ci task_unlock(task); 495062306a36Sopenharmony_ci 495162306a36Sopenharmony_ci return ns; 495262306a36Sopenharmony_ci} 495362306a36Sopenharmony_ci 495462306a36Sopenharmony_cistatic void mntns_put(struct ns_common *ns) 495562306a36Sopenharmony_ci{ 495662306a36Sopenharmony_ci put_mnt_ns(to_mnt_ns(ns)); 495762306a36Sopenharmony_ci} 495862306a36Sopenharmony_ci 495962306a36Sopenharmony_cistatic int mntns_install(struct nsset *nsset, struct ns_common *ns) 496062306a36Sopenharmony_ci{ 496162306a36Sopenharmony_ci struct nsproxy *nsproxy = nsset->nsproxy; 496262306a36Sopenharmony_ci struct fs_struct *fs = nsset->fs; 496362306a36Sopenharmony_ci struct mnt_namespace *mnt_ns = to_mnt_ns(ns), *old_mnt_ns; 496462306a36Sopenharmony_ci struct user_namespace *user_ns = nsset->cred->user_ns; 496562306a36Sopenharmony_ci struct path root; 496662306a36Sopenharmony_ci int err; 496762306a36Sopenharmony_ci 496862306a36Sopenharmony_ci if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || 496962306a36Sopenharmony_ci !ns_capable(user_ns, CAP_SYS_CHROOT) || 497062306a36Sopenharmony_ci !ns_capable(user_ns, CAP_SYS_ADMIN)) 497162306a36Sopenharmony_ci return -EPERM; 497262306a36Sopenharmony_ci 
497362306a36Sopenharmony_ci if (is_anon_ns(mnt_ns)) 497462306a36Sopenharmony_ci return -EINVAL; 497562306a36Sopenharmony_ci 497662306a36Sopenharmony_ci if (fs->users != 1) 497762306a36Sopenharmony_ci return -EINVAL; 497862306a36Sopenharmony_ci 497962306a36Sopenharmony_ci get_mnt_ns(mnt_ns); 498062306a36Sopenharmony_ci old_mnt_ns = nsproxy->mnt_ns; 498162306a36Sopenharmony_ci nsproxy->mnt_ns = mnt_ns; 498262306a36Sopenharmony_ci 498362306a36Sopenharmony_ci /* Find the root */ 498462306a36Sopenharmony_ci err = vfs_path_lookup(mnt_ns->root->mnt.mnt_root, &mnt_ns->root->mnt, 498562306a36Sopenharmony_ci "/", LOOKUP_DOWN, &root); 498662306a36Sopenharmony_ci if (err) { 498762306a36Sopenharmony_ci /* revert to old namespace */ 498862306a36Sopenharmony_ci nsproxy->mnt_ns = old_mnt_ns; 498962306a36Sopenharmony_ci put_mnt_ns(mnt_ns); 499062306a36Sopenharmony_ci return err; 499162306a36Sopenharmony_ci } 499262306a36Sopenharmony_ci 499362306a36Sopenharmony_ci put_mnt_ns(old_mnt_ns); 499462306a36Sopenharmony_ci 499562306a36Sopenharmony_ci /* Update the pwd and root */ 499662306a36Sopenharmony_ci set_fs_pwd(fs, &root); 499762306a36Sopenharmony_ci set_fs_root(fs, &root); 499862306a36Sopenharmony_ci 499962306a36Sopenharmony_ci path_put(&root); 500062306a36Sopenharmony_ci return 0; 500162306a36Sopenharmony_ci} 500262306a36Sopenharmony_ci 500362306a36Sopenharmony_cistatic struct user_namespace *mntns_owner(struct ns_common *ns) 500462306a36Sopenharmony_ci{ 500562306a36Sopenharmony_ci return to_mnt_ns(ns)->user_ns; 500662306a36Sopenharmony_ci} 500762306a36Sopenharmony_ci 500862306a36Sopenharmony_ciconst struct proc_ns_operations mntns_operations = { 500962306a36Sopenharmony_ci .name = "mnt", 501062306a36Sopenharmony_ci .type = CLONE_NEWNS, 501162306a36Sopenharmony_ci .get = mntns_get, 501262306a36Sopenharmony_ci .put = mntns_put, 501362306a36Sopenharmony_ci .install = mntns_install, 501462306a36Sopenharmony_ci .owner = mntns_owner, 501562306a36Sopenharmony_ci}; 
#ifdef CONFIG_SYSCTL
/*
 * Sysctl entries registered under "fs" by this file. The single entry,
 * "mount-max", exposes sysctl_mount_max and refuses values below one.
 */
static struct ctl_table fs_namespace_sysctls[] = {
	{
		.procname	= "mount-max",
		.mode		= 0644,
		.data		= &sysctl_mount_max,
		.maxlen		= sizeof(unsigned int),
		.extra1		= SYSCTL_ONE,
		.proc_handler	= proc_dointvec_minmax,
	},
	{ }	/* terminating empty entry */
};

/* Register the table above at fs_initcall time; always reports success. */
static int __init init_fs_namespace_sysctls(void)
{
	register_sysctl_init("fs", fs_namespace_sysctls);

	return 0;
}
fs_initcall(init_fs_namespace_sysctls);

#endif /* CONFIG_SYSCTL */