162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Copyright (C) 2011 Novell Inc. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci#include <linux/fs.h> 862306a36Sopenharmony_ci#include <linux/slab.h> 962306a36Sopenharmony_ci#include <linux/cred.h> 1062306a36Sopenharmony_ci#include <linux/xattr.h> 1162306a36Sopenharmony_ci#include <linux/posix_acl.h> 1262306a36Sopenharmony_ci#include <linux/ratelimit.h> 1362306a36Sopenharmony_ci#include <linux/fiemap.h> 1462306a36Sopenharmony_ci#include <linux/fileattr.h> 1562306a36Sopenharmony_ci#include <linux/security.h> 1662306a36Sopenharmony_ci#include <linux/namei.h> 1762306a36Sopenharmony_ci#include <linux/posix_acl.h> 1862306a36Sopenharmony_ci#include <linux/posix_acl_xattr.h> 1962306a36Sopenharmony_ci#include "overlayfs.h" 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_ciint ovl_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 2362306a36Sopenharmony_ci struct iattr *attr) 2462306a36Sopenharmony_ci{ 2562306a36Sopenharmony_ci int err; 2662306a36Sopenharmony_ci struct ovl_fs *ofs = OVL_FS(dentry->d_sb); 2762306a36Sopenharmony_ci bool full_copy_up = false; 2862306a36Sopenharmony_ci struct dentry *upperdentry; 2962306a36Sopenharmony_ci const struct cred *old_cred; 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci err = setattr_prepare(&nop_mnt_idmap, dentry, attr); 3262306a36Sopenharmony_ci if (err) 3362306a36Sopenharmony_ci return err; 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci err = ovl_want_write(dentry); 3662306a36Sopenharmony_ci if (err) 3762306a36Sopenharmony_ci goto out; 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci if (attr->ia_valid & ATTR_SIZE) { 4062306a36Sopenharmony_ci /* Truncate should trigger data copy up as well */ 4162306a36Sopenharmony_ci full_copy_up = true; 4262306a36Sopenharmony_ci } 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci if (!full_copy_up) 4562306a36Sopenharmony_ci err = ovl_copy_up(dentry); 4662306a36Sopenharmony_ci else 4762306a36Sopenharmony_ci err = ovl_copy_up_with_data(dentry); 4862306a36Sopenharmony_ci if (!err) { 4962306a36Sopenharmony_ci struct inode *winode = NULL; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci upperdentry = ovl_dentry_upper(dentry); 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci if (attr->ia_valid & ATTR_SIZE) { 5462306a36Sopenharmony_ci winode = d_inode(upperdentry); 5562306a36Sopenharmony_ci err = get_write_access(winode); 5662306a36Sopenharmony_ci if (err) 5762306a36Sopenharmony_ci goto out_drop_write; 5862306a36Sopenharmony_ci } 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 6162306a36Sopenharmony_ci attr->ia_valid &= ~ATTR_MODE; 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci /* 6462306a36Sopenharmony_ci * We might have to translate ovl file into real file object 6562306a36Sopenharmony_ci * once use cases emerge. For now, simply don't let underlying 6662306a36Sopenharmony_ci * filesystem rely on attr->ia_file 6762306a36Sopenharmony_ci */ 6862306a36Sopenharmony_ci attr->ia_valid &= ~ATTR_FILE; 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci /* 7162306a36Sopenharmony_ci * If open(O_TRUNC) is done, VFS calls ->setattr with ATTR_OPEN 7262306a36Sopenharmony_ci * set. Overlayfs does not pass O_TRUNC flag to underlying 7362306a36Sopenharmony_ci * filesystem during open -> do not pass ATTR_OPEN. This 7462306a36Sopenharmony_ci * disables optimization in fuse which assumes open(O_TRUNC) 7562306a36Sopenharmony_ci * already set file size to 0. But we never passed O_TRUNC to 7662306a36Sopenharmony_ci * fuse. So by clearing ATTR_OPEN, fuse will be forced to send 7762306a36Sopenharmony_ci * setattr request to server. 7862306a36Sopenharmony_ci */ 7962306a36Sopenharmony_ci attr->ia_valid &= ~ATTR_OPEN; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci inode_lock(upperdentry->d_inode); 8262306a36Sopenharmony_ci old_cred = ovl_override_creds(dentry->d_sb); 8362306a36Sopenharmony_ci err = ovl_do_notify_change(ofs, upperdentry, attr); 8462306a36Sopenharmony_ci revert_creds(old_cred); 8562306a36Sopenharmony_ci if (!err) 8662306a36Sopenharmony_ci ovl_copyattr(dentry->d_inode); 8762306a36Sopenharmony_ci inode_unlock(upperdentry->d_inode); 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci if (winode) 9062306a36Sopenharmony_ci put_write_access(winode); 9162306a36Sopenharmony_ci } 9262306a36Sopenharmony_ciout_drop_write: 9362306a36Sopenharmony_ci ovl_drop_write(dentry); 9462306a36Sopenharmony_ciout: 9562306a36Sopenharmony_ci return err; 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cistatic void ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid) 9962306a36Sopenharmony_ci{ 10062306a36Sopenharmony_ci struct ovl_fs *ofs = OVL_FS(dentry->d_sb); 10162306a36Sopenharmony_ci bool samefs = ovl_same_fs(ofs); 10262306a36Sopenharmony_ci unsigned int xinobits = ovl_xino_bits(ofs); 10362306a36Sopenharmony_ci unsigned int xinoshift = 64 - xinobits; 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci if (samefs) { 10662306a36Sopenharmony_ci /* 10762306a36Sopenharmony_ci * When all layers are on the same fs, all real inode 10862306a36Sopenharmony_ci * number are unique, so we use the overlay st_dev, 10962306a36Sopenharmony_ci * which is friendly to du -x. 11062306a36Sopenharmony_ci */ 11162306a36Sopenharmony_ci stat->dev = dentry->d_sb->s_dev; 11262306a36Sopenharmony_ci return; 11362306a36Sopenharmony_ci } else if (xinobits) { 11462306a36Sopenharmony_ci /* 11562306a36Sopenharmony_ci * All inode numbers of underlying fs should not be using the 11662306a36Sopenharmony_ci * high xinobits, so we use high xinobits to partition the 11762306a36Sopenharmony_ci * overlay st_ino address space. The high bits holds the fsid 11862306a36Sopenharmony_ci * (upper fsid is 0). The lowest xinobit is reserved for mapping 11962306a36Sopenharmony_ci * the non-persistent inode numbers range in case of overflow. 12062306a36Sopenharmony_ci * This way all overlay inode numbers are unique and use the 12162306a36Sopenharmony_ci * overlay st_dev. 12262306a36Sopenharmony_ci */ 12362306a36Sopenharmony_ci if (likely(!(stat->ino >> xinoshift))) { 12462306a36Sopenharmony_ci stat->ino |= ((u64)fsid) << (xinoshift + 1); 12562306a36Sopenharmony_ci stat->dev = dentry->d_sb->s_dev; 12662306a36Sopenharmony_ci return; 12762306a36Sopenharmony_ci } else if (ovl_xino_warn(ofs)) { 12862306a36Sopenharmony_ci pr_warn_ratelimited("inode number too big (%pd2, ino=%llu, xinobits=%d)\n", 12962306a36Sopenharmony_ci dentry, stat->ino, xinobits); 13062306a36Sopenharmony_ci } 13162306a36Sopenharmony_ci } 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci /* The inode could not be mapped to a unified st_ino address space */ 13462306a36Sopenharmony_ci if (S_ISDIR(dentry->d_inode->i_mode)) { 13562306a36Sopenharmony_ci /* 13662306a36Sopenharmony_ci * Always use the overlay st_dev for directories, so 'find 13762306a36Sopenharmony_ci * -xdev' will scan the entire overlay mount and won't cross the 13862306a36Sopenharmony_ci * overlay mount boundaries. 13962306a36Sopenharmony_ci * 14062306a36Sopenharmony_ci * If not all layers are on the same fs the pair {real st_ino; 14162306a36Sopenharmony_ci * overlay st_dev} is not unique, so use the non persistent 14262306a36Sopenharmony_ci * overlay st_ino for directories. 14362306a36Sopenharmony_ci */ 14462306a36Sopenharmony_ci stat->dev = dentry->d_sb->s_dev; 14562306a36Sopenharmony_ci stat->ino = dentry->d_inode->i_ino; 14662306a36Sopenharmony_ci } else { 14762306a36Sopenharmony_ci /* 14862306a36Sopenharmony_ci * For non-samefs setup, if we cannot map all layers st_ino 14962306a36Sopenharmony_ci * to a unified address space, we need to make sure that st_dev 15062306a36Sopenharmony_ci * is unique per underlying fs, so we use the unique anonymous 15162306a36Sopenharmony_ci * bdev assigned to the underlying fs. 15262306a36Sopenharmony_ci */ 15362306a36Sopenharmony_ci stat->dev = ofs->fs[fsid].pseudo_dev; 15462306a36Sopenharmony_ci } 15562306a36Sopenharmony_ci} 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ciint ovl_getattr(struct mnt_idmap *idmap, const struct path *path, 15862306a36Sopenharmony_ci struct kstat *stat, u32 request_mask, unsigned int flags) 15962306a36Sopenharmony_ci{ 16062306a36Sopenharmony_ci struct dentry *dentry = path->dentry; 16162306a36Sopenharmony_ci enum ovl_path_type type; 16262306a36Sopenharmony_ci struct path realpath; 16362306a36Sopenharmony_ci const struct cred *old_cred; 16462306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 16562306a36Sopenharmony_ci bool is_dir = S_ISDIR(inode->i_mode); 16662306a36Sopenharmony_ci int fsid = 0; 16762306a36Sopenharmony_ci int err; 16862306a36Sopenharmony_ci bool metacopy_blocks = false; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci metacopy_blocks = ovl_is_metacopy_dentry(dentry); 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci type = ovl_path_real(dentry, &realpath); 17362306a36Sopenharmony_ci old_cred = ovl_override_creds(dentry->d_sb); 17462306a36Sopenharmony_ci err = ovl_do_getattr(&realpath, stat, request_mask, flags); 17562306a36Sopenharmony_ci if (err) 17662306a36Sopenharmony_ci goto out; 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci /* Report the effective immutable/append-only STATX flags */ 17962306a36Sopenharmony_ci generic_fill_statx_attr(inode, stat); 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci /* 18262306a36Sopenharmony_ci * For non-dir or same fs, we use st_ino of the copy up origin. 18362306a36Sopenharmony_ci * This guaranties constant st_dev/st_ino across copy up. 18462306a36Sopenharmony_ci * With xino feature and non-samefs, we use st_ino of the copy up 18562306a36Sopenharmony_ci * origin masked with high bits that represent the layer id. 18662306a36Sopenharmony_ci * 18762306a36Sopenharmony_ci * If lower filesystem supports NFS file handles, this also guaranties 18862306a36Sopenharmony_ci * persistent st_ino across mount cycle. 18962306a36Sopenharmony_ci */ 19062306a36Sopenharmony_ci if (!is_dir || ovl_same_dev(OVL_FS(dentry->d_sb))) { 19162306a36Sopenharmony_ci if (!OVL_TYPE_UPPER(type)) { 19262306a36Sopenharmony_ci fsid = ovl_layer_lower(dentry)->fsid; 19362306a36Sopenharmony_ci } else if (OVL_TYPE_ORIGIN(type)) { 19462306a36Sopenharmony_ci struct kstat lowerstat; 19562306a36Sopenharmony_ci u32 lowermask = STATX_INO | STATX_BLOCKS | 19662306a36Sopenharmony_ci (!is_dir ? STATX_NLINK : 0); 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci ovl_path_lower(dentry, &realpath); 19962306a36Sopenharmony_ci err = ovl_do_getattr(&realpath, &lowerstat, lowermask, 20062306a36Sopenharmony_ci flags); 20162306a36Sopenharmony_ci if (err) 20262306a36Sopenharmony_ci goto out; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci /* 20562306a36Sopenharmony_ci * Lower hardlinks may be broken on copy up to different 20662306a36Sopenharmony_ci * upper files, so we cannot use the lower origin st_ino 20762306a36Sopenharmony_ci * for those different files, even for the same fs case. 20862306a36Sopenharmony_ci * 20962306a36Sopenharmony_ci * Similarly, several redirected dirs can point to the 21062306a36Sopenharmony_ci * same dir on a lower layer. With the "verify_lower" 21162306a36Sopenharmony_ci * feature, we do not use the lower origin st_ino, if 21262306a36Sopenharmony_ci * we haven't verified that this redirect is unique. 21362306a36Sopenharmony_ci * 21462306a36Sopenharmony_ci * With inodes index enabled, it is safe to use st_ino 21562306a36Sopenharmony_ci * of an indexed origin. The index validates that the 21662306a36Sopenharmony_ci * upper hardlink is not broken and that a redirected 21762306a36Sopenharmony_ci * dir is the only redirect to that origin. 21862306a36Sopenharmony_ci */ 21962306a36Sopenharmony_ci if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) || 22062306a36Sopenharmony_ci (!ovl_verify_lower(dentry->d_sb) && 22162306a36Sopenharmony_ci (is_dir || lowerstat.nlink == 1))) { 22262306a36Sopenharmony_ci fsid = ovl_layer_lower(dentry)->fsid; 22362306a36Sopenharmony_ci stat->ino = lowerstat.ino; 22462306a36Sopenharmony_ci } 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci /* 22762306a36Sopenharmony_ci * If we are querying a metacopy dentry and lower 22862306a36Sopenharmony_ci * dentry is data dentry, then use the blocks we 22962306a36Sopenharmony_ci * queried just now. We don't have to do additional 23062306a36Sopenharmony_ci * vfs_getattr(). If lower itself is metacopy, then 23162306a36Sopenharmony_ci * additional vfs_getattr() is unavoidable. 23262306a36Sopenharmony_ci */ 23362306a36Sopenharmony_ci if (metacopy_blocks && 23462306a36Sopenharmony_ci realpath.dentry == ovl_dentry_lowerdata(dentry)) { 23562306a36Sopenharmony_ci stat->blocks = lowerstat.blocks; 23662306a36Sopenharmony_ci metacopy_blocks = false; 23762306a36Sopenharmony_ci } 23862306a36Sopenharmony_ci } 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci if (metacopy_blocks) { 24162306a36Sopenharmony_ci /* 24262306a36Sopenharmony_ci * If lower is not same as lowerdata or if there was 24362306a36Sopenharmony_ci * no origin on upper, we can end up here. 24462306a36Sopenharmony_ci * With lazy lowerdata lookup, guess lowerdata blocks 24562306a36Sopenharmony_ci * from size to avoid lowerdata lookup on stat(2). 24662306a36Sopenharmony_ci */ 24762306a36Sopenharmony_ci struct kstat lowerdatastat; 24862306a36Sopenharmony_ci u32 lowermask = STATX_BLOCKS; 24962306a36Sopenharmony_ci 25062306a36Sopenharmony_ci ovl_path_lowerdata(dentry, &realpath); 25162306a36Sopenharmony_ci if (realpath.dentry) { 25262306a36Sopenharmony_ci err = ovl_do_getattr(&realpath, &lowerdatastat, 25362306a36Sopenharmony_ci lowermask, flags); 25462306a36Sopenharmony_ci if (err) 25562306a36Sopenharmony_ci goto out; 25662306a36Sopenharmony_ci } else { 25762306a36Sopenharmony_ci lowerdatastat.blocks = 25862306a36Sopenharmony_ci round_up(stat->size, stat->blksize) >> 9; 25962306a36Sopenharmony_ci } 26062306a36Sopenharmony_ci stat->blocks = lowerdatastat.blocks; 26162306a36Sopenharmony_ci } 26262306a36Sopenharmony_ci } 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci ovl_map_dev_ino(dentry, stat, fsid); 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci /* 26762306a36Sopenharmony_ci * It's probably not worth it to count subdirs to get the 26862306a36Sopenharmony_ci * correct link count. nlink=1 seems to pacify 'find' and 26962306a36Sopenharmony_ci * other utilities. 27062306a36Sopenharmony_ci */ 27162306a36Sopenharmony_ci if (is_dir && OVL_TYPE_MERGE(type)) 27262306a36Sopenharmony_ci stat->nlink = 1; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci /* 27562306a36Sopenharmony_ci * Return the overlay inode nlinks for indexed upper inodes. 27662306a36Sopenharmony_ci * Overlay inode nlink counts the union of the upper hardlinks 27762306a36Sopenharmony_ci * and non-covered lower hardlinks. It does not include the upper 27862306a36Sopenharmony_ci * index hardlink. 27962306a36Sopenharmony_ci */ 28062306a36Sopenharmony_ci if (!is_dir && ovl_test_flag(OVL_INDEX, d_inode(dentry))) 28162306a36Sopenharmony_ci stat->nlink = dentry->d_inode->i_nlink; 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ciout: 28462306a36Sopenharmony_ci revert_creds(old_cred); 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci return err; 28762306a36Sopenharmony_ci} 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ciint ovl_permission(struct mnt_idmap *idmap, 29062306a36Sopenharmony_ci struct inode *inode, int mask) 29162306a36Sopenharmony_ci{ 29262306a36Sopenharmony_ci struct inode *upperinode = ovl_inode_upper(inode); 29362306a36Sopenharmony_ci struct inode *realinode; 29462306a36Sopenharmony_ci struct path realpath; 29562306a36Sopenharmony_ci const struct cred *old_cred; 29662306a36Sopenharmony_ci int err; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci /* Careful in RCU walk mode */ 29962306a36Sopenharmony_ci realinode = ovl_i_path_real(inode, &realpath); 30062306a36Sopenharmony_ci if (!realinode) { 30162306a36Sopenharmony_ci WARN_ON(!(mask & MAY_NOT_BLOCK)); 30262306a36Sopenharmony_ci return -ECHILD; 30362306a36Sopenharmony_ci } 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci /* 30662306a36Sopenharmony_ci * Check overlay inode with the creds of task and underlying inode 30762306a36Sopenharmony_ci * with creds of mounter 30862306a36Sopenharmony_ci */ 30962306a36Sopenharmony_ci err = generic_permission(&nop_mnt_idmap, inode, mask); 31062306a36Sopenharmony_ci if (err) 31162306a36Sopenharmony_ci return err; 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci old_cred = ovl_override_creds(inode->i_sb); 31462306a36Sopenharmony_ci if (!upperinode && 31562306a36Sopenharmony_ci !special_file(realinode->i_mode) && mask & MAY_WRITE) { 31662306a36Sopenharmony_ci mask &= ~(MAY_WRITE | MAY_APPEND); 31762306a36Sopenharmony_ci /* Make sure mounter can read file for copy up later */ 31862306a36Sopenharmony_ci mask |= MAY_READ; 31962306a36Sopenharmony_ci } 32062306a36Sopenharmony_ci err = inode_permission(mnt_idmap(realpath.mnt), realinode, mask); 32162306a36Sopenharmony_ci revert_creds(old_cred); 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_ci return err; 32462306a36Sopenharmony_ci} 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_cistatic const char *ovl_get_link(struct dentry *dentry, 32762306a36Sopenharmony_ci struct inode *inode, 32862306a36Sopenharmony_ci struct delayed_call *done) 32962306a36Sopenharmony_ci{ 33062306a36Sopenharmony_ci const struct cred *old_cred; 33162306a36Sopenharmony_ci const char *p; 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci if (!dentry) 33462306a36Sopenharmony_ci return ERR_PTR(-ECHILD); 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci old_cred = ovl_override_creds(dentry->d_sb); 33762306a36Sopenharmony_ci p = vfs_get_link(ovl_dentry_real(dentry), done); 33862306a36Sopenharmony_ci revert_creds(old_cred); 33962306a36Sopenharmony_ci return p; 34062306a36Sopenharmony_ci} 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_cibool ovl_is_private_xattr(struct super_block *sb, const char *name) 34362306a36Sopenharmony_ci{ 34462306a36Sopenharmony_ci struct ovl_fs *ofs = OVL_FS(sb); 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci if (ofs->config.userxattr) 34762306a36Sopenharmony_ci return strncmp(name, OVL_XATTR_USER_PREFIX, 34862306a36Sopenharmony_ci sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0; 34962306a36Sopenharmony_ci else 35062306a36Sopenharmony_ci return strncmp(name, OVL_XATTR_TRUSTED_PREFIX, 35162306a36Sopenharmony_ci sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0; 35262306a36Sopenharmony_ci} 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ciint ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, 35562306a36Sopenharmony_ci const void *value, size_t size, int flags) 35662306a36Sopenharmony_ci{ 35762306a36Sopenharmony_ci int err; 35862306a36Sopenharmony_ci struct ovl_fs *ofs = OVL_FS(dentry->d_sb); 35962306a36Sopenharmony_ci struct dentry *upperdentry = ovl_i_dentry_upper(inode); 36062306a36Sopenharmony_ci struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); 36162306a36Sopenharmony_ci struct path realpath; 36262306a36Sopenharmony_ci const struct cred *old_cred; 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci err = ovl_want_write(dentry); 36562306a36Sopenharmony_ci if (err) 36662306a36Sopenharmony_ci goto out; 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci if (!value && !upperdentry) { 36962306a36Sopenharmony_ci ovl_path_lower(dentry, &realpath); 37062306a36Sopenharmony_ci old_cred = ovl_override_creds(dentry->d_sb); 37162306a36Sopenharmony_ci err = vfs_getxattr(mnt_idmap(realpath.mnt), realdentry, name, NULL, 0); 37262306a36Sopenharmony_ci revert_creds(old_cred); 37362306a36Sopenharmony_ci if (err < 0) 37462306a36Sopenharmony_ci goto out_drop_write; 37562306a36Sopenharmony_ci } 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci if (!upperdentry) { 37862306a36Sopenharmony_ci err = ovl_copy_up(dentry); 37962306a36Sopenharmony_ci if (err) 38062306a36Sopenharmony_ci goto out_drop_write; 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci realdentry = ovl_dentry_upper(dentry); 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci old_cred = ovl_override_creds(dentry->d_sb); 38662306a36Sopenharmony_ci if (value) { 38762306a36Sopenharmony_ci err = ovl_do_setxattr(ofs, realdentry, name, value, size, 38862306a36Sopenharmony_ci flags); 38962306a36Sopenharmony_ci } else { 39062306a36Sopenharmony_ci WARN_ON(flags != XATTR_REPLACE); 39162306a36Sopenharmony_ci err = ovl_do_removexattr(ofs, realdentry, name); 39262306a36Sopenharmony_ci } 39362306a36Sopenharmony_ci revert_creds(old_cred); 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci /* copy c/mtime */ 39662306a36Sopenharmony_ci ovl_copyattr(inode); 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ciout_drop_write: 39962306a36Sopenharmony_ci ovl_drop_write(dentry); 40062306a36Sopenharmony_ciout: 40162306a36Sopenharmony_ci return err; 40262306a36Sopenharmony_ci} 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ciint ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, 40562306a36Sopenharmony_ci void *value, size_t size) 40662306a36Sopenharmony_ci{ 40762306a36Sopenharmony_ci ssize_t res; 40862306a36Sopenharmony_ci const struct cred *old_cred; 40962306a36Sopenharmony_ci struct path realpath; 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci ovl_i_path_real(inode, &realpath); 41262306a36Sopenharmony_ci old_cred = ovl_override_creds(dentry->d_sb); 41362306a36Sopenharmony_ci res = vfs_getxattr(mnt_idmap(realpath.mnt), realpath.dentry, name, value, size); 41462306a36Sopenharmony_ci revert_creds(old_cred); 41562306a36Sopenharmony_ci return res; 41662306a36Sopenharmony_ci} 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_cistatic bool ovl_can_list(struct super_block *sb, const char *s) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci /* Never list private (.overlay) */ 42162306a36Sopenharmony_ci if (ovl_is_private_xattr(sb, s)) 42262306a36Sopenharmony_ci return false; 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci /* List all non-trusted xattrs */ 42562306a36Sopenharmony_ci if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) 42662306a36Sopenharmony_ci return true; 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci /* list other trusted for superuser only */ 42962306a36Sopenharmony_ci return ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); 43062306a36Sopenharmony_ci} 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_cissize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) 43362306a36Sopenharmony_ci{ 43462306a36Sopenharmony_ci struct dentry *realdentry = ovl_dentry_real(dentry); 43562306a36Sopenharmony_ci ssize_t res; 43662306a36Sopenharmony_ci size_t len; 43762306a36Sopenharmony_ci char *s; 43862306a36Sopenharmony_ci const struct cred *old_cred; 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci old_cred = ovl_override_creds(dentry->d_sb); 44162306a36Sopenharmony_ci res = vfs_listxattr(realdentry, list, size); 44262306a36Sopenharmony_ci revert_creds(old_cred); 44362306a36Sopenharmony_ci if (res <= 0 || size == 0) 44462306a36Sopenharmony_ci return res; 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci /* filter out private xattrs */ 44762306a36Sopenharmony_ci for (s = list, len = res; len;) { 44862306a36Sopenharmony_ci size_t slen = strnlen(s, len) + 1; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci /* underlying fs providing us with an broken xattr list? */ 45162306a36Sopenharmony_ci if (WARN_ON(slen > len)) 45262306a36Sopenharmony_ci return -EIO; 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_ci len -= slen; 45562306a36Sopenharmony_ci if (!ovl_can_list(dentry->d_sb, s)) { 45662306a36Sopenharmony_ci res -= slen; 45762306a36Sopenharmony_ci memmove(s, s + slen, len); 45862306a36Sopenharmony_ci } else { 45962306a36Sopenharmony_ci s += slen; 46062306a36Sopenharmony_ci } 46162306a36Sopenharmony_ci } 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci return res; 46462306a36Sopenharmony_ci} 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci#ifdef CONFIG_FS_POSIX_ACL 46762306a36Sopenharmony_ci/* 46862306a36Sopenharmony_ci * Apply the idmapping of the layer to POSIX ACLs. The caller must pass a clone 46962306a36Sopenharmony_ci * of the POSIX ACLs retrieved from the lower layer to this function to not 47062306a36Sopenharmony_ci * alter the POSIX ACLs for the underlying filesystem. 47162306a36Sopenharmony_ci */ 47262306a36Sopenharmony_cistatic void ovl_idmap_posix_acl(const struct inode *realinode, 47362306a36Sopenharmony_ci struct mnt_idmap *idmap, 47462306a36Sopenharmony_ci struct posix_acl *acl) 47562306a36Sopenharmony_ci{ 47662306a36Sopenharmony_ci struct user_namespace *fs_userns = i_user_ns(realinode); 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci for (unsigned int i = 0; i < acl->a_count; i++) { 47962306a36Sopenharmony_ci vfsuid_t vfsuid; 48062306a36Sopenharmony_ci vfsgid_t vfsgid; 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ci struct posix_acl_entry *e = &acl->a_entries[i]; 48362306a36Sopenharmony_ci switch (e->e_tag) { 48462306a36Sopenharmony_ci case ACL_USER: 48562306a36Sopenharmony_ci vfsuid = make_vfsuid(idmap, fs_userns, e->e_uid); 48662306a36Sopenharmony_ci e->e_uid = vfsuid_into_kuid(vfsuid); 48762306a36Sopenharmony_ci break; 48862306a36Sopenharmony_ci case ACL_GROUP: 48962306a36Sopenharmony_ci vfsgid = make_vfsgid(idmap, fs_userns, e->e_gid); 49062306a36Sopenharmony_ci e->e_gid = vfsgid_into_kgid(vfsgid); 49162306a36Sopenharmony_ci break; 49262306a36Sopenharmony_ci } 49362306a36Sopenharmony_ci } 49462306a36Sopenharmony_ci} 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci/* 49762306a36Sopenharmony_ci * The @noperm argument is used to skip permission checking and is a temporary 49862306a36Sopenharmony_ci * measure. Quoting Miklos from an earlier discussion: 49962306a36Sopenharmony_ci * 50062306a36Sopenharmony_ci * > So there are two paths to getting an acl: 50162306a36Sopenharmony_ci * > 1) permission checking and 2) retrieving the value via getxattr(2). 50262306a36Sopenharmony_ci * > This is a similar situation as reading a symlink vs. following it. 50362306a36Sopenharmony_ci * > When following a symlink overlayfs always reads the link on the 50462306a36Sopenharmony_ci * > underlying fs just as if it was a readlink(2) call, calling 50562306a36Sopenharmony_ci * > security_inode_readlink() instead of security_inode_follow_link(). 50662306a36Sopenharmony_ci * > This is logical: we are reading the link from the underlying storage, 50762306a36Sopenharmony_ci * > and following it on overlayfs. 50862306a36Sopenharmony_ci * > 50962306a36Sopenharmony_ci * > Applying the same logic to acl: we do need to call the 51062306a36Sopenharmony_ci * > security_inode_getxattr() on the underlying fs, even if just want to 51162306a36Sopenharmony_ci * > check permissions on overlay. This is currently not done, which is an 51262306a36Sopenharmony_ci * > inconsistency. 51362306a36Sopenharmony_ci * > 51462306a36Sopenharmony_ci * > Maybe adding the check to ovl_get_acl() is the right way to go, but 51562306a36Sopenharmony_ci * > I'm a little afraid of a performance regression. Will look into that. 51662306a36Sopenharmony_ci * 51762306a36Sopenharmony_ci * Until we have made a decision allow this helper to take the @noperm 51862306a36Sopenharmony_ci * argument. We should hopefully be able to remove it soon. 51962306a36Sopenharmony_ci */ 52062306a36Sopenharmony_cistruct posix_acl *ovl_get_acl_path(const struct path *path, 52162306a36Sopenharmony_ci const char *acl_name, bool noperm) 52262306a36Sopenharmony_ci{ 52362306a36Sopenharmony_ci struct posix_acl *real_acl, *clone; 52462306a36Sopenharmony_ci struct mnt_idmap *idmap; 52562306a36Sopenharmony_ci struct inode *realinode = d_inode(path->dentry); 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci idmap = mnt_idmap(path->mnt); 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci if (noperm) 53062306a36Sopenharmony_ci real_acl = get_inode_acl(realinode, posix_acl_type(acl_name)); 53162306a36Sopenharmony_ci else 53262306a36Sopenharmony_ci real_acl = vfs_get_acl(idmap, path->dentry, acl_name); 53362306a36Sopenharmony_ci if (IS_ERR_OR_NULL(real_acl)) 53462306a36Sopenharmony_ci return real_acl; 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ci if (!is_idmapped_mnt(path->mnt)) 53762306a36Sopenharmony_ci return real_acl; 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci /* 54062306a36Sopenharmony_ci * We cannot alter the ACLs returned from the relevant layer as that 54162306a36Sopenharmony_ci * would alter the cached values filesystem wide for the lower 54262306a36Sopenharmony_ci * filesystem. Instead we can clone the ACLs and then apply the 54362306a36Sopenharmony_ci * relevant idmapping of the layer. 54462306a36Sopenharmony_ci */ 54562306a36Sopenharmony_ci clone = posix_acl_clone(real_acl, GFP_KERNEL); 54662306a36Sopenharmony_ci posix_acl_release(real_acl); /* release original acl */ 54762306a36Sopenharmony_ci if (!clone) 54862306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci ovl_idmap_posix_acl(realinode, idmap, clone); 55162306a36Sopenharmony_ci return clone; 55262306a36Sopenharmony_ci} 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci/* 55562306a36Sopenharmony_ci * When the relevant layer is an idmapped mount we need to take the idmapping 55662306a36Sopenharmony_ci * of the layer into account and translate any ACL_{GROUP,USER} values 55762306a36Sopenharmony_ci * according to the idmapped mount. 55862306a36Sopenharmony_ci * 55962306a36Sopenharmony_ci * We cannot alter the ACLs returned from the relevant layer as that would 56062306a36Sopenharmony_ci * alter the cached values filesystem wide for the lower filesystem. Instead we 56162306a36Sopenharmony_ci * can clone the ACLs and then apply the relevant idmapping of the layer. 56262306a36Sopenharmony_ci * 56362306a36Sopenharmony_ci * This is obviously only relevant when idmapped layers are used. 56462306a36Sopenharmony_ci */ 56562306a36Sopenharmony_cistruct posix_acl *do_ovl_get_acl(struct mnt_idmap *idmap, 56662306a36Sopenharmony_ci struct inode *inode, int type, 56762306a36Sopenharmony_ci bool rcu, bool noperm) 56862306a36Sopenharmony_ci{ 56962306a36Sopenharmony_ci struct inode *realinode; 57062306a36Sopenharmony_ci struct posix_acl *acl; 57162306a36Sopenharmony_ci struct path realpath; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci /* Careful in RCU walk mode */ 57462306a36Sopenharmony_ci realinode = ovl_i_path_real(inode, &realpath); 57562306a36Sopenharmony_ci if (!realinode) { 57662306a36Sopenharmony_ci WARN_ON(!rcu); 57762306a36Sopenharmony_ci return ERR_PTR(-ECHILD); 57862306a36Sopenharmony_ci } 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci if (!IS_POSIXACL(realinode)) 58162306a36Sopenharmony_ci return NULL; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci if (rcu) { 58462306a36Sopenharmony_ci /* 58562306a36Sopenharmony_ci * If the layer is idmapped drop out of RCU path walk 58662306a36Sopenharmony_ci * so we can clone the ACLs. 58762306a36Sopenharmony_ci */ 58862306a36Sopenharmony_ci if (is_idmapped_mnt(realpath.mnt)) 58962306a36Sopenharmony_ci return ERR_PTR(-ECHILD); 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci acl = get_cached_acl_rcu(realinode, type); 59262306a36Sopenharmony_ci } else { 59362306a36Sopenharmony_ci const struct cred *old_cred; 59462306a36Sopenharmony_ci 59562306a36Sopenharmony_ci old_cred = ovl_override_creds(inode->i_sb); 59662306a36Sopenharmony_ci acl = ovl_get_acl_path(&realpath, posix_acl_xattr_name(type), noperm); 59762306a36Sopenharmony_ci revert_creds(old_cred); 59862306a36Sopenharmony_ci } 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci return acl; 60162306a36Sopenharmony_ci} 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_cistatic int ovl_set_or_remove_acl(struct dentry *dentry, struct inode *inode, 60462306a36Sopenharmony_ci struct posix_acl *acl, int type) 60562306a36Sopenharmony_ci{ 60662306a36Sopenharmony_ci int err; 60762306a36Sopenharmony_ci struct path realpath; 60862306a36Sopenharmony_ci const char *acl_name; 60962306a36Sopenharmony_ci const struct cred *old_cred; 61062306a36Sopenharmony_ci struct ovl_fs *ofs = OVL_FS(dentry->d_sb); 61162306a36Sopenharmony_ci struct dentry *upperdentry = ovl_dentry_upper(dentry); 61262306a36Sopenharmony_ci struct dentry *realdentry = upperdentry ?: ovl_dentry_lower(dentry); 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci err = ovl_want_write(dentry); 61562306a36Sopenharmony_ci if (err) 61662306a36Sopenharmony_ci return err; 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci /* 61962306a36Sopenharmony_ci * If ACL is to be removed from a lower file, check if it exists in 62062306a36Sopenharmony_ci * the first place before copying it up. 62162306a36Sopenharmony_ci */ 62262306a36Sopenharmony_ci acl_name = posix_acl_xattr_name(type); 62362306a36Sopenharmony_ci if (!acl && !upperdentry) { 62462306a36Sopenharmony_ci struct posix_acl *real_acl; 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci ovl_path_lower(dentry, &realpath); 62762306a36Sopenharmony_ci old_cred = ovl_override_creds(dentry->d_sb); 62862306a36Sopenharmony_ci real_acl = vfs_get_acl(mnt_idmap(realpath.mnt), realdentry, 62962306a36Sopenharmony_ci acl_name); 63062306a36Sopenharmony_ci revert_creds(old_cred); 63162306a36Sopenharmony_ci if (IS_ERR(real_acl)) { 63262306a36Sopenharmony_ci err = PTR_ERR(real_acl); 63362306a36Sopenharmony_ci goto out_drop_write; 63462306a36Sopenharmony_ci } 63562306a36Sopenharmony_ci posix_acl_release(real_acl); 63662306a36Sopenharmony_ci } 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ci if (!upperdentry) { 63962306a36Sopenharmony_ci err = ovl_copy_up(dentry); 64062306a36Sopenharmony_ci if (err) 64162306a36Sopenharmony_ci goto out_drop_write; 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_ci realdentry = ovl_dentry_upper(dentry); 64462306a36Sopenharmony_ci } 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci old_cred = ovl_override_creds(dentry->d_sb); 64762306a36Sopenharmony_ci if (acl) 64862306a36Sopenharmony_ci err = ovl_do_set_acl(ofs, realdentry, acl_name, acl); 64962306a36Sopenharmony_ci else 65062306a36Sopenharmony_ci err = ovl_do_remove_acl(ofs, realdentry, acl_name); 65162306a36Sopenharmony_ci revert_creds(old_cred); 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci /* copy c/mtime */ 65462306a36Sopenharmony_ci ovl_copyattr(inode); 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_ciout_drop_write: 65762306a36Sopenharmony_ci ovl_drop_write(dentry); 65862306a36Sopenharmony_ci return err; 65962306a36Sopenharmony_ci} 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ciint ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, 66262306a36Sopenharmony_ci struct posix_acl *acl, int type) 66362306a36Sopenharmony_ci{ 66462306a36Sopenharmony_ci int err; 66562306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 66662306a36Sopenharmony_ci struct dentry *workdir = ovl_workdir(dentry); 66762306a36Sopenharmony_ci struct inode *realinode = ovl_inode_real(inode); 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ci if (!IS_POSIXACL(d_inode(workdir))) 67062306a36Sopenharmony_ci return -EOPNOTSUPP; 67162306a36Sopenharmony_ci if (!realinode->i_op->set_acl) 67262306a36Sopenharmony_ci return -EOPNOTSUPP; 67362306a36Sopenharmony_ci if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) 67462306a36Sopenharmony_ci return acl ? -EACCES : 0; 67562306a36Sopenharmony_ci if (!inode_owner_or_capable(&nop_mnt_idmap, inode)) 67662306a36Sopenharmony_ci return -EPERM; 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci /* 67962306a36Sopenharmony_ci * Check if sgid bit needs to be cleared (actual setacl operation will 68062306a36Sopenharmony_ci * be done with mounter's capabilities and so that won't do it for us). 68162306a36Sopenharmony_ci */ 68262306a36Sopenharmony_ci if (unlikely(inode->i_mode & S_ISGID) && type == ACL_TYPE_ACCESS && 68362306a36Sopenharmony_ci !in_group_p(inode->i_gid) && 68462306a36Sopenharmony_ci !capable_wrt_inode_uidgid(&nop_mnt_idmap, inode, CAP_FSETID)) { 68562306a36Sopenharmony_ci struct iattr iattr = { .ia_valid = ATTR_KILL_SGID }; 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci err = ovl_setattr(&nop_mnt_idmap, dentry, &iattr); 68862306a36Sopenharmony_ci if (err) 68962306a36Sopenharmony_ci return err; 69062306a36Sopenharmony_ci } 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ci return ovl_set_or_remove_acl(dentry, inode, acl, type); 69362306a36Sopenharmony_ci} 69462306a36Sopenharmony_ci#endif 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ciint ovl_update_time(struct inode *inode, int flags) 69762306a36Sopenharmony_ci{ 69862306a36Sopenharmony_ci if (flags & S_ATIME) { 69962306a36Sopenharmony_ci struct ovl_fs *ofs = OVL_FS(inode->i_sb); 70062306a36Sopenharmony_ci struct path upperpath = { 70162306a36Sopenharmony_ci .mnt = ovl_upper_mnt(ofs), 70262306a36Sopenharmony_ci .dentry = ovl_upperdentry_dereference(OVL_I(inode)), 70362306a36Sopenharmony_ci }; 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci if (upperpath.dentry) { 70662306a36Sopenharmony_ci touch_atime(&upperpath); 70762306a36Sopenharmony_ci inode->i_atime = d_inode(upperpath.dentry)->i_atime; 70862306a36Sopenharmony_ci } 70962306a36Sopenharmony_ci } 71062306a36Sopenharmony_ci return 0; 71162306a36Sopenharmony_ci} 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_cistatic int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 71462306a36Sopenharmony_ci u64 start, u64 len) 71562306a36Sopenharmony_ci{ 71662306a36Sopenharmony_ci int err; 71762306a36Sopenharmony_ci struct inode *realinode = ovl_inode_realdata(inode); 71862306a36Sopenharmony_ci const struct cred *old_cred; 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci if (!realinode) 72162306a36Sopenharmony_ci return -EIO; 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci if (!realinode->i_op->fiemap) 72462306a36Sopenharmony_ci return -EOPNOTSUPP; 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ci old_cred = ovl_override_creds(inode->i_sb); 72762306a36Sopenharmony_ci err = realinode->i_op->fiemap(realinode, fieinfo, start, len); 72862306a36Sopenharmony_ci revert_creds(old_cred); 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci return err; 73162306a36Sopenharmony_ci} 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ci/* 73462306a36Sopenharmony_ci * Work around the fact that security_file_ioctl() takes a file argument. 73562306a36Sopenharmony_ci * Introducing security_inode_fileattr_get/set() hooks would solve this issue 73662306a36Sopenharmony_ci * properly. 73762306a36Sopenharmony_ci */ 73862306a36Sopenharmony_cistatic int ovl_security_fileattr(const struct path *realpath, struct fileattr *fa, 73962306a36Sopenharmony_ci bool set) 74062306a36Sopenharmony_ci{ 74162306a36Sopenharmony_ci struct file *file; 74262306a36Sopenharmony_ci unsigned int cmd; 74362306a36Sopenharmony_ci int err; 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci file = dentry_open(realpath, O_RDONLY, current_cred()); 74662306a36Sopenharmony_ci if (IS_ERR(file)) 74762306a36Sopenharmony_ci return PTR_ERR(file); 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci if (set) 75062306a36Sopenharmony_ci cmd = fa->fsx_valid ? FS_IOC_FSSETXATTR : FS_IOC_SETFLAGS; 75162306a36Sopenharmony_ci else 75262306a36Sopenharmony_ci cmd = fa->fsx_valid ? FS_IOC_FSGETXATTR : FS_IOC_GETFLAGS; 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ci err = security_file_ioctl(file, cmd, 0); 75562306a36Sopenharmony_ci fput(file); 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci return err; 75862306a36Sopenharmony_ci} 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_ciint ovl_real_fileattr_set(const struct path *realpath, struct fileattr *fa) 76162306a36Sopenharmony_ci{ 76262306a36Sopenharmony_ci int err; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci err = ovl_security_fileattr(realpath, fa, true); 76562306a36Sopenharmony_ci if (err) 76662306a36Sopenharmony_ci return err; 76762306a36Sopenharmony_ci 76862306a36Sopenharmony_ci return vfs_fileattr_set(mnt_idmap(realpath->mnt), realpath->dentry, fa); 76962306a36Sopenharmony_ci} 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ciint ovl_fileattr_set(struct mnt_idmap *idmap, 77262306a36Sopenharmony_ci struct dentry *dentry, struct fileattr *fa) 77362306a36Sopenharmony_ci{ 77462306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 77562306a36Sopenharmony_ci struct path upperpath; 77662306a36Sopenharmony_ci const struct cred *old_cred; 77762306a36Sopenharmony_ci unsigned int flags; 77862306a36Sopenharmony_ci int err; 77962306a36Sopenharmony_ci 78062306a36Sopenharmony_ci err = ovl_want_write(dentry); 78162306a36Sopenharmony_ci if (err) 78262306a36Sopenharmony_ci goto out; 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci err = ovl_copy_up(dentry); 78562306a36Sopenharmony_ci if (!err) { 78662306a36Sopenharmony_ci ovl_path_real(dentry, &upperpath); 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_ci old_cred = ovl_override_creds(inode->i_sb); 78962306a36Sopenharmony_ci /* 79062306a36Sopenharmony_ci * Store immutable/append-only flags in xattr and clear them 79162306a36Sopenharmony_ci * in upper fileattr (in case they were set by older kernel) 79262306a36Sopenharmony_ci * so children of "ovl-immutable" directories lower aliases of 79362306a36Sopenharmony_ci * "ovl-immutable" hardlinks could be copied up. 79462306a36Sopenharmony_ci * Clear xattr when flags are cleared. 79562306a36Sopenharmony_ci */ 79662306a36Sopenharmony_ci err = ovl_set_protattr(inode, upperpath.dentry, fa); 79762306a36Sopenharmony_ci if (!err) 79862306a36Sopenharmony_ci err = ovl_real_fileattr_set(&upperpath, fa); 79962306a36Sopenharmony_ci revert_creds(old_cred); 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci /* 80262306a36Sopenharmony_ci * Merge real inode flags with inode flags read from 80362306a36Sopenharmony_ci * overlay.protattr xattr 80462306a36Sopenharmony_ci */ 80562306a36Sopenharmony_ci flags = ovl_inode_real(inode)->i_flags & OVL_COPY_I_FLAGS_MASK; 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci BUILD_BUG_ON(OVL_PROT_I_FLAGS_MASK & ~OVL_COPY_I_FLAGS_MASK); 80862306a36Sopenharmony_ci flags |= inode->i_flags & OVL_PROT_I_FLAGS_MASK; 80962306a36Sopenharmony_ci inode_set_flags(inode, flags, OVL_COPY_I_FLAGS_MASK); 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci /* Update ctime */ 81262306a36Sopenharmony_ci ovl_copyattr(inode); 81362306a36Sopenharmony_ci } 81462306a36Sopenharmony_ci ovl_drop_write(dentry); 81562306a36Sopenharmony_ciout: 81662306a36Sopenharmony_ci return err; 81762306a36Sopenharmony_ci} 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci/* Convert inode protection flags to fileattr flags */ 82062306a36Sopenharmony_cistatic void ovl_fileattr_prot_flags(struct inode *inode, struct fileattr *fa) 82162306a36Sopenharmony_ci{ 82262306a36Sopenharmony_ci BUILD_BUG_ON(OVL_PROT_FS_FLAGS_MASK & ~FS_COMMON_FL); 82362306a36Sopenharmony_ci BUILD_BUG_ON(OVL_PROT_FSX_FLAGS_MASK & ~FS_XFLAG_COMMON); 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci if (inode->i_flags & S_APPEND) { 82662306a36Sopenharmony_ci fa->flags |= FS_APPEND_FL; 82762306a36Sopenharmony_ci fa->fsx_xflags |= FS_XFLAG_APPEND; 82862306a36Sopenharmony_ci } 82962306a36Sopenharmony_ci if (inode->i_flags & S_IMMUTABLE) { 83062306a36Sopenharmony_ci fa->flags |= FS_IMMUTABLE_FL; 83162306a36Sopenharmony_ci fa->fsx_xflags |= FS_XFLAG_IMMUTABLE; 83262306a36Sopenharmony_ci } 83362306a36Sopenharmony_ci} 83462306a36Sopenharmony_ci 83562306a36Sopenharmony_ciint ovl_real_fileattr_get(const struct path *realpath, struct fileattr *fa) 83662306a36Sopenharmony_ci{ 83762306a36Sopenharmony_ci int err; 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci err = ovl_security_fileattr(realpath, fa, false); 84062306a36Sopenharmony_ci if (err) 84162306a36Sopenharmony_ci return err; 84262306a36Sopenharmony_ci 84362306a36Sopenharmony_ci err = vfs_fileattr_get(realpath->dentry, fa); 84462306a36Sopenharmony_ci if (err == -ENOIOCTLCMD) 84562306a36Sopenharmony_ci err = -ENOTTY; 84662306a36Sopenharmony_ci return err; 84762306a36Sopenharmony_ci} 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ciint ovl_fileattr_get(struct dentry *dentry, struct fileattr *fa) 85062306a36Sopenharmony_ci{ 85162306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 85262306a36Sopenharmony_ci struct path realpath; 85362306a36Sopenharmony_ci const struct cred *old_cred; 85462306a36Sopenharmony_ci int err; 85562306a36Sopenharmony_ci 85662306a36Sopenharmony_ci ovl_path_real(dentry, &realpath); 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ci old_cred = ovl_override_creds(inode->i_sb); 85962306a36Sopenharmony_ci err = ovl_real_fileattr_get(&realpath, fa); 86062306a36Sopenharmony_ci ovl_fileattr_prot_flags(inode, fa); 86162306a36Sopenharmony_ci revert_creds(old_cred); 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci return err; 86462306a36Sopenharmony_ci} 86562306a36Sopenharmony_ci 86662306a36Sopenharmony_cistatic const struct inode_operations ovl_file_inode_operations = { 86762306a36Sopenharmony_ci .setattr = ovl_setattr, 86862306a36Sopenharmony_ci .permission = ovl_permission, 86962306a36Sopenharmony_ci .getattr = ovl_getattr, 87062306a36Sopenharmony_ci .listxattr = ovl_listxattr, 87162306a36Sopenharmony_ci .get_inode_acl = ovl_get_inode_acl, 87262306a36Sopenharmony_ci .get_acl = ovl_get_acl, 87362306a36Sopenharmony_ci .set_acl = ovl_set_acl, 87462306a36Sopenharmony_ci .update_time = ovl_update_time, 87562306a36Sopenharmony_ci .fiemap = ovl_fiemap, 87662306a36Sopenharmony_ci .fileattr_get = ovl_fileattr_get, 87762306a36Sopenharmony_ci .fileattr_set = ovl_fileattr_set, 87862306a36Sopenharmony_ci}; 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_cistatic const struct inode_operations ovl_symlink_inode_operations = { 88162306a36Sopenharmony_ci .setattr = ovl_setattr, 88262306a36Sopenharmony_ci .get_link = ovl_get_link, 88362306a36Sopenharmony_ci .getattr = ovl_getattr, 88462306a36Sopenharmony_ci .listxattr = ovl_listxattr, 88562306a36Sopenharmony_ci .update_time = ovl_update_time, 88662306a36Sopenharmony_ci}; 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_cistatic const struct inode_operations ovl_special_inode_operations = { 88962306a36Sopenharmony_ci .setattr = ovl_setattr, 89062306a36Sopenharmony_ci .permission = ovl_permission, 89162306a36Sopenharmony_ci .getattr = ovl_getattr, 89262306a36Sopenharmony_ci .listxattr = ovl_listxattr, 89362306a36Sopenharmony_ci .get_inode_acl = ovl_get_inode_acl, 89462306a36Sopenharmony_ci .get_acl = ovl_get_acl, 89562306a36Sopenharmony_ci .set_acl = ovl_set_acl, 89662306a36Sopenharmony_ci .update_time = ovl_update_time, 89762306a36Sopenharmony_ci}; 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_cistatic const struct address_space_operations ovl_aops = { 90062306a36Sopenharmony_ci /* For O_DIRECT dentry_open() checks f_mapping->a_ops->direct_IO */ 90162306a36Sopenharmony_ci .direct_IO = noop_direct_IO, 90262306a36Sopenharmony_ci}; 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ci/* 90562306a36Sopenharmony_ci * It is possible to stack overlayfs instance on top of another 90662306a36Sopenharmony_ci * overlayfs instance as lower layer. We need to annotate the 90762306a36Sopenharmony_ci * stackable i_mutex locks according to stack level of the super 90862306a36Sopenharmony_ci * block instance. An overlayfs instance can never be in stack 90962306a36Sopenharmony_ci * depth 0 (there is always a real fs below it). An overlayfs 91062306a36Sopenharmony_ci * inode lock will use the lockdep annotation ovl_i_mutex_key[depth]. 91162306a36Sopenharmony_ci * 91262306a36Sopenharmony_ci * For example, here is a snip from /proc/lockdep_chains after 91362306a36Sopenharmony_ci * dir_iterate of nested overlayfs: 91462306a36Sopenharmony_ci * 91562306a36Sopenharmony_ci * [...] &ovl_i_mutex_dir_key[depth] (stack_depth=2) 91662306a36Sopenharmony_ci * [...] &ovl_i_mutex_dir_key[depth]#2 (stack_depth=1) 91762306a36Sopenharmony_ci * [...] &type->i_mutex_dir_key (stack_depth=0) 91862306a36Sopenharmony_ci * 91962306a36Sopenharmony_ci * Locking order w.r.t ovl_want_write() is important for nested overlayfs. 92062306a36Sopenharmony_ci * 92162306a36Sopenharmony_ci * This chain is valid: 92262306a36Sopenharmony_ci * - inode->i_rwsem (inode_lock[2]) 92362306a36Sopenharmony_ci * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) 92462306a36Sopenharmony_ci * - OVL_I(inode)->lock (ovl_inode_lock[2]) 92562306a36Sopenharmony_ci * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) 92662306a36Sopenharmony_ci * 92762306a36Sopenharmony_ci * And this chain is valid: 92862306a36Sopenharmony_ci * - inode->i_rwsem (inode_lock[2]) 92962306a36Sopenharmony_ci * - OVL_I(inode)->lock (ovl_inode_lock[2]) 93062306a36Sopenharmony_ci * - lowerinode->i_rwsem (inode_lock[1]) 93162306a36Sopenharmony_ci * - OVL_I(lowerinode)->lock (ovl_inode_lock[1]) 93262306a36Sopenharmony_ci * 93362306a36Sopenharmony_ci * But lowerinode->i_rwsem SHOULD NOT be acquired while ovl_want_write() is 93462306a36Sopenharmony_ci * held, because it is in reverse order of the non-nested case using the same 93562306a36Sopenharmony_ci * upper fs: 93662306a36Sopenharmony_ci * - inode->i_rwsem (inode_lock[1]) 93762306a36Sopenharmony_ci * - upper_mnt->mnt_sb->s_writers (ovl_want_write[0]) 93862306a36Sopenharmony_ci * - OVL_I(inode)->lock (ovl_inode_lock[1]) 93962306a36Sopenharmony_ci */ 94062306a36Sopenharmony_ci#define OVL_MAX_NESTING FILESYSTEM_MAX_STACK_DEPTH 94162306a36Sopenharmony_ci 94262306a36Sopenharmony_cistatic inline void ovl_lockdep_annotate_inode_mutex_key(struct inode *inode) 94362306a36Sopenharmony_ci{ 94462306a36Sopenharmony_ci#ifdef CONFIG_LOCKDEP 94562306a36Sopenharmony_ci static struct lock_class_key ovl_i_mutex_key[OVL_MAX_NESTING]; 94662306a36Sopenharmony_ci static struct lock_class_key ovl_i_mutex_dir_key[OVL_MAX_NESTING]; 94762306a36Sopenharmony_ci static struct lock_class_key ovl_i_lock_key[OVL_MAX_NESTING]; 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci int depth = inode->i_sb->s_stack_depth - 1; 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci if (WARN_ON_ONCE(depth < 0 || depth >= OVL_MAX_NESTING)) 95262306a36Sopenharmony_ci depth = 0; 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) 95562306a36Sopenharmony_ci lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_dir_key[depth]); 95662306a36Sopenharmony_ci else 95762306a36Sopenharmony_ci lockdep_set_class(&inode->i_rwsem, &ovl_i_mutex_key[depth]); 95862306a36Sopenharmony_ci 95962306a36Sopenharmony_ci lockdep_set_class(&OVL_I(inode)->lock, &ovl_i_lock_key[depth]); 96062306a36Sopenharmony_ci#endif 96162306a36Sopenharmony_ci} 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_cistatic void ovl_next_ino(struct inode *inode) 96462306a36Sopenharmony_ci{ 96562306a36Sopenharmony_ci struct ovl_fs *ofs = OVL_FS(inode->i_sb); 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci inode->i_ino = atomic_long_inc_return(&ofs->last_ino); 96862306a36Sopenharmony_ci if (unlikely(!inode->i_ino)) 96962306a36Sopenharmony_ci inode->i_ino = atomic_long_inc_return(&ofs->last_ino); 97062306a36Sopenharmony_ci} 97162306a36Sopenharmony_ci 97262306a36Sopenharmony_cistatic void ovl_map_ino(struct inode *inode, unsigned long ino, int fsid) 97362306a36Sopenharmony_ci{ 97462306a36Sopenharmony_ci struct ovl_fs *ofs = OVL_FS(inode->i_sb); 97562306a36Sopenharmony_ci int xinobits = ovl_xino_bits(ofs); 97662306a36Sopenharmony_ci unsigned int xinoshift = 64 - xinobits; 97762306a36Sopenharmony_ci 97862306a36Sopenharmony_ci /* 97962306a36Sopenharmony_ci * When d_ino is consistent with st_ino (samefs or i_ino has enough 98062306a36Sopenharmony_ci * bits to encode layer), set the same value used for st_ino to i_ino, 98162306a36Sopenharmony_ci * so inode number exposed via /proc/locks and a like will be 98262306a36Sopenharmony_ci * consistent with d_ino and st_ino values. An i_ino value inconsistent 98362306a36Sopenharmony_ci * with d_ino also causes nfsd readdirplus to fail. 98462306a36Sopenharmony_ci */ 98562306a36Sopenharmony_ci inode->i_ino = ino; 98662306a36Sopenharmony_ci if (ovl_same_fs(ofs)) { 98762306a36Sopenharmony_ci return; 98862306a36Sopenharmony_ci } else if (xinobits && likely(!(ino >> xinoshift))) { 98962306a36Sopenharmony_ci inode->i_ino |= (unsigned long)fsid << (xinoshift + 1); 99062306a36Sopenharmony_ci return; 99162306a36Sopenharmony_ci } 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci /* 99462306a36Sopenharmony_ci * For directory inodes on non-samefs with xino disabled or xino 99562306a36Sopenharmony_ci * overflow, we allocate a non-persistent inode number, to be used for 99662306a36Sopenharmony_ci * resolving st_ino collisions in ovl_map_dev_ino(). 99762306a36Sopenharmony_ci * 99862306a36Sopenharmony_ci * To avoid ino collision with legitimate xino values from upper 99962306a36Sopenharmony_ci * layer (fsid 0), use the lowest xinobit to map the non 100062306a36Sopenharmony_ci * persistent inode numbers to the unified st_ino address space. 100162306a36Sopenharmony_ci */ 100262306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) { 100362306a36Sopenharmony_ci ovl_next_ino(inode); 100462306a36Sopenharmony_ci if (xinobits) { 100562306a36Sopenharmony_ci inode->i_ino &= ~0UL >> xinobits; 100662306a36Sopenharmony_ci inode->i_ino |= 1UL << xinoshift; 100762306a36Sopenharmony_ci } 100862306a36Sopenharmony_ci } 100962306a36Sopenharmony_ci} 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_civoid ovl_inode_init(struct inode *inode, struct ovl_inode_params *oip, 101262306a36Sopenharmony_ci unsigned long ino, int fsid) 101362306a36Sopenharmony_ci{ 101462306a36Sopenharmony_ci struct inode *realinode; 101562306a36Sopenharmony_ci struct ovl_inode *oi = OVL_I(inode); 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci oi->__upperdentry = oip->upperdentry; 101862306a36Sopenharmony_ci oi->oe = oip->oe; 101962306a36Sopenharmony_ci oi->redirect = oip->redirect; 102062306a36Sopenharmony_ci oi->lowerdata_redirect = oip->lowerdata_redirect; 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci realinode = ovl_inode_real(inode); 102362306a36Sopenharmony_ci ovl_copyattr(inode); 102462306a36Sopenharmony_ci ovl_copyflags(realinode, inode); 102562306a36Sopenharmony_ci ovl_map_ino(inode, ino, fsid); 102662306a36Sopenharmony_ci} 102762306a36Sopenharmony_ci 102862306a36Sopenharmony_cistatic void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev) 102962306a36Sopenharmony_ci{ 103062306a36Sopenharmony_ci inode->i_mode = mode; 103162306a36Sopenharmony_ci inode->i_flags |= S_NOCMTIME; 103262306a36Sopenharmony_ci#ifdef CONFIG_FS_POSIX_ACL 103362306a36Sopenharmony_ci inode->i_acl = inode->i_default_acl = ACL_DONT_CACHE; 103462306a36Sopenharmony_ci#endif 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci ovl_lockdep_annotate_inode_mutex_key(inode); 103762306a36Sopenharmony_ci 103862306a36Sopenharmony_ci switch (mode & S_IFMT) { 103962306a36Sopenharmony_ci case S_IFREG: 104062306a36Sopenharmony_ci inode->i_op = &ovl_file_inode_operations; 104162306a36Sopenharmony_ci inode->i_fop = &ovl_file_operations; 104262306a36Sopenharmony_ci inode->i_mapping->a_ops = &ovl_aops; 104362306a36Sopenharmony_ci break; 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_ci case S_IFDIR: 104662306a36Sopenharmony_ci inode->i_op = &ovl_dir_inode_operations; 104762306a36Sopenharmony_ci inode->i_fop = &ovl_dir_operations; 104862306a36Sopenharmony_ci break; 104962306a36Sopenharmony_ci 105062306a36Sopenharmony_ci case S_IFLNK: 105162306a36Sopenharmony_ci inode->i_op = &ovl_symlink_inode_operations; 105262306a36Sopenharmony_ci break; 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci default: 105562306a36Sopenharmony_ci inode->i_op = &ovl_special_inode_operations; 105662306a36Sopenharmony_ci init_special_inode(inode, mode, rdev); 105762306a36Sopenharmony_ci break; 105862306a36Sopenharmony_ci } 105962306a36Sopenharmony_ci} 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_ci/* 106262306a36Sopenharmony_ci * With inodes index enabled, an overlay inode nlink counts the union of upper 106362306a36Sopenharmony_ci * hardlinks and non-covered lower hardlinks. During the lifetime of a non-pure 106462306a36Sopenharmony_ci * upper inode, the following nlink modifying operations can happen: 106562306a36Sopenharmony_ci * 106662306a36Sopenharmony_ci * 1. Lower hardlink copy up 106762306a36Sopenharmony_ci * 2. Upper hardlink created, unlinked or renamed over 106862306a36Sopenharmony_ci * 3. Lower hardlink whiteout or renamed over 106962306a36Sopenharmony_ci * 107062306a36Sopenharmony_ci * For the first, copy up case, the union nlink does not change, whether the 107162306a36Sopenharmony_ci * operation succeeds or fails, but the upper inode nlink may change. 107262306a36Sopenharmony_ci * Therefore, before copy up, we store the union nlink value relative to the 107362306a36Sopenharmony_ci * lower inode nlink in the index inode xattr .overlay.nlink. 107462306a36Sopenharmony_ci * 107562306a36Sopenharmony_ci * For the second, upper hardlink case, the union nlink should be incremented 107662306a36Sopenharmony_ci * or decremented IFF the operation succeeds, aligned with nlink change of the 107762306a36Sopenharmony_ci * upper inode. Therefore, before link/unlink/rename, we store the union nlink 107862306a36Sopenharmony_ci * value relative to the upper inode nlink in the index inode. 107962306a36Sopenharmony_ci * 108062306a36Sopenharmony_ci * For the last, lower cover up case, we simplify things by preceding the 108162306a36Sopenharmony_ci * whiteout or cover up with copy up. This makes sure that there is an index 108262306a36Sopenharmony_ci * upper inode where the nlink xattr can be stored before the copied up upper 108362306a36Sopenharmony_ci * entry is unlink. 108462306a36Sopenharmony_ci */ 108562306a36Sopenharmony_ci#define OVL_NLINK_ADD_UPPER (1 << 0) 108662306a36Sopenharmony_ci 108762306a36Sopenharmony_ci/* 108862306a36Sopenharmony_ci * On-disk format for indexed nlink: 108962306a36Sopenharmony_ci * 109062306a36Sopenharmony_ci * nlink relative to the upper inode - "U[+-]NUM" 109162306a36Sopenharmony_ci * nlink relative to the lower inode - "L[+-]NUM" 109262306a36Sopenharmony_ci */ 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_cistatic int ovl_set_nlink_common(struct dentry *dentry, 109562306a36Sopenharmony_ci struct dentry *realdentry, const char *format) 109662306a36Sopenharmony_ci{ 109762306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 109862306a36Sopenharmony_ci struct inode *realinode = d_inode(realdentry); 109962306a36Sopenharmony_ci char buf[13]; 110062306a36Sopenharmony_ci int len; 110162306a36Sopenharmony_ci 110262306a36Sopenharmony_ci len = snprintf(buf, sizeof(buf), format, 110362306a36Sopenharmony_ci (int) (inode->i_nlink - realinode->i_nlink)); 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci if (WARN_ON(len >= sizeof(buf))) 110662306a36Sopenharmony_ci return -EIO; 110762306a36Sopenharmony_ci 110862306a36Sopenharmony_ci return ovl_setxattr(OVL_FS(inode->i_sb), ovl_dentry_upper(dentry), 110962306a36Sopenharmony_ci OVL_XATTR_NLINK, buf, len); 111062306a36Sopenharmony_ci} 111162306a36Sopenharmony_ci 111262306a36Sopenharmony_ciint ovl_set_nlink_upper(struct dentry *dentry) 111362306a36Sopenharmony_ci{ 111462306a36Sopenharmony_ci return ovl_set_nlink_common(dentry, ovl_dentry_upper(dentry), "U%+i"); 111562306a36Sopenharmony_ci} 111662306a36Sopenharmony_ci 111762306a36Sopenharmony_ciint ovl_set_nlink_lower(struct dentry *dentry) 111862306a36Sopenharmony_ci{ 111962306a36Sopenharmony_ci return ovl_set_nlink_common(dentry, ovl_dentry_lower(dentry), "L%+i"); 112062306a36Sopenharmony_ci} 112162306a36Sopenharmony_ci 112262306a36Sopenharmony_ciunsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry, 112362306a36Sopenharmony_ci struct dentry *upperdentry, 112462306a36Sopenharmony_ci unsigned int fallback) 112562306a36Sopenharmony_ci{ 112662306a36Sopenharmony_ci int nlink_diff; 112762306a36Sopenharmony_ci int nlink; 112862306a36Sopenharmony_ci char buf[13]; 112962306a36Sopenharmony_ci int err; 113062306a36Sopenharmony_ci 113162306a36Sopenharmony_ci if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) 113262306a36Sopenharmony_ci return fallback; 113362306a36Sopenharmony_ci 113462306a36Sopenharmony_ci err = ovl_getxattr_upper(ofs, upperdentry, OVL_XATTR_NLINK, 113562306a36Sopenharmony_ci &buf, sizeof(buf) - 1); 113662306a36Sopenharmony_ci if (err < 0) 113762306a36Sopenharmony_ci goto fail; 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_ci buf[err] = '\0'; 114062306a36Sopenharmony_ci if ((buf[0] != 'L' && buf[0] != 'U') || 114162306a36Sopenharmony_ci (buf[1] != '+' && buf[1] != '-')) 114262306a36Sopenharmony_ci goto fail; 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ci err = kstrtoint(buf + 1, 10, &nlink_diff); 114562306a36Sopenharmony_ci if (err < 0) 114662306a36Sopenharmony_ci goto fail; 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci nlink = d_inode(buf[0] == 'L' ? lowerdentry : upperdentry)->i_nlink; 114962306a36Sopenharmony_ci nlink += nlink_diff; 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci if (nlink <= 0) 115262306a36Sopenharmony_ci goto fail; 115362306a36Sopenharmony_ci 115462306a36Sopenharmony_ci return nlink; 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_cifail: 115762306a36Sopenharmony_ci pr_warn_ratelimited("failed to get index nlink (%pd2, err=%i)\n", 115862306a36Sopenharmony_ci upperdentry, err); 115962306a36Sopenharmony_ci return fallback; 116062306a36Sopenharmony_ci} 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_cistruct inode *ovl_new_inode(struct super_block *sb, umode_t mode, dev_t rdev) 116362306a36Sopenharmony_ci{ 116462306a36Sopenharmony_ci struct inode *inode; 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci inode = new_inode(sb); 116762306a36Sopenharmony_ci if (inode) 116862306a36Sopenharmony_ci ovl_fill_inode(inode, mode, rdev); 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci return inode; 117162306a36Sopenharmony_ci} 117262306a36Sopenharmony_ci 117362306a36Sopenharmony_cistatic int ovl_inode_test(struct inode *inode, void *data) 117462306a36Sopenharmony_ci{ 117562306a36Sopenharmony_ci return inode->i_private == data; 117662306a36Sopenharmony_ci} 117762306a36Sopenharmony_ci 117862306a36Sopenharmony_cistatic int ovl_inode_set(struct inode *inode, void *data) 117962306a36Sopenharmony_ci{ 118062306a36Sopenharmony_ci inode->i_private = data; 118162306a36Sopenharmony_ci return 0; 118262306a36Sopenharmony_ci} 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_cistatic bool ovl_verify_inode(struct inode *inode, struct dentry *lowerdentry, 118562306a36Sopenharmony_ci struct dentry *upperdentry, bool strict) 118662306a36Sopenharmony_ci{ 118762306a36Sopenharmony_ci /* 118862306a36Sopenharmony_ci * For directories, @strict verify from lookup path performs consistency 118962306a36Sopenharmony_ci * checks, so NULL lower/upper in dentry must match NULL lower/upper in 119062306a36Sopenharmony_ci * inode. Non @strict verify from NFS handle decode path passes NULL for 119162306a36Sopenharmony_ci * 'unknown' lower/upper. 119262306a36Sopenharmony_ci */ 119362306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode) && strict) { 119462306a36Sopenharmony_ci /* Real lower dir moved to upper layer under us? */ 119562306a36Sopenharmony_ci if (!lowerdentry && ovl_inode_lower(inode)) 119662306a36Sopenharmony_ci return false; 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ci /* Lookup of an uncovered redirect origin? */ 119962306a36Sopenharmony_ci if (!upperdentry && ovl_inode_upper(inode)) 120062306a36Sopenharmony_ci return false; 120162306a36Sopenharmony_ci } 120262306a36Sopenharmony_ci 120362306a36Sopenharmony_ci /* 120462306a36Sopenharmony_ci * Allow non-NULL lower inode in ovl_inode even if lowerdentry is NULL. 120562306a36Sopenharmony_ci * This happens when finding a copied up overlay inode for a renamed 120662306a36Sopenharmony_ci * or hardlinked overlay dentry and lower dentry cannot be followed 120762306a36Sopenharmony_ci * by origin because lower fs does not support file handles. 120862306a36Sopenharmony_ci */ 120962306a36Sopenharmony_ci if (lowerdentry && ovl_inode_lower(inode) != d_inode(lowerdentry)) 121062306a36Sopenharmony_ci return false; 121162306a36Sopenharmony_ci 121262306a36Sopenharmony_ci /* 121362306a36Sopenharmony_ci * Allow non-NULL __upperdentry in inode even if upperdentry is NULL. 121462306a36Sopenharmony_ci * This happens when finding a lower alias for a copied up hard link. 121562306a36Sopenharmony_ci */ 121662306a36Sopenharmony_ci if (upperdentry && ovl_inode_upper(inode) != d_inode(upperdentry)) 121762306a36Sopenharmony_ci return false; 121862306a36Sopenharmony_ci 121962306a36Sopenharmony_ci return true; 122062306a36Sopenharmony_ci} 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_cistruct inode *ovl_lookup_inode(struct super_block *sb, struct dentry *real, 122362306a36Sopenharmony_ci bool is_upper) 122462306a36Sopenharmony_ci{ 122562306a36Sopenharmony_ci struct inode *inode, *key = d_inode(real); 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci inode = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); 122862306a36Sopenharmony_ci if (!inode) 122962306a36Sopenharmony_ci return NULL; 123062306a36Sopenharmony_ci 123162306a36Sopenharmony_ci if (!ovl_verify_inode(inode, is_upper ? NULL : real, 123262306a36Sopenharmony_ci is_upper ? real : NULL, false)) { 123362306a36Sopenharmony_ci iput(inode); 123462306a36Sopenharmony_ci return ERR_PTR(-ESTALE); 123562306a36Sopenharmony_ci } 123662306a36Sopenharmony_ci 123762306a36Sopenharmony_ci return inode; 123862306a36Sopenharmony_ci} 123962306a36Sopenharmony_ci 124062306a36Sopenharmony_cibool ovl_lookup_trap_inode(struct super_block *sb, struct dentry *dir) 124162306a36Sopenharmony_ci{ 124262306a36Sopenharmony_ci struct inode *key = d_inode(dir); 124362306a36Sopenharmony_ci struct inode *trap; 124462306a36Sopenharmony_ci bool res; 124562306a36Sopenharmony_ci 124662306a36Sopenharmony_ci trap = ilookup5(sb, (unsigned long) key, ovl_inode_test, key); 124762306a36Sopenharmony_ci if (!trap) 124862306a36Sopenharmony_ci return false; 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ci res = IS_DEADDIR(trap) && !ovl_inode_upper(trap) && 125162306a36Sopenharmony_ci !ovl_inode_lower(trap); 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci iput(trap); 125462306a36Sopenharmony_ci return res; 125562306a36Sopenharmony_ci} 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci/* 125862306a36Sopenharmony_ci * Create an inode cache entry for layer root dir, that will intentionally 125962306a36Sopenharmony_ci * fail ovl_verify_inode(), so any lookup that will find some layer root 126062306a36Sopenharmony_ci * will fail. 126162306a36Sopenharmony_ci */ 126262306a36Sopenharmony_cistruct inode *ovl_get_trap_inode(struct super_block *sb, struct dentry *dir) 126362306a36Sopenharmony_ci{ 126462306a36Sopenharmony_ci struct inode *key = d_inode(dir); 126562306a36Sopenharmony_ci struct inode *trap; 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_ci if (!d_is_dir(dir)) 126862306a36Sopenharmony_ci return ERR_PTR(-ENOTDIR); 126962306a36Sopenharmony_ci 127062306a36Sopenharmony_ci trap = iget5_locked(sb, (unsigned long) key, ovl_inode_test, 127162306a36Sopenharmony_ci ovl_inode_set, key); 127262306a36Sopenharmony_ci if (!trap) 127362306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 127462306a36Sopenharmony_ci 127562306a36Sopenharmony_ci if (!(trap->i_state & I_NEW)) { 127662306a36Sopenharmony_ci /* Conflicting layer roots? */ 127762306a36Sopenharmony_ci iput(trap); 127862306a36Sopenharmony_ci return ERR_PTR(-ELOOP); 127962306a36Sopenharmony_ci } 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_ci trap->i_mode = S_IFDIR; 128262306a36Sopenharmony_ci trap->i_flags = S_DEAD; 128362306a36Sopenharmony_ci unlock_new_inode(trap); 128462306a36Sopenharmony_ci 128562306a36Sopenharmony_ci return trap; 128662306a36Sopenharmony_ci} 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci/* 128962306a36Sopenharmony_ci * Does overlay inode need to be hashed by lower inode? 129062306a36Sopenharmony_ci */ 129162306a36Sopenharmony_cistatic bool ovl_hash_bylower(struct super_block *sb, struct dentry *upper, 129262306a36Sopenharmony_ci struct dentry *lower, bool index) 129362306a36Sopenharmony_ci{ 129462306a36Sopenharmony_ci struct ovl_fs *ofs = OVL_FS(sb); 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci /* No, if pure upper */ 129762306a36Sopenharmony_ci if (!lower) 129862306a36Sopenharmony_ci return false; 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_ci /* Yes, if already indexed */ 130162306a36Sopenharmony_ci if (index) 130262306a36Sopenharmony_ci return true; 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci /* Yes, if won't be copied up */ 130562306a36Sopenharmony_ci if (!ovl_upper_mnt(ofs)) 130662306a36Sopenharmony_ci return true; 130762306a36Sopenharmony_ci 130862306a36Sopenharmony_ci /* No, if lower hardlink is or will be broken on copy up */ 130962306a36Sopenharmony_ci if ((upper || !ovl_indexdir(sb)) && 131062306a36Sopenharmony_ci !d_is_dir(lower) && d_inode(lower)->i_nlink > 1) 131162306a36Sopenharmony_ci return false; 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ci /* No, if non-indexed upper with NFS export */ 131462306a36Sopenharmony_ci if (ofs->config.nfs_export && upper) 131562306a36Sopenharmony_ci return false; 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci /* Otherwise, hash by lower inode for fsnotify */ 131862306a36Sopenharmony_ci return true; 131962306a36Sopenharmony_ci} 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_cistatic struct inode *ovl_iget5(struct super_block *sb, struct inode *newinode, 132262306a36Sopenharmony_ci struct inode *key) 132362306a36Sopenharmony_ci{ 132462306a36Sopenharmony_ci return newinode ? inode_insert5(newinode, (unsigned long) key, 132562306a36Sopenharmony_ci ovl_inode_test, ovl_inode_set, key) : 132662306a36Sopenharmony_ci iget5_locked(sb, (unsigned long) key, 132762306a36Sopenharmony_ci ovl_inode_test, ovl_inode_set, key); 132862306a36Sopenharmony_ci} 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_cistruct inode *ovl_get_inode(struct super_block *sb, 133162306a36Sopenharmony_ci struct ovl_inode_params *oip) 133262306a36Sopenharmony_ci{ 133362306a36Sopenharmony_ci struct ovl_fs *ofs = OVL_FS(sb); 133462306a36Sopenharmony_ci struct dentry *upperdentry = oip->upperdentry; 133562306a36Sopenharmony_ci struct ovl_path *lowerpath = ovl_lowerpath(oip->oe); 133662306a36Sopenharmony_ci struct inode *realinode = upperdentry ? d_inode(upperdentry) : NULL; 133762306a36Sopenharmony_ci struct inode *inode; 133862306a36Sopenharmony_ci struct dentry *lowerdentry = lowerpath ? lowerpath->dentry : NULL; 133962306a36Sopenharmony_ci struct path realpath = { 134062306a36Sopenharmony_ci .dentry = upperdentry ?: lowerdentry, 134162306a36Sopenharmony_ci .mnt = upperdentry ? ovl_upper_mnt(ofs) : lowerpath->layer->mnt, 134262306a36Sopenharmony_ci }; 134362306a36Sopenharmony_ci bool bylower = ovl_hash_bylower(sb, upperdentry, lowerdentry, 134462306a36Sopenharmony_ci oip->index); 134562306a36Sopenharmony_ci int fsid = bylower ? lowerpath->layer->fsid : 0; 134662306a36Sopenharmony_ci bool is_dir; 134762306a36Sopenharmony_ci unsigned long ino = 0; 134862306a36Sopenharmony_ci int err = oip->newinode ? -EEXIST : -ENOMEM; 134962306a36Sopenharmony_ci 135062306a36Sopenharmony_ci if (!realinode) 135162306a36Sopenharmony_ci realinode = d_inode(lowerdentry); 135262306a36Sopenharmony_ci 135362306a36Sopenharmony_ci /* 135462306a36Sopenharmony_ci * Copy up origin (lower) may exist for non-indexed upper, but we must 135562306a36Sopenharmony_ci * not use lower as hash key if this is a broken hardlink. 135662306a36Sopenharmony_ci */ 135762306a36Sopenharmony_ci is_dir = S_ISDIR(realinode->i_mode); 135862306a36Sopenharmony_ci if (upperdentry || bylower) { 135962306a36Sopenharmony_ci struct inode *key = d_inode(bylower ? lowerdentry : 136062306a36Sopenharmony_ci upperdentry); 136162306a36Sopenharmony_ci unsigned int nlink = is_dir ? 1 : realinode->i_nlink; 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_ci inode = ovl_iget5(sb, oip->newinode, key); 136462306a36Sopenharmony_ci if (!inode) 136562306a36Sopenharmony_ci goto out_err; 136662306a36Sopenharmony_ci if (!(inode->i_state & I_NEW)) { 136762306a36Sopenharmony_ci /* 136862306a36Sopenharmony_ci * Verify that the underlying files stored in the inode 136962306a36Sopenharmony_ci * match those in the dentry. 137062306a36Sopenharmony_ci */ 137162306a36Sopenharmony_ci if (!ovl_verify_inode(inode, lowerdentry, upperdentry, 137262306a36Sopenharmony_ci true)) { 137362306a36Sopenharmony_ci iput(inode); 137462306a36Sopenharmony_ci err = -ESTALE; 137562306a36Sopenharmony_ci goto out_err; 137662306a36Sopenharmony_ci } 137762306a36Sopenharmony_ci 137862306a36Sopenharmony_ci dput(upperdentry); 137962306a36Sopenharmony_ci ovl_free_entry(oip->oe); 138062306a36Sopenharmony_ci kfree(oip->redirect); 138162306a36Sopenharmony_ci kfree(oip->lowerdata_redirect); 138262306a36Sopenharmony_ci goto out; 138362306a36Sopenharmony_ci } 138462306a36Sopenharmony_ci 138562306a36Sopenharmony_ci /* Recalculate nlink for non-dir due to indexing */ 138662306a36Sopenharmony_ci if (!is_dir) 138762306a36Sopenharmony_ci nlink = ovl_get_nlink(ofs, lowerdentry, upperdentry, 138862306a36Sopenharmony_ci nlink); 138962306a36Sopenharmony_ci set_nlink(inode, nlink); 139062306a36Sopenharmony_ci ino = key->i_ino; 139162306a36Sopenharmony_ci } else { 139262306a36Sopenharmony_ci /* Lower hardlink that will be broken on copy up */ 139362306a36Sopenharmony_ci inode = new_inode(sb); 139462306a36Sopenharmony_ci if (!inode) { 139562306a36Sopenharmony_ci err = -ENOMEM; 139662306a36Sopenharmony_ci goto out_err; 139762306a36Sopenharmony_ci } 139862306a36Sopenharmony_ci ino = realinode->i_ino; 139962306a36Sopenharmony_ci fsid = lowerpath->layer->fsid; 140062306a36Sopenharmony_ci } 140162306a36Sopenharmony_ci ovl_fill_inode(inode, realinode->i_mode, realinode->i_rdev); 140262306a36Sopenharmony_ci ovl_inode_init(inode, oip, ino, fsid); 140362306a36Sopenharmony_ci 140462306a36Sopenharmony_ci if (upperdentry && ovl_is_impuredir(sb, upperdentry)) 140562306a36Sopenharmony_ci ovl_set_flag(OVL_IMPURE, inode); 140662306a36Sopenharmony_ci 140762306a36Sopenharmony_ci if (oip->index) 140862306a36Sopenharmony_ci ovl_set_flag(OVL_INDEX, inode); 140962306a36Sopenharmony_ci 141062306a36Sopenharmony_ci if (bylower) 141162306a36Sopenharmony_ci ovl_set_flag(OVL_CONST_INO, inode); 141262306a36Sopenharmony_ci 141362306a36Sopenharmony_ci /* Check for non-merge dir that may have whiteouts */ 141462306a36Sopenharmony_ci if (is_dir) { 141562306a36Sopenharmony_ci if (((upperdentry && lowerdentry) || ovl_numlower(oip->oe) > 1) || 141662306a36Sopenharmony_ci ovl_path_check_origin_xattr(ofs, &realpath)) { 141762306a36Sopenharmony_ci ovl_set_flag(OVL_WHITEOUTS, inode); 141862306a36Sopenharmony_ci } 141962306a36Sopenharmony_ci } 142062306a36Sopenharmony_ci 142162306a36Sopenharmony_ci /* Check for immutable/append-only inode flags in xattr */ 142262306a36Sopenharmony_ci if (upperdentry) 142362306a36Sopenharmony_ci ovl_check_protattr(inode, upperdentry); 142462306a36Sopenharmony_ci 142562306a36Sopenharmony_ci if (inode->i_state & I_NEW) 142662306a36Sopenharmony_ci unlock_new_inode(inode); 142762306a36Sopenharmony_ciout: 142862306a36Sopenharmony_ci return inode; 142962306a36Sopenharmony_ci 143062306a36Sopenharmony_ciout_err: 143162306a36Sopenharmony_ci pr_warn_ratelimited("failed to get inode (%i)\n", err); 143262306a36Sopenharmony_ci inode = ERR_PTR(err); 143362306a36Sopenharmony_ci goto out; 143462306a36Sopenharmony_ci} 1435