162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci#include <linux/ceph/ceph_debug.h> 362306a36Sopenharmony_ci 462306a36Sopenharmony_ci#include <linux/module.h> 562306a36Sopenharmony_ci#include <linux/fs.h> 662306a36Sopenharmony_ci#include <linux/slab.h> 762306a36Sopenharmony_ci#include <linux/string.h> 862306a36Sopenharmony_ci#include <linux/uaccess.h> 962306a36Sopenharmony_ci#include <linux/kernel.h> 1062306a36Sopenharmony_ci#include <linux/writeback.h> 1162306a36Sopenharmony_ci#include <linux/vmalloc.h> 1262306a36Sopenharmony_ci#include <linux/xattr.h> 1362306a36Sopenharmony_ci#include <linux/posix_acl.h> 1462306a36Sopenharmony_ci#include <linux/random.h> 1562306a36Sopenharmony_ci#include <linux/sort.h> 1662306a36Sopenharmony_ci#include <linux/iversion.h> 1762306a36Sopenharmony_ci#include <linux/fscrypt.h> 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#include "super.h" 2062306a36Sopenharmony_ci#include "mds_client.h" 2162306a36Sopenharmony_ci#include "cache.h" 2262306a36Sopenharmony_ci#include "crypto.h" 2362306a36Sopenharmony_ci#include <linux/ceph/decode.h> 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci/* 2662306a36Sopenharmony_ci * Ceph inode operations 2762306a36Sopenharmony_ci * 2862306a36Sopenharmony_ci * Implement basic inode helpers (get, alloc) and inode ops (getattr, 2962306a36Sopenharmony_ci * setattr, etc.), xattr helpers, and helpers for assimilating 3062306a36Sopenharmony_ci * metadata returned by the MDS into our cache. 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci * Also define helpers for doing asynchronous writeback, invalidation, 3362306a36Sopenharmony_ci * and truncation for the benefit of those who can't afford to block 3462306a36Sopenharmony_ci * (typically because they are in the message handler path). 3562306a36Sopenharmony_ci */ 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_cistatic const struct inode_operations ceph_symlink_iops; 3862306a36Sopenharmony_cistatic const struct inode_operations ceph_encrypted_symlink_iops; 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_cistatic void ceph_inode_work(struct work_struct *work); 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci/* 4362306a36Sopenharmony_ci * find or create an inode, given the ceph ino number 4462306a36Sopenharmony_ci */ 4562306a36Sopenharmony_cistatic int ceph_set_ino_cb(struct inode *inode, void *data) 4662306a36Sopenharmony_ci{ 4762306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 4862306a36Sopenharmony_ci struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci ci->i_vino = *(struct ceph_vino *)data; 5162306a36Sopenharmony_ci inode->i_ino = ceph_vino_to_ino_t(ci->i_vino); 5262306a36Sopenharmony_ci inode_set_iversion_raw(inode, 0); 5362306a36Sopenharmony_ci percpu_counter_inc(&mdsc->metric.total_inodes); 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci return 0; 5662306a36Sopenharmony_ci} 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci/** 5962306a36Sopenharmony_ci * ceph_new_inode - allocate a new inode in advance of an expected create 6062306a36Sopenharmony_ci * @dir: parent directory for new inode 6162306a36Sopenharmony_ci * @dentry: dentry that may eventually point to new inode 6262306a36Sopenharmony_ci * @mode: mode of new inode 6362306a36Sopenharmony_ci * @as_ctx: pointer to inherited security context 6462306a36Sopenharmony_ci * 6562306a36Sopenharmony_ci * Allocate a new inode in advance of an operation to create a new inode. 6662306a36Sopenharmony_ci * This allocates the inode and sets up the acl_sec_ctx with appropriate 6762306a36Sopenharmony_ci * info for the new inode. 6862306a36Sopenharmony_ci * 6962306a36Sopenharmony_ci * Returns a pointer to the new inode or an ERR_PTR. 7062306a36Sopenharmony_ci */ 7162306a36Sopenharmony_cistruct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry, 7262306a36Sopenharmony_ci umode_t *mode, struct ceph_acl_sec_ctx *as_ctx) 7362306a36Sopenharmony_ci{ 7462306a36Sopenharmony_ci int err; 7562306a36Sopenharmony_ci struct inode *inode; 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci inode = new_inode(dir->i_sb); 7862306a36Sopenharmony_ci if (!inode) 7962306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci if (!S_ISLNK(*mode)) { 8262306a36Sopenharmony_ci err = ceph_pre_init_acls(dir, mode, as_ctx); 8362306a36Sopenharmony_ci if (err < 0) 8462306a36Sopenharmony_ci goto out_err; 8562306a36Sopenharmony_ci } 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci inode->i_state = 0; 8862306a36Sopenharmony_ci inode->i_mode = *mode; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci err = ceph_security_init_secctx(dentry, *mode, as_ctx); 9162306a36Sopenharmony_ci if (err < 0) 9262306a36Sopenharmony_ci goto out_err; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci /* 9562306a36Sopenharmony_ci * We'll skip setting fscrypt context for snapshots, leaving that for 9662306a36Sopenharmony_ci * the handle_reply(). 9762306a36Sopenharmony_ci */ 9862306a36Sopenharmony_ci if (ceph_snap(dir) != CEPH_SNAPDIR) { 9962306a36Sopenharmony_ci err = ceph_fscrypt_prepare_context(dir, inode, as_ctx); 10062306a36Sopenharmony_ci if (err) 10162306a36Sopenharmony_ci goto out_err; 10262306a36Sopenharmony_ci } 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci return inode; 10562306a36Sopenharmony_ciout_err: 10662306a36Sopenharmony_ci iput(inode); 10762306a36Sopenharmony_ci return ERR_PTR(err); 10862306a36Sopenharmony_ci} 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_civoid ceph_as_ctx_to_req(struct ceph_mds_request *req, 11162306a36Sopenharmony_ci struct ceph_acl_sec_ctx *as_ctx) 11262306a36Sopenharmony_ci{ 11362306a36Sopenharmony_ci if (as_ctx->pagelist) { 11462306a36Sopenharmony_ci req->r_pagelist = as_ctx->pagelist; 11562306a36Sopenharmony_ci as_ctx->pagelist = NULL; 11662306a36Sopenharmony_ci } 11762306a36Sopenharmony_ci ceph_fscrypt_as_ctx_to_req(req, as_ctx); 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci/** 12162306a36Sopenharmony_ci * ceph_get_inode - find or create/hash a new inode 12262306a36Sopenharmony_ci * @sb: superblock to search and allocate in 12362306a36Sopenharmony_ci * @vino: vino to search for 12462306a36Sopenharmony_ci * @newino: optional new inode to insert if one isn't found (may be NULL) 12562306a36Sopenharmony_ci * 12662306a36Sopenharmony_ci * Search for or insert a new inode into the hash for the given vino, and 12762306a36Sopenharmony_ci * return a reference to it. If new is non-NULL, its reference is consumed. 12862306a36Sopenharmony_ci */ 12962306a36Sopenharmony_cistruct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino, 13062306a36Sopenharmony_ci struct inode *newino) 13162306a36Sopenharmony_ci{ 13262306a36Sopenharmony_ci struct inode *inode; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci if (ceph_vino_is_reserved(vino)) 13562306a36Sopenharmony_ci return ERR_PTR(-EREMOTEIO); 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci if (newino) { 13862306a36Sopenharmony_ci inode = inode_insert5(newino, (unsigned long)vino.ino, 13962306a36Sopenharmony_ci ceph_ino_compare, ceph_set_ino_cb, &vino); 14062306a36Sopenharmony_ci if (inode != newino) 14162306a36Sopenharmony_ci iput(newino); 14262306a36Sopenharmony_ci } else { 14362306a36Sopenharmony_ci inode = iget5_locked(sb, (unsigned long)vino.ino, 14462306a36Sopenharmony_ci ceph_ino_compare, ceph_set_ino_cb, &vino); 14562306a36Sopenharmony_ci } 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci if (!inode) { 14862306a36Sopenharmony_ci dout("No inode found for %llx.%llx\n", vino.ino, vino.snap); 14962306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 15062306a36Sopenharmony_ci } 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci dout("get_inode on %llu=%llx.%llx got %p new %d\n", ceph_present_inode(inode), 15362306a36Sopenharmony_ci ceph_vinop(inode), inode, !!(inode->i_state & I_NEW)); 15462306a36Sopenharmony_ci return inode; 15562306a36Sopenharmony_ci} 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci/* 15862306a36Sopenharmony_ci * get/constuct snapdir inode for a given directory 15962306a36Sopenharmony_ci */ 16062306a36Sopenharmony_cistruct inode *ceph_get_snapdir(struct inode *parent) 16162306a36Sopenharmony_ci{ 16262306a36Sopenharmony_ci struct ceph_vino vino = { 16362306a36Sopenharmony_ci .ino = ceph_ino(parent), 16462306a36Sopenharmony_ci .snap = CEPH_SNAPDIR, 16562306a36Sopenharmony_ci }; 16662306a36Sopenharmony_ci struct inode *inode = ceph_get_inode(parent->i_sb, vino, NULL); 16762306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 16862306a36Sopenharmony_ci int ret = -ENOTDIR; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci if (IS_ERR(inode)) 17162306a36Sopenharmony_ci return inode; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci if (!S_ISDIR(parent->i_mode)) { 17462306a36Sopenharmony_ci pr_warn_once("bad snapdir parent type (mode=0%o)\n", 17562306a36Sopenharmony_ci parent->i_mode); 17662306a36Sopenharmony_ci goto err; 17762306a36Sopenharmony_ci } 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci if (!(inode->i_state & I_NEW) && !S_ISDIR(inode->i_mode)) { 18062306a36Sopenharmony_ci pr_warn_once("bad snapdir inode type (mode=0%o)\n", 18162306a36Sopenharmony_ci inode->i_mode); 18262306a36Sopenharmony_ci goto err; 18362306a36Sopenharmony_ci } 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci inode->i_mode = parent->i_mode; 18662306a36Sopenharmony_ci inode->i_uid = parent->i_uid; 18762306a36Sopenharmony_ci inode->i_gid = parent->i_gid; 18862306a36Sopenharmony_ci inode->i_mtime = parent->i_mtime; 18962306a36Sopenharmony_ci inode_set_ctime_to_ts(inode, inode_get_ctime(parent)); 19062306a36Sopenharmony_ci inode->i_atime = parent->i_atime; 19162306a36Sopenharmony_ci ci->i_rbytes = 0; 19262306a36Sopenharmony_ci ci->i_btime = ceph_inode(parent)->i_btime; 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION 19562306a36Sopenharmony_ci /* if encrypted, just borrow fscrypt_auth from parent */ 19662306a36Sopenharmony_ci if (IS_ENCRYPTED(parent)) { 19762306a36Sopenharmony_ci struct ceph_inode_info *pci = ceph_inode(parent); 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci ci->fscrypt_auth = kmemdup(pci->fscrypt_auth, 20062306a36Sopenharmony_ci pci->fscrypt_auth_len, 20162306a36Sopenharmony_ci GFP_KERNEL); 20262306a36Sopenharmony_ci if (ci->fscrypt_auth) { 20362306a36Sopenharmony_ci inode->i_flags |= S_ENCRYPTED; 20462306a36Sopenharmony_ci ci->fscrypt_auth_len = pci->fscrypt_auth_len; 20562306a36Sopenharmony_ci } else { 20662306a36Sopenharmony_ci dout("Failed to alloc snapdir fscrypt_auth\n"); 20762306a36Sopenharmony_ci ret = -ENOMEM; 20862306a36Sopenharmony_ci goto err; 20962306a36Sopenharmony_ci } 21062306a36Sopenharmony_ci } 21162306a36Sopenharmony_ci#endif 21262306a36Sopenharmony_ci if (inode->i_state & I_NEW) { 21362306a36Sopenharmony_ci inode->i_op = &ceph_snapdir_iops; 21462306a36Sopenharmony_ci inode->i_fop = &ceph_snapdir_fops; 21562306a36Sopenharmony_ci ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */ 21662306a36Sopenharmony_ci unlock_new_inode(inode); 21762306a36Sopenharmony_ci } 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci return inode; 22062306a36Sopenharmony_cierr: 22162306a36Sopenharmony_ci if ((inode->i_state & I_NEW)) 22262306a36Sopenharmony_ci discard_new_inode(inode); 22362306a36Sopenharmony_ci else 22462306a36Sopenharmony_ci iput(inode); 22562306a36Sopenharmony_ci return ERR_PTR(ret); 22662306a36Sopenharmony_ci} 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ciconst struct inode_operations ceph_file_iops = { 22962306a36Sopenharmony_ci .permission = ceph_permission, 23062306a36Sopenharmony_ci .setattr = ceph_setattr, 23162306a36Sopenharmony_ci .getattr = ceph_getattr, 23262306a36Sopenharmony_ci .listxattr = ceph_listxattr, 23362306a36Sopenharmony_ci .get_inode_acl = ceph_get_acl, 23462306a36Sopenharmony_ci .set_acl = ceph_set_acl, 23562306a36Sopenharmony_ci}; 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci/* 23962306a36Sopenharmony_ci * We use a 'frag tree' to keep track of the MDS's directory fragments 24062306a36Sopenharmony_ci * for a given inode (usually there is just a single fragment). We 24162306a36Sopenharmony_ci * need to know when a child frag is delegated to a new MDS, or when 24262306a36Sopenharmony_ci * it is flagged as replicated, so we can direct our requests 24362306a36Sopenharmony_ci * accordingly. 24462306a36Sopenharmony_ci */ 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci/* 24762306a36Sopenharmony_ci * find/create a frag in the tree 24862306a36Sopenharmony_ci */ 24962306a36Sopenharmony_cistatic struct ceph_inode_frag *__get_or_create_frag(struct ceph_inode_info *ci, 25062306a36Sopenharmony_ci u32 f) 25162306a36Sopenharmony_ci{ 25262306a36Sopenharmony_ci struct rb_node **p; 25362306a36Sopenharmony_ci struct rb_node *parent = NULL; 25462306a36Sopenharmony_ci struct ceph_inode_frag *frag; 25562306a36Sopenharmony_ci int c; 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci p = &ci->i_fragtree.rb_node; 25862306a36Sopenharmony_ci while (*p) { 25962306a36Sopenharmony_ci parent = *p; 26062306a36Sopenharmony_ci frag = rb_entry(parent, struct ceph_inode_frag, node); 26162306a36Sopenharmony_ci c = ceph_frag_compare(f, frag->frag); 26262306a36Sopenharmony_ci if (c < 0) 26362306a36Sopenharmony_ci p = &(*p)->rb_left; 26462306a36Sopenharmony_ci else if (c > 0) 26562306a36Sopenharmony_ci p = &(*p)->rb_right; 26662306a36Sopenharmony_ci else 26762306a36Sopenharmony_ci return frag; 26862306a36Sopenharmony_ci } 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci frag = kmalloc(sizeof(*frag), GFP_NOFS); 27162306a36Sopenharmony_ci if (!frag) 27262306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci frag->frag = f; 27562306a36Sopenharmony_ci frag->split_by = 0; 27662306a36Sopenharmony_ci frag->mds = -1; 27762306a36Sopenharmony_ci frag->ndist = 0; 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_ci rb_link_node(&frag->node, parent, p); 28062306a36Sopenharmony_ci rb_insert_color(&frag->node, &ci->i_fragtree); 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci dout("get_or_create_frag added %llx.%llx frag %x\n", 28362306a36Sopenharmony_ci ceph_vinop(&ci->netfs.inode), f); 28462306a36Sopenharmony_ci return frag; 28562306a36Sopenharmony_ci} 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci/* 28862306a36Sopenharmony_ci * find a specific frag @f 28962306a36Sopenharmony_ci */ 29062306a36Sopenharmony_cistruct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) 29162306a36Sopenharmony_ci{ 29262306a36Sopenharmony_ci struct rb_node *n = ci->i_fragtree.rb_node; 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci while (n) { 29562306a36Sopenharmony_ci struct ceph_inode_frag *frag = 29662306a36Sopenharmony_ci rb_entry(n, struct ceph_inode_frag, node); 29762306a36Sopenharmony_ci int c = ceph_frag_compare(f, frag->frag); 29862306a36Sopenharmony_ci if (c < 0) 29962306a36Sopenharmony_ci n = n->rb_left; 30062306a36Sopenharmony_ci else if (c > 0) 30162306a36Sopenharmony_ci n = n->rb_right; 30262306a36Sopenharmony_ci else 30362306a36Sopenharmony_ci return frag; 30462306a36Sopenharmony_ci } 30562306a36Sopenharmony_ci return NULL; 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci/* 30962306a36Sopenharmony_ci * Choose frag containing the given value @v. If @pfrag is 31062306a36Sopenharmony_ci * specified, copy the frag delegation info to the caller if 31162306a36Sopenharmony_ci * it is present. 31262306a36Sopenharmony_ci */ 31362306a36Sopenharmony_cistatic u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, 31462306a36Sopenharmony_ci struct ceph_inode_frag *pfrag, int *found) 31562306a36Sopenharmony_ci{ 31662306a36Sopenharmony_ci u32 t = ceph_frag_make(0, 0); 31762306a36Sopenharmony_ci struct ceph_inode_frag *frag; 31862306a36Sopenharmony_ci unsigned nway, i; 31962306a36Sopenharmony_ci u32 n; 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci if (found) 32262306a36Sopenharmony_ci *found = 0; 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci while (1) { 32562306a36Sopenharmony_ci WARN_ON(!ceph_frag_contains_value(t, v)); 32662306a36Sopenharmony_ci frag = __ceph_find_frag(ci, t); 32762306a36Sopenharmony_ci if (!frag) 32862306a36Sopenharmony_ci break; /* t is a leaf */ 32962306a36Sopenharmony_ci if (frag->split_by == 0) { 33062306a36Sopenharmony_ci if (pfrag) 33162306a36Sopenharmony_ci memcpy(pfrag, frag, sizeof(*pfrag)); 33262306a36Sopenharmony_ci if (found) 33362306a36Sopenharmony_ci *found = 1; 33462306a36Sopenharmony_ci break; 33562306a36Sopenharmony_ci } 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci /* choose child */ 33862306a36Sopenharmony_ci nway = 1 << frag->split_by; 33962306a36Sopenharmony_ci dout("choose_frag(%x) %x splits by %d (%d ways)\n", v, t, 34062306a36Sopenharmony_ci frag->split_by, nway); 34162306a36Sopenharmony_ci for (i = 0; i < nway; i++) { 34262306a36Sopenharmony_ci n = ceph_frag_make_child(t, frag->split_by, i); 34362306a36Sopenharmony_ci if (ceph_frag_contains_value(n, v)) { 34462306a36Sopenharmony_ci t = n; 34562306a36Sopenharmony_ci break; 34662306a36Sopenharmony_ci } 34762306a36Sopenharmony_ci } 34862306a36Sopenharmony_ci BUG_ON(i == nway); 34962306a36Sopenharmony_ci } 35062306a36Sopenharmony_ci dout("choose_frag(%x) = %x\n", v, t); 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci return t; 35362306a36Sopenharmony_ci} 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ciu32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, 35662306a36Sopenharmony_ci struct ceph_inode_frag *pfrag, int *found) 35762306a36Sopenharmony_ci{ 35862306a36Sopenharmony_ci u32 ret; 35962306a36Sopenharmony_ci mutex_lock(&ci->i_fragtree_mutex); 36062306a36Sopenharmony_ci ret = __ceph_choose_frag(ci, v, pfrag, found); 36162306a36Sopenharmony_ci mutex_unlock(&ci->i_fragtree_mutex); 36262306a36Sopenharmony_ci return ret; 36362306a36Sopenharmony_ci} 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci/* 36662306a36Sopenharmony_ci * Process dirfrag (delegation) info from the mds. Include leaf 36762306a36Sopenharmony_ci * fragment in tree ONLY if ndist > 0. Otherwise, only 36862306a36Sopenharmony_ci * branches/splits are included in i_fragtree) 36962306a36Sopenharmony_ci */ 37062306a36Sopenharmony_cistatic int ceph_fill_dirfrag(struct inode *inode, 37162306a36Sopenharmony_ci struct ceph_mds_reply_dirfrag *dirinfo) 37262306a36Sopenharmony_ci{ 37362306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 37462306a36Sopenharmony_ci struct ceph_inode_frag *frag; 37562306a36Sopenharmony_ci u32 id = le32_to_cpu(dirinfo->frag); 37662306a36Sopenharmony_ci int mds = le32_to_cpu(dirinfo->auth); 37762306a36Sopenharmony_ci int ndist = le32_to_cpu(dirinfo->ndist); 37862306a36Sopenharmony_ci int diri_auth = -1; 37962306a36Sopenharmony_ci int i; 38062306a36Sopenharmony_ci int err = 0; 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 38362306a36Sopenharmony_ci if (ci->i_auth_cap) 38462306a36Sopenharmony_ci diri_auth = ci->i_auth_cap->mds; 38562306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci if (mds == -1) /* CDIR_AUTH_PARENT */ 38862306a36Sopenharmony_ci mds = diri_auth; 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci mutex_lock(&ci->i_fragtree_mutex); 39162306a36Sopenharmony_ci if (ndist == 0 && mds == diri_auth) { 39262306a36Sopenharmony_ci /* no delegation info needed. */ 39362306a36Sopenharmony_ci frag = __ceph_find_frag(ci, id); 39462306a36Sopenharmony_ci if (!frag) 39562306a36Sopenharmony_ci goto out; 39662306a36Sopenharmony_ci if (frag->split_by == 0) { 39762306a36Sopenharmony_ci /* tree leaf, remove */ 39862306a36Sopenharmony_ci dout("fill_dirfrag removed %llx.%llx frag %x" 39962306a36Sopenharmony_ci " (no ref)\n", ceph_vinop(inode), id); 40062306a36Sopenharmony_ci rb_erase(&frag->node, &ci->i_fragtree); 40162306a36Sopenharmony_ci kfree(frag); 40262306a36Sopenharmony_ci } else { 40362306a36Sopenharmony_ci /* tree branch, keep and clear */ 40462306a36Sopenharmony_ci dout("fill_dirfrag cleared %llx.%llx frag %x" 40562306a36Sopenharmony_ci " referral\n", ceph_vinop(inode), id); 40662306a36Sopenharmony_ci frag->mds = -1; 40762306a36Sopenharmony_ci frag->ndist = 0; 40862306a36Sopenharmony_ci } 40962306a36Sopenharmony_ci goto out; 41062306a36Sopenharmony_ci } 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci /* find/add this frag to store mds delegation info */ 41462306a36Sopenharmony_ci frag = __get_or_create_frag(ci, id); 41562306a36Sopenharmony_ci if (IS_ERR(frag)) { 41662306a36Sopenharmony_ci /* this is not the end of the world; we can continue 41762306a36Sopenharmony_ci with bad/inaccurate delegation info */ 41862306a36Sopenharmony_ci pr_err("fill_dirfrag ENOMEM on mds ref %llx.%llx fg %x\n", 41962306a36Sopenharmony_ci ceph_vinop(inode), le32_to_cpu(dirinfo->frag)); 42062306a36Sopenharmony_ci err = -ENOMEM; 42162306a36Sopenharmony_ci goto out; 42262306a36Sopenharmony_ci } 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci frag->mds = mds; 42562306a36Sopenharmony_ci frag->ndist = min_t(u32, ndist, CEPH_MAX_DIRFRAG_REP); 42662306a36Sopenharmony_ci for (i = 0; i < frag->ndist; i++) 42762306a36Sopenharmony_ci frag->dist[i] = le32_to_cpu(dirinfo->dist[i]); 42862306a36Sopenharmony_ci dout("fill_dirfrag %llx.%llx frag %x ndist=%d\n", 42962306a36Sopenharmony_ci ceph_vinop(inode), frag->frag, frag->ndist); 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ciout: 43262306a36Sopenharmony_ci mutex_unlock(&ci->i_fragtree_mutex); 43362306a36Sopenharmony_ci return err; 43462306a36Sopenharmony_ci} 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_cistatic int frag_tree_split_cmp(const void *l, const void *r) 43762306a36Sopenharmony_ci{ 43862306a36Sopenharmony_ci struct ceph_frag_tree_split *ls = (struct ceph_frag_tree_split*)l; 43962306a36Sopenharmony_ci struct ceph_frag_tree_split *rs = (struct ceph_frag_tree_split*)r; 44062306a36Sopenharmony_ci return ceph_frag_compare(le32_to_cpu(ls->frag), 44162306a36Sopenharmony_ci le32_to_cpu(rs->frag)); 44262306a36Sopenharmony_ci} 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_cistatic bool is_frag_child(u32 f, struct ceph_inode_frag *frag) 44562306a36Sopenharmony_ci{ 44662306a36Sopenharmony_ci if (!frag) 44762306a36Sopenharmony_ci return f == ceph_frag_make(0, 0); 44862306a36Sopenharmony_ci if (ceph_frag_bits(f) != ceph_frag_bits(frag->frag) + frag->split_by) 44962306a36Sopenharmony_ci return false; 45062306a36Sopenharmony_ci return ceph_frag_contains_value(frag->frag, ceph_frag_value(f)); 45162306a36Sopenharmony_ci} 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_cistatic int ceph_fill_fragtree(struct inode *inode, 45462306a36Sopenharmony_ci struct ceph_frag_tree_head *fragtree, 45562306a36Sopenharmony_ci struct ceph_mds_reply_dirfrag *dirinfo) 45662306a36Sopenharmony_ci{ 45762306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 45862306a36Sopenharmony_ci struct ceph_inode_frag *frag, *prev_frag = NULL; 45962306a36Sopenharmony_ci struct rb_node *rb_node; 46062306a36Sopenharmony_ci unsigned i, split_by, nsplits; 46162306a36Sopenharmony_ci u32 id; 46262306a36Sopenharmony_ci bool update = false; 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_ci mutex_lock(&ci->i_fragtree_mutex); 46562306a36Sopenharmony_ci nsplits = le32_to_cpu(fragtree->nsplits); 46662306a36Sopenharmony_ci if (nsplits != ci->i_fragtree_nsplits) { 46762306a36Sopenharmony_ci update = true; 46862306a36Sopenharmony_ci } else if (nsplits) { 46962306a36Sopenharmony_ci i = get_random_u32_below(nsplits); 47062306a36Sopenharmony_ci id = le32_to_cpu(fragtree->splits[i].frag); 47162306a36Sopenharmony_ci if (!__ceph_find_frag(ci, id)) 47262306a36Sopenharmony_ci update = true; 47362306a36Sopenharmony_ci } else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) { 47462306a36Sopenharmony_ci rb_node = rb_first(&ci->i_fragtree); 47562306a36Sopenharmony_ci frag = rb_entry(rb_node, struct ceph_inode_frag, node); 47662306a36Sopenharmony_ci if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node)) 47762306a36Sopenharmony_ci update = true; 47862306a36Sopenharmony_ci } 47962306a36Sopenharmony_ci if (!update && dirinfo) { 48062306a36Sopenharmony_ci id = le32_to_cpu(dirinfo->frag); 48162306a36Sopenharmony_ci if (id != __ceph_choose_frag(ci, id, NULL, NULL)) 48262306a36Sopenharmony_ci update = true; 48362306a36Sopenharmony_ci } 48462306a36Sopenharmony_ci if (!update) 48562306a36Sopenharmony_ci goto out_unlock; 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci if (nsplits > 1) { 48862306a36Sopenharmony_ci sort(fragtree->splits, nsplits, sizeof(fragtree->splits[0]), 48962306a36Sopenharmony_ci frag_tree_split_cmp, NULL); 49062306a36Sopenharmony_ci } 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode)); 49362306a36Sopenharmony_ci rb_node = rb_first(&ci->i_fragtree); 49462306a36Sopenharmony_ci for (i = 0; i < nsplits; i++) { 49562306a36Sopenharmony_ci id = le32_to_cpu(fragtree->splits[i].frag); 49662306a36Sopenharmony_ci split_by = le32_to_cpu(fragtree->splits[i].by); 49762306a36Sopenharmony_ci if (split_by == 0 || ceph_frag_bits(id) + split_by > 24) { 49862306a36Sopenharmony_ci pr_err("fill_fragtree %llx.%llx invalid split %d/%u, " 49962306a36Sopenharmony_ci "frag %x split by %d\n", ceph_vinop(inode), 50062306a36Sopenharmony_ci i, nsplits, id, split_by); 50162306a36Sopenharmony_ci continue; 50262306a36Sopenharmony_ci } 50362306a36Sopenharmony_ci frag = NULL; 50462306a36Sopenharmony_ci while (rb_node) { 50562306a36Sopenharmony_ci frag = rb_entry(rb_node, struct ceph_inode_frag, node); 50662306a36Sopenharmony_ci if (ceph_frag_compare(frag->frag, id) >= 0) { 50762306a36Sopenharmony_ci if (frag->frag != id) 50862306a36Sopenharmony_ci frag = NULL; 50962306a36Sopenharmony_ci else 51062306a36Sopenharmony_ci rb_node = rb_next(rb_node); 51162306a36Sopenharmony_ci break; 51262306a36Sopenharmony_ci } 51362306a36Sopenharmony_ci rb_node = rb_next(rb_node); 51462306a36Sopenharmony_ci /* delete stale split/leaf node */ 51562306a36Sopenharmony_ci if (frag->split_by > 0 || 51662306a36Sopenharmony_ci !is_frag_child(frag->frag, prev_frag)) { 51762306a36Sopenharmony_ci rb_erase(&frag->node, &ci->i_fragtree); 51862306a36Sopenharmony_ci if (frag->split_by > 0) 51962306a36Sopenharmony_ci ci->i_fragtree_nsplits--; 52062306a36Sopenharmony_ci kfree(frag); 52162306a36Sopenharmony_ci } 52262306a36Sopenharmony_ci frag = NULL; 52362306a36Sopenharmony_ci } 52462306a36Sopenharmony_ci if (!frag) { 52562306a36Sopenharmony_ci frag = __get_or_create_frag(ci, id); 52662306a36Sopenharmony_ci if (IS_ERR(frag)) 52762306a36Sopenharmony_ci continue; 52862306a36Sopenharmony_ci } 52962306a36Sopenharmony_ci if (frag->split_by == 0) 53062306a36Sopenharmony_ci ci->i_fragtree_nsplits++; 53162306a36Sopenharmony_ci frag->split_by = split_by; 53262306a36Sopenharmony_ci dout(" frag %x split by %d\n", frag->frag, frag->split_by); 53362306a36Sopenharmony_ci prev_frag = frag; 53462306a36Sopenharmony_ci } 53562306a36Sopenharmony_ci while (rb_node) { 53662306a36Sopenharmony_ci frag = rb_entry(rb_node, struct ceph_inode_frag, node); 53762306a36Sopenharmony_ci rb_node = rb_next(rb_node); 53862306a36Sopenharmony_ci /* delete stale split/leaf node */ 53962306a36Sopenharmony_ci if (frag->split_by > 0 || 54062306a36Sopenharmony_ci !is_frag_child(frag->frag, prev_frag)) { 54162306a36Sopenharmony_ci rb_erase(&frag->node, &ci->i_fragtree); 54262306a36Sopenharmony_ci if (frag->split_by > 0) 54362306a36Sopenharmony_ci ci->i_fragtree_nsplits--; 54462306a36Sopenharmony_ci kfree(frag); 54562306a36Sopenharmony_ci } 54662306a36Sopenharmony_ci } 54762306a36Sopenharmony_ciout_unlock: 54862306a36Sopenharmony_ci mutex_unlock(&ci->i_fragtree_mutex); 54962306a36Sopenharmony_ci return 0; 55062306a36Sopenharmony_ci} 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_ci/* 55362306a36Sopenharmony_ci * initialize a newly allocated inode. 55462306a36Sopenharmony_ci */ 55562306a36Sopenharmony_cistruct inode *ceph_alloc_inode(struct super_block *sb) 55662306a36Sopenharmony_ci{ 55762306a36Sopenharmony_ci struct ceph_inode_info *ci; 55862306a36Sopenharmony_ci int i; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci ci = alloc_inode_sb(sb, ceph_inode_cachep, GFP_NOFS); 56162306a36Sopenharmony_ci if (!ci) 56262306a36Sopenharmony_ci return NULL; 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci dout("alloc_inode %p\n", &ci->netfs.inode); 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci /* Set parameters for the netfs library */ 56762306a36Sopenharmony_ci netfs_inode_init(&ci->netfs, &ceph_netfs_ops); 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci spin_lock_init(&ci->i_ceph_lock); 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci ci->i_version = 0; 57262306a36Sopenharmony_ci ci->i_inline_version = 0; 57362306a36Sopenharmony_ci ci->i_time_warp_seq = 0; 57462306a36Sopenharmony_ci ci->i_ceph_flags = 0; 57562306a36Sopenharmony_ci atomic64_set(&ci->i_ordered_count, 1); 57662306a36Sopenharmony_ci atomic64_set(&ci->i_release_count, 1); 57762306a36Sopenharmony_ci atomic64_set(&ci->i_complete_seq[0], 0); 57862306a36Sopenharmony_ci atomic64_set(&ci->i_complete_seq[1], 0); 57962306a36Sopenharmony_ci ci->i_symlink = NULL; 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci ci->i_max_bytes = 0; 58262306a36Sopenharmony_ci ci->i_max_files = 0; 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout)); 58562306a36Sopenharmony_ci memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout)); 58662306a36Sopenharmony_ci RCU_INIT_POINTER(ci->i_layout.pool_ns, NULL); 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_ci ci->i_fragtree = RB_ROOT; 58962306a36Sopenharmony_ci mutex_init(&ci->i_fragtree_mutex); 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci ci->i_xattrs.blob = NULL; 59262306a36Sopenharmony_ci ci->i_xattrs.prealloc_blob = NULL; 59362306a36Sopenharmony_ci ci->i_xattrs.dirty = false; 59462306a36Sopenharmony_ci ci->i_xattrs.index = RB_ROOT; 59562306a36Sopenharmony_ci ci->i_xattrs.count = 0; 59662306a36Sopenharmony_ci ci->i_xattrs.names_size = 0; 59762306a36Sopenharmony_ci ci->i_xattrs.vals_size = 0; 59862306a36Sopenharmony_ci ci->i_xattrs.version = 0; 59962306a36Sopenharmony_ci ci->i_xattrs.index_version = 0; 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci ci->i_caps = RB_ROOT; 60262306a36Sopenharmony_ci ci->i_auth_cap = NULL; 60362306a36Sopenharmony_ci ci->i_dirty_caps = 0; 60462306a36Sopenharmony_ci ci->i_flushing_caps = 0; 60562306a36Sopenharmony_ci INIT_LIST_HEAD(&ci->i_dirty_item); 60662306a36Sopenharmony_ci INIT_LIST_HEAD(&ci->i_flushing_item); 60762306a36Sopenharmony_ci ci->i_prealloc_cap_flush = NULL; 60862306a36Sopenharmony_ci INIT_LIST_HEAD(&ci->i_cap_flush_list); 60962306a36Sopenharmony_ci init_waitqueue_head(&ci->i_cap_wq); 61062306a36Sopenharmony_ci ci->i_hold_caps_max = 0; 61162306a36Sopenharmony_ci INIT_LIST_HEAD(&ci->i_cap_delay_list); 61262306a36Sopenharmony_ci INIT_LIST_HEAD(&ci->i_cap_snaps); 61362306a36Sopenharmony_ci ci->i_head_snapc = NULL; 61462306a36Sopenharmony_ci ci->i_snap_caps = 0; 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_ci ci->i_last_rd = ci->i_last_wr = jiffies - 3600 * HZ; 61762306a36Sopenharmony_ci for (i = 0; i < CEPH_FILE_MODE_BITS; i++) 61862306a36Sopenharmony_ci ci->i_nr_by_mode[i] = 0; 61962306a36Sopenharmony_ci 62062306a36Sopenharmony_ci mutex_init(&ci->i_truncate_mutex); 62162306a36Sopenharmony_ci ci->i_truncate_seq = 0; 62262306a36Sopenharmony_ci ci->i_truncate_size = 0; 62362306a36Sopenharmony_ci ci->i_truncate_pending = 0; 62462306a36Sopenharmony_ci ci->i_truncate_pagecache_size = 0; 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci ci->i_max_size = 0; 62762306a36Sopenharmony_ci ci->i_reported_size = 0; 62862306a36Sopenharmony_ci ci->i_wanted_max_size = 0; 62962306a36Sopenharmony_ci ci->i_requested_max_size = 0; 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci ci->i_pin_ref = 0; 63262306a36Sopenharmony_ci ci->i_rd_ref = 0; 63362306a36Sopenharmony_ci ci->i_rdcache_ref = 0; 63462306a36Sopenharmony_ci ci->i_wr_ref = 0; 63562306a36Sopenharmony_ci ci->i_wb_ref = 0; 63662306a36Sopenharmony_ci ci->i_fx_ref = 0; 63762306a36Sopenharmony_ci ci->i_wrbuffer_ref = 0; 63862306a36Sopenharmony_ci ci->i_wrbuffer_ref_head = 0; 63962306a36Sopenharmony_ci atomic_set(&ci->i_filelock_ref, 0); 64062306a36Sopenharmony_ci atomic_set(&ci->i_shared_gen, 1); 64162306a36Sopenharmony_ci ci->i_rdcache_gen = 0; 64262306a36Sopenharmony_ci ci->i_rdcache_revoking = 0; 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_ci INIT_LIST_HEAD(&ci->i_unsafe_dirops); 64562306a36Sopenharmony_ci INIT_LIST_HEAD(&ci->i_unsafe_iops); 64662306a36Sopenharmony_ci spin_lock_init(&ci->i_unsafe_lock); 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci ci->i_snap_realm = NULL; 64962306a36Sopenharmony_ci INIT_LIST_HEAD(&ci->i_snap_realm_item); 65062306a36Sopenharmony_ci INIT_LIST_HEAD(&ci->i_snap_flush_item); 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci INIT_WORK(&ci->i_work, ceph_inode_work); 65362306a36Sopenharmony_ci ci->i_work_mask = 0; 65462306a36Sopenharmony_ci memset(&ci->i_btime, '\0', sizeof(ci->i_btime)); 65562306a36Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION 65662306a36Sopenharmony_ci ci->fscrypt_auth = NULL; 65762306a36Sopenharmony_ci ci->fscrypt_auth_len = 0; 65862306a36Sopenharmony_ci#endif 65962306a36Sopenharmony_ci return &ci->netfs.inode; 66062306a36Sopenharmony_ci} 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_civoid ceph_free_inode(struct inode *inode) 66362306a36Sopenharmony_ci{ 66462306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci kfree(ci->i_symlink); 66762306a36Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION 66862306a36Sopenharmony_ci kfree(ci->fscrypt_auth); 66962306a36Sopenharmony_ci#endif 67062306a36Sopenharmony_ci fscrypt_free_inode(inode); 67162306a36Sopenharmony_ci kmem_cache_free(ceph_inode_cachep, ci); 67262306a36Sopenharmony_ci} 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_civoid ceph_evict_inode(struct inode *inode) 67562306a36Sopenharmony_ci{ 67662306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 67762306a36Sopenharmony_ci struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); 67862306a36Sopenharmony_ci struct ceph_inode_frag *frag; 67962306a36Sopenharmony_ci struct rb_node *n; 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci dout("evict_inode %p ino %llx.%llx\n", inode, ceph_vinop(inode)); 68262306a36Sopenharmony_ci 68362306a36Sopenharmony_ci percpu_counter_dec(&mdsc->metric.total_inodes); 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci truncate_inode_pages_final(&inode->i_data); 68662306a36Sopenharmony_ci if (inode->i_state & I_PINNING_FSCACHE_WB) 68762306a36Sopenharmony_ci ceph_fscache_unuse_cookie(inode, true); 68862306a36Sopenharmony_ci clear_inode(inode); 68962306a36Sopenharmony_ci 69062306a36Sopenharmony_ci ceph_fscache_unregister_inode_cookie(ci); 69162306a36Sopenharmony_ci fscrypt_put_encryption_info(inode); 69262306a36Sopenharmony_ci 69362306a36Sopenharmony_ci __ceph_remove_caps(ci); 69462306a36Sopenharmony_ci 69562306a36Sopenharmony_ci if (__ceph_has_quota(ci, QUOTA_GET_ANY)) 69662306a36Sopenharmony_ci ceph_adjust_quota_realms_count(inode, false); 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci /* 69962306a36Sopenharmony_ci * we may still have a snap_realm reference if there are stray 70062306a36Sopenharmony_ci * caps in i_snap_caps. 70162306a36Sopenharmony_ci */ 70262306a36Sopenharmony_ci if (ci->i_snap_realm) { 70362306a36Sopenharmony_ci if (ceph_snap(inode) == CEPH_NOSNAP) { 70462306a36Sopenharmony_ci dout(" dropping residual ref to snap realm %p\n", 70562306a36Sopenharmony_ci ci->i_snap_realm); 70662306a36Sopenharmony_ci ceph_change_snap_realm(inode, NULL); 70762306a36Sopenharmony_ci } else { 70862306a36Sopenharmony_ci ceph_put_snapid_map(mdsc, ci->i_snapid_map); 70962306a36Sopenharmony_ci ci->i_snap_realm = NULL; 71062306a36Sopenharmony_ci } 71162306a36Sopenharmony_ci } 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_ci while ((n = rb_first(&ci->i_fragtree)) != NULL) { 71462306a36Sopenharmony_ci frag = rb_entry(n, struct ceph_inode_frag, node); 71562306a36Sopenharmony_ci rb_erase(n, &ci->i_fragtree); 71662306a36Sopenharmony_ci kfree(frag); 71762306a36Sopenharmony_ci } 71862306a36Sopenharmony_ci ci->i_fragtree_nsplits = 0; 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci __ceph_destroy_xattrs(ci); 72162306a36Sopenharmony_ci if (ci->i_xattrs.blob) 72262306a36Sopenharmony_ci ceph_buffer_put(ci->i_xattrs.blob); 72362306a36Sopenharmony_ci if (ci->i_xattrs.prealloc_blob) 72462306a36Sopenharmony_ci ceph_buffer_put(ci->i_xattrs.prealloc_blob); 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ci ceph_put_string(rcu_dereference_raw(ci->i_layout.pool_ns)); 72762306a36Sopenharmony_ci ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns)); 72862306a36Sopenharmony_ci} 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_cistatic inline blkcnt_t calc_inode_blocks(u64 size) 73162306a36Sopenharmony_ci{ 73262306a36Sopenharmony_ci return (size + (1<<9) - 1) >> 9; 73362306a36Sopenharmony_ci} 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci/* 73662306a36Sopenharmony_ci * Helpers to fill in size, ctime, mtime, and atime. We have to be 73762306a36Sopenharmony_ci * careful because either the client or MDS may have more up to date 73862306a36Sopenharmony_ci * info, depending on which capabilities are held, and whether 73962306a36Sopenharmony_ci * time_warp_seq or truncate_seq have increased. (Ordinarily, mtime 74062306a36Sopenharmony_ci * and size are monotonically increasing, except when utimes() or 74162306a36Sopenharmony_ci * truncate() increments the corresponding _seq values.) 74262306a36Sopenharmony_ci */ 74362306a36Sopenharmony_ciint ceph_fill_file_size(struct inode *inode, int issued, 74462306a36Sopenharmony_ci u32 truncate_seq, u64 truncate_size, u64 size) 74562306a36Sopenharmony_ci{ 74662306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 74762306a36Sopenharmony_ci int queue_trunc = 0; 74862306a36Sopenharmony_ci loff_t isize = i_size_read(inode); 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ci if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 || 75162306a36Sopenharmony_ci (truncate_seq == ci->i_truncate_seq && size > isize)) { 75262306a36Sopenharmony_ci dout("size %lld -> %llu\n", isize, size); 75362306a36Sopenharmony_ci if (size > 0 && S_ISDIR(inode->i_mode)) { 75462306a36Sopenharmony_ci pr_err("fill_file_size non-zero size for directory\n"); 75562306a36Sopenharmony_ci size = 0; 75662306a36Sopenharmony_ci } 75762306a36Sopenharmony_ci i_size_write(inode, size); 75862306a36Sopenharmony_ci inode->i_blocks = calc_inode_blocks(size); 75962306a36Sopenharmony_ci /* 76062306a36Sopenharmony_ci * If we're expanding, then we should be able to just update 76162306a36Sopenharmony_ci * the existing cookie. 76262306a36Sopenharmony_ci */ 76362306a36Sopenharmony_ci if (size > isize) 76462306a36Sopenharmony_ci ceph_fscache_update(inode); 76562306a36Sopenharmony_ci ci->i_reported_size = size; 76662306a36Sopenharmony_ci if (truncate_seq != ci->i_truncate_seq) { 76762306a36Sopenharmony_ci dout("%s truncate_seq %u -> %u\n", __func__, 76862306a36Sopenharmony_ci ci->i_truncate_seq, truncate_seq); 76962306a36Sopenharmony_ci ci->i_truncate_seq = truncate_seq; 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci /* the MDS should have revoked these caps */ 77262306a36Sopenharmony_ci WARN_ON_ONCE(issued & (CEPH_CAP_FILE_RD | 77362306a36Sopenharmony_ci CEPH_CAP_FILE_LAZYIO)); 77462306a36Sopenharmony_ci /* 77562306a36Sopenharmony_ci * If we hold relevant caps, or in the case where we're 77662306a36Sopenharmony_ci * not the only client referencing this file and we 77762306a36Sopenharmony_ci * don't hold those caps, then we need to check whether 77862306a36Sopenharmony_ci * the file is either opened or mmaped 77962306a36Sopenharmony_ci */ 78062306a36Sopenharmony_ci if ((issued & (CEPH_CAP_FILE_CACHE| 78162306a36Sopenharmony_ci CEPH_CAP_FILE_BUFFER)) || 78262306a36Sopenharmony_ci mapping_mapped(inode->i_mapping) || 78362306a36Sopenharmony_ci __ceph_is_file_opened(ci)) { 78462306a36Sopenharmony_ci ci->i_truncate_pending++; 78562306a36Sopenharmony_ci queue_trunc = 1; 78662306a36Sopenharmony_ci } 78762306a36Sopenharmony_ci } 78862306a36Sopenharmony_ci } 78962306a36Sopenharmony_ci 79062306a36Sopenharmony_ci /* 79162306a36Sopenharmony_ci * It's possible that the new sizes of the two consecutive 79262306a36Sopenharmony_ci * size truncations will be in the same fscrypt last block, 79362306a36Sopenharmony_ci * and we need to truncate the corresponding page caches 79462306a36Sopenharmony_ci * anyway. 79562306a36Sopenharmony_ci */ 79662306a36Sopenharmony_ci if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0) { 79762306a36Sopenharmony_ci dout("%s truncate_size %lld -> %llu, encrypted %d\n", __func__, 79862306a36Sopenharmony_ci ci->i_truncate_size, truncate_size, !!IS_ENCRYPTED(inode)); 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci ci->i_truncate_size = truncate_size; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) { 80362306a36Sopenharmony_ci dout("%s truncate_pagecache_size %lld -> %llu\n", 80462306a36Sopenharmony_ci __func__, ci->i_truncate_pagecache_size, size); 80562306a36Sopenharmony_ci ci->i_truncate_pagecache_size = size; 80662306a36Sopenharmony_ci } else { 80762306a36Sopenharmony_ci ci->i_truncate_pagecache_size = truncate_size; 80862306a36Sopenharmony_ci } 80962306a36Sopenharmony_ci } 81062306a36Sopenharmony_ci return queue_trunc; 81162306a36Sopenharmony_ci} 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_civoid ceph_fill_file_time(struct inode *inode, int issued, 81462306a36Sopenharmony_ci u64 time_warp_seq, struct timespec64 *ctime, 81562306a36Sopenharmony_ci struct timespec64 *mtime, struct timespec64 *atime) 81662306a36Sopenharmony_ci{ 81762306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 81862306a36Sopenharmony_ci struct timespec64 ictime = inode_get_ctime(inode); 81962306a36Sopenharmony_ci int warn = 0; 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci if (issued & (CEPH_CAP_FILE_EXCL| 82262306a36Sopenharmony_ci CEPH_CAP_FILE_WR| 82362306a36Sopenharmony_ci CEPH_CAP_FILE_BUFFER| 82462306a36Sopenharmony_ci CEPH_CAP_AUTH_EXCL| 82562306a36Sopenharmony_ci CEPH_CAP_XATTR_EXCL)) { 82662306a36Sopenharmony_ci if (ci->i_version == 0 || 82762306a36Sopenharmony_ci timespec64_compare(ctime, &ictime) > 0) { 82862306a36Sopenharmony_ci dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n", 82962306a36Sopenharmony_ci ictime.tv_sec, ictime.tv_nsec, 83062306a36Sopenharmony_ci ctime->tv_sec, ctime->tv_nsec); 83162306a36Sopenharmony_ci inode_set_ctime_to_ts(inode, *ctime); 83262306a36Sopenharmony_ci } 83362306a36Sopenharmony_ci if (ci->i_version == 0 || 83462306a36Sopenharmony_ci ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) { 83562306a36Sopenharmony_ci /* the MDS did a utimes() */ 83662306a36Sopenharmony_ci dout("mtime %lld.%09ld -> %lld.%09ld " 83762306a36Sopenharmony_ci "tw %d -> %d\n", 83862306a36Sopenharmony_ci inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, 83962306a36Sopenharmony_ci mtime->tv_sec, mtime->tv_nsec, 84062306a36Sopenharmony_ci ci->i_time_warp_seq, (int)time_warp_seq); 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ci inode->i_mtime = *mtime; 84362306a36Sopenharmony_ci inode->i_atime = *atime; 84462306a36Sopenharmony_ci ci->i_time_warp_seq = time_warp_seq; 84562306a36Sopenharmony_ci } else if (time_warp_seq == ci->i_time_warp_seq) { 84662306a36Sopenharmony_ci /* nobody did utimes(); take the max */ 84762306a36Sopenharmony_ci if (timespec64_compare(mtime, &inode->i_mtime) > 0) { 84862306a36Sopenharmony_ci dout("mtime %lld.%09ld -> %lld.%09ld inc\n", 84962306a36Sopenharmony_ci inode->i_mtime.tv_sec, 85062306a36Sopenharmony_ci inode->i_mtime.tv_nsec, 85162306a36Sopenharmony_ci mtime->tv_sec, mtime->tv_nsec); 85262306a36Sopenharmony_ci inode->i_mtime = *mtime; 85362306a36Sopenharmony_ci } 85462306a36Sopenharmony_ci if (timespec64_compare(atime, &inode->i_atime) > 0) { 85562306a36Sopenharmony_ci dout("atime %lld.%09ld -> %lld.%09ld inc\n", 85662306a36Sopenharmony_ci inode->i_atime.tv_sec, 85762306a36Sopenharmony_ci inode->i_atime.tv_nsec, 85862306a36Sopenharmony_ci atime->tv_sec, atime->tv_nsec); 85962306a36Sopenharmony_ci inode->i_atime = *atime; 86062306a36Sopenharmony_ci } 86162306a36Sopenharmony_ci } else if (issued & CEPH_CAP_FILE_EXCL) { 86262306a36Sopenharmony_ci /* we did a utimes(); ignore mds values */ 86362306a36Sopenharmony_ci } else { 86462306a36Sopenharmony_ci warn = 1; 86562306a36Sopenharmony_ci } 86662306a36Sopenharmony_ci } else { 86762306a36Sopenharmony_ci /* we have no write|excl caps; whatever the MDS says is true */ 86862306a36Sopenharmony_ci if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) { 86962306a36Sopenharmony_ci inode_set_ctime_to_ts(inode, *ctime); 87062306a36Sopenharmony_ci inode->i_mtime = *mtime; 87162306a36Sopenharmony_ci inode->i_atime = *atime; 87262306a36Sopenharmony_ci ci->i_time_warp_seq = time_warp_seq; 87362306a36Sopenharmony_ci } else { 87462306a36Sopenharmony_ci warn = 1; 87562306a36Sopenharmony_ci } 87662306a36Sopenharmony_ci } 87762306a36Sopenharmony_ci if (warn) /* time_warp_seq shouldn't go backwards */ 87862306a36Sopenharmony_ci dout("%p mds time_warp_seq %llu < %u\n", 87962306a36Sopenharmony_ci inode, time_warp_seq, ci->i_time_warp_seq); 88062306a36Sopenharmony_ci} 88162306a36Sopenharmony_ci 88262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_FS_ENCRYPTION) 88362306a36Sopenharmony_cistatic int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym) 88462306a36Sopenharmony_ci{ 88562306a36Sopenharmony_ci int declen; 88662306a36Sopenharmony_ci u8 *sym; 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci sym = kmalloc(enclen + 1, GFP_NOFS); 88962306a36Sopenharmony_ci if (!sym) 89062306a36Sopenharmony_ci return -ENOMEM; 89162306a36Sopenharmony_ci 89262306a36Sopenharmony_ci declen = ceph_base64_decode(encsym, enclen, sym); 89362306a36Sopenharmony_ci if (declen < 0) { 89462306a36Sopenharmony_ci pr_err("%s: can't decode symlink (%d). Content: %.*s\n", 89562306a36Sopenharmony_ci __func__, declen, enclen, encsym); 89662306a36Sopenharmony_ci kfree(sym); 89762306a36Sopenharmony_ci return -EIO; 89862306a36Sopenharmony_ci } 89962306a36Sopenharmony_ci sym[declen + 1] = '\0'; 90062306a36Sopenharmony_ci *decsym = sym; 90162306a36Sopenharmony_ci return declen; 90262306a36Sopenharmony_ci} 90362306a36Sopenharmony_ci#else 90462306a36Sopenharmony_cistatic int decode_encrypted_symlink(const char *encsym, int symlen, u8 **decsym) 90562306a36Sopenharmony_ci{ 90662306a36Sopenharmony_ci return -EOPNOTSUPP; 90762306a36Sopenharmony_ci} 90862306a36Sopenharmony_ci#endif 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci/* 91162306a36Sopenharmony_ci * Populate an inode based on info from mds. May be called on new or 91262306a36Sopenharmony_ci * existing inodes. 91362306a36Sopenharmony_ci */ 91462306a36Sopenharmony_ciint ceph_fill_inode(struct inode *inode, struct page *locked_page, 91562306a36Sopenharmony_ci struct ceph_mds_reply_info_in *iinfo, 91662306a36Sopenharmony_ci struct ceph_mds_reply_dirfrag *dirinfo, 91762306a36Sopenharmony_ci struct ceph_mds_session *session, int cap_fmode, 91862306a36Sopenharmony_ci struct ceph_cap_reservation *caps_reservation) 91962306a36Sopenharmony_ci{ 92062306a36Sopenharmony_ci struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); 92162306a36Sopenharmony_ci struct ceph_mds_reply_inode *info = iinfo->in; 92262306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 92362306a36Sopenharmony_ci int issued, new_issued, info_caps; 92462306a36Sopenharmony_ci struct timespec64 mtime, atime, ctime; 92562306a36Sopenharmony_ci struct ceph_buffer *xattr_blob = NULL; 92662306a36Sopenharmony_ci struct ceph_buffer *old_blob = NULL; 92762306a36Sopenharmony_ci struct ceph_string *pool_ns = NULL; 92862306a36Sopenharmony_ci struct ceph_cap *new_cap = NULL; 92962306a36Sopenharmony_ci int err = 0; 93062306a36Sopenharmony_ci bool wake = false; 93162306a36Sopenharmony_ci bool queue_trunc = false; 93262306a36Sopenharmony_ci bool new_version = false; 93362306a36Sopenharmony_ci bool fill_inline = false; 93462306a36Sopenharmony_ci umode_t mode = le32_to_cpu(info->mode); 93562306a36Sopenharmony_ci dev_t rdev = le32_to_cpu(info->rdev); 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci lockdep_assert_held(&mdsc->snap_rwsem); 93862306a36Sopenharmony_ci 93962306a36Sopenharmony_ci dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, 94062306a36Sopenharmony_ci inode, ceph_vinop(inode), le64_to_cpu(info->version), 94162306a36Sopenharmony_ci ci->i_version); 94262306a36Sopenharmony_ci 94362306a36Sopenharmony_ci /* Once I_NEW is cleared, we can't change type or dev numbers */ 94462306a36Sopenharmony_ci if (inode->i_state & I_NEW) { 94562306a36Sopenharmony_ci inode->i_mode = mode; 94662306a36Sopenharmony_ci } else { 94762306a36Sopenharmony_ci if (inode_wrong_type(inode, mode)) { 94862306a36Sopenharmony_ci pr_warn_once("inode type changed! (ino %llx.%llx is 0%o, mds says 0%o)\n", 94962306a36Sopenharmony_ci ceph_vinop(inode), inode->i_mode, mode); 95062306a36Sopenharmony_ci return -ESTALE; 95162306a36Sopenharmony_ci } 95262306a36Sopenharmony_ci 95362306a36Sopenharmony_ci if ((S_ISCHR(mode) || S_ISBLK(mode)) && inode->i_rdev != rdev) { 95462306a36Sopenharmony_ci pr_warn_once("dev inode rdev changed! (ino %llx.%llx is %u:%u, mds says %u:%u)\n", 95562306a36Sopenharmony_ci ceph_vinop(inode), MAJOR(inode->i_rdev), 95662306a36Sopenharmony_ci MINOR(inode->i_rdev), MAJOR(rdev), 95762306a36Sopenharmony_ci MINOR(rdev)); 95862306a36Sopenharmony_ci return -ESTALE; 95962306a36Sopenharmony_ci } 96062306a36Sopenharmony_ci } 96162306a36Sopenharmony_ci 96262306a36Sopenharmony_ci info_caps = le32_to_cpu(info->cap.caps); 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci /* prealloc new cap struct */ 96562306a36Sopenharmony_ci if (info_caps && ceph_snap(inode) == CEPH_NOSNAP) { 96662306a36Sopenharmony_ci new_cap = ceph_get_cap(mdsc, caps_reservation); 96762306a36Sopenharmony_ci if (!new_cap) 96862306a36Sopenharmony_ci return -ENOMEM; 96962306a36Sopenharmony_ci } 97062306a36Sopenharmony_ci 97162306a36Sopenharmony_ci /* 97262306a36Sopenharmony_ci * prealloc xattr data, if it looks like we'll need it. only 97362306a36Sopenharmony_ci * if len > 4 (meaning there are actually xattrs; the first 4 97462306a36Sopenharmony_ci * bytes are the xattr count). 97562306a36Sopenharmony_ci */ 97662306a36Sopenharmony_ci if (iinfo->xattr_len > 4) { 97762306a36Sopenharmony_ci xattr_blob = ceph_buffer_new(iinfo->xattr_len, GFP_NOFS); 97862306a36Sopenharmony_ci if (!xattr_blob) 97962306a36Sopenharmony_ci pr_err("%s ENOMEM xattr blob %d bytes\n", __func__, 98062306a36Sopenharmony_ci iinfo->xattr_len); 98162306a36Sopenharmony_ci } 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_ci if (iinfo->pool_ns_len > 0) 98462306a36Sopenharmony_ci pool_ns = ceph_find_or_create_string(iinfo->pool_ns_data, 98562306a36Sopenharmony_ci iinfo->pool_ns_len); 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_ci if (ceph_snap(inode) != CEPH_NOSNAP && !ci->i_snapid_map) 98862306a36Sopenharmony_ci ci->i_snapid_map = ceph_get_snapid_map(mdsc, ceph_snap(inode)); 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 99162306a36Sopenharmony_ci 99262306a36Sopenharmony_ci /* 99362306a36Sopenharmony_ci * provided version will be odd if inode value is projected, 99462306a36Sopenharmony_ci * even if stable. skip the update if we have newer stable 99562306a36Sopenharmony_ci * info (ours>=theirs, e.g. due to racing mds replies), unless 99662306a36Sopenharmony_ci * we are getting projected (unstable) info (in which case the 99762306a36Sopenharmony_ci * version is odd, and we want ours>theirs). 99862306a36Sopenharmony_ci * us them 99962306a36Sopenharmony_ci * 2 2 skip 100062306a36Sopenharmony_ci * 3 2 skip 100162306a36Sopenharmony_ci * 3 3 update 100262306a36Sopenharmony_ci */ 100362306a36Sopenharmony_ci if (ci->i_version == 0 || 100462306a36Sopenharmony_ci ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && 100562306a36Sopenharmony_ci le64_to_cpu(info->version) > (ci->i_version & ~1))) 100662306a36Sopenharmony_ci new_version = true; 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_ci /* Update change_attribute */ 100962306a36Sopenharmony_ci inode_set_max_iversion_raw(inode, iinfo->change_attr); 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci __ceph_caps_issued(ci, &issued); 101262306a36Sopenharmony_ci issued |= __ceph_caps_dirty(ci); 101362306a36Sopenharmony_ci new_issued = ~issued & info_caps; 101462306a36Sopenharmony_ci 101562306a36Sopenharmony_ci __ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files); 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION 101862306a36Sopenharmony_ci if (iinfo->fscrypt_auth_len && 101962306a36Sopenharmony_ci ((inode->i_state & I_NEW) || (ci->fscrypt_auth_len == 0))) { 102062306a36Sopenharmony_ci kfree(ci->fscrypt_auth); 102162306a36Sopenharmony_ci ci->fscrypt_auth_len = iinfo->fscrypt_auth_len; 102262306a36Sopenharmony_ci ci->fscrypt_auth = iinfo->fscrypt_auth; 102362306a36Sopenharmony_ci iinfo->fscrypt_auth = NULL; 102462306a36Sopenharmony_ci iinfo->fscrypt_auth_len = 0; 102562306a36Sopenharmony_ci inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED); 102662306a36Sopenharmony_ci } 102762306a36Sopenharmony_ci#endif 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_ci if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && 103062306a36Sopenharmony_ci (issued & CEPH_CAP_AUTH_EXCL) == 0) { 103162306a36Sopenharmony_ci inode->i_mode = mode; 103262306a36Sopenharmony_ci inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); 103362306a36Sopenharmony_ci inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); 103462306a36Sopenharmony_ci dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode, 103562306a36Sopenharmony_ci from_kuid(&init_user_ns, inode->i_uid), 103662306a36Sopenharmony_ci from_kgid(&init_user_ns, inode->i_gid)); 103762306a36Sopenharmony_ci ceph_decode_timespec64(&ci->i_btime, &iinfo->btime); 103862306a36Sopenharmony_ci ceph_decode_timespec64(&ci->i_snap_btime, &iinfo->snap_btime); 103962306a36Sopenharmony_ci } 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci /* directories have fl_stripe_unit set to zero */ 104262306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) 104362306a36Sopenharmony_ci inode->i_blkbits = CEPH_FSCRYPT_BLOCK_SHIFT; 104462306a36Sopenharmony_ci else if (le32_to_cpu(info->layout.fl_stripe_unit)) 104562306a36Sopenharmony_ci inode->i_blkbits = 104662306a36Sopenharmony_ci fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; 104762306a36Sopenharmony_ci else 104862306a36Sopenharmony_ci inode->i_blkbits = CEPH_BLOCK_SHIFT; 104962306a36Sopenharmony_ci 105062306a36Sopenharmony_ci if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && 105162306a36Sopenharmony_ci (issued & CEPH_CAP_LINK_EXCL) == 0) 105262306a36Sopenharmony_ci set_nlink(inode, le32_to_cpu(info->nlink)); 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { 105562306a36Sopenharmony_ci /* be careful with mtime, atime, size */ 105662306a36Sopenharmony_ci ceph_decode_timespec64(&atime, &info->atime); 105762306a36Sopenharmony_ci ceph_decode_timespec64(&mtime, &info->mtime); 105862306a36Sopenharmony_ci ceph_decode_timespec64(&ctime, &info->ctime); 105962306a36Sopenharmony_ci ceph_fill_file_time(inode, issued, 106062306a36Sopenharmony_ci le32_to_cpu(info->time_warp_seq), 106162306a36Sopenharmony_ci &ctime, &mtime, &atime); 106262306a36Sopenharmony_ci } 106362306a36Sopenharmony_ci 106462306a36Sopenharmony_ci if (new_version || (info_caps & CEPH_CAP_FILE_SHARED)) { 106562306a36Sopenharmony_ci ci->i_files = le64_to_cpu(info->files); 106662306a36Sopenharmony_ci ci->i_subdirs = le64_to_cpu(info->subdirs); 106762306a36Sopenharmony_ci } 106862306a36Sopenharmony_ci 106962306a36Sopenharmony_ci if (new_version || 107062306a36Sopenharmony_ci (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { 107162306a36Sopenharmony_ci u64 size = le64_to_cpu(info->size); 107262306a36Sopenharmony_ci s64 old_pool = ci->i_layout.pool_id; 107362306a36Sopenharmony_ci struct ceph_string *old_ns; 107462306a36Sopenharmony_ci 107562306a36Sopenharmony_ci ceph_file_layout_from_legacy(&ci->i_layout, &info->layout); 107662306a36Sopenharmony_ci old_ns = rcu_dereference_protected(ci->i_layout.pool_ns, 107762306a36Sopenharmony_ci lockdep_is_held(&ci->i_ceph_lock)); 107862306a36Sopenharmony_ci rcu_assign_pointer(ci->i_layout.pool_ns, pool_ns); 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci if (ci->i_layout.pool_id != old_pool || pool_ns != old_ns) 108162306a36Sopenharmony_ci ci->i_ceph_flags &= ~CEPH_I_POOL_PERM; 108262306a36Sopenharmony_ci 108362306a36Sopenharmony_ci pool_ns = old_ns; 108462306a36Sopenharmony_ci 108562306a36Sopenharmony_ci if (IS_ENCRYPTED(inode) && size && 108662306a36Sopenharmony_ci iinfo->fscrypt_file_len == sizeof(__le64)) { 108762306a36Sopenharmony_ci u64 fsize = __le64_to_cpu(*(__le64 *)iinfo->fscrypt_file); 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_ci if (size == round_up(fsize, CEPH_FSCRYPT_BLOCK_SIZE)) { 109062306a36Sopenharmony_ci size = fsize; 109162306a36Sopenharmony_ci } else { 109262306a36Sopenharmony_ci pr_warn("fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n", 109362306a36Sopenharmony_ci info->size, size); 109462306a36Sopenharmony_ci } 109562306a36Sopenharmony_ci } 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci queue_trunc = ceph_fill_file_size(inode, issued, 109862306a36Sopenharmony_ci le32_to_cpu(info->truncate_seq), 109962306a36Sopenharmony_ci le64_to_cpu(info->truncate_size), 110062306a36Sopenharmony_ci size); 110162306a36Sopenharmony_ci /* only update max_size on auth cap */ 110262306a36Sopenharmony_ci if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && 110362306a36Sopenharmony_ci ci->i_max_size != le64_to_cpu(info->max_size)) { 110462306a36Sopenharmony_ci dout("max_size %lld -> %llu\n", ci->i_max_size, 110562306a36Sopenharmony_ci le64_to_cpu(info->max_size)); 110662306a36Sopenharmony_ci ci->i_max_size = le64_to_cpu(info->max_size); 110762306a36Sopenharmony_ci } 110862306a36Sopenharmony_ci } 110962306a36Sopenharmony_ci 111062306a36Sopenharmony_ci /* layout and rstat are not tracked by capability, update them if 111162306a36Sopenharmony_ci * the inode info is from auth mds */ 111262306a36Sopenharmony_ci if (new_version || (info->cap.flags & CEPH_CAP_FLAG_AUTH)) { 111362306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) { 111462306a36Sopenharmony_ci ci->i_dir_layout = iinfo->dir_layout; 111562306a36Sopenharmony_ci ci->i_rbytes = le64_to_cpu(info->rbytes); 111662306a36Sopenharmony_ci ci->i_rfiles = le64_to_cpu(info->rfiles); 111762306a36Sopenharmony_ci ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); 111862306a36Sopenharmony_ci ci->i_dir_pin = iinfo->dir_pin; 111962306a36Sopenharmony_ci ci->i_rsnaps = iinfo->rsnaps; 112062306a36Sopenharmony_ci ceph_decode_timespec64(&ci->i_rctime, &info->rctime); 112162306a36Sopenharmony_ci } 112262306a36Sopenharmony_ci } 112362306a36Sopenharmony_ci 112462306a36Sopenharmony_ci /* xattrs */ 112562306a36Sopenharmony_ci /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ 112662306a36Sopenharmony_ci if ((ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) && 112762306a36Sopenharmony_ci le64_to_cpu(info->xattr_version) > ci->i_xattrs.version) { 112862306a36Sopenharmony_ci if (ci->i_xattrs.blob) 112962306a36Sopenharmony_ci old_blob = ci->i_xattrs.blob; 113062306a36Sopenharmony_ci ci->i_xattrs.blob = xattr_blob; 113162306a36Sopenharmony_ci if (xattr_blob) 113262306a36Sopenharmony_ci memcpy(ci->i_xattrs.blob->vec.iov_base, 113362306a36Sopenharmony_ci iinfo->xattr_data, iinfo->xattr_len); 113462306a36Sopenharmony_ci ci->i_xattrs.version = le64_to_cpu(info->xattr_version); 113562306a36Sopenharmony_ci ceph_forget_all_cached_acls(inode); 113662306a36Sopenharmony_ci ceph_security_invalidate_secctx(inode); 113762306a36Sopenharmony_ci xattr_blob = NULL; 113862306a36Sopenharmony_ci } 113962306a36Sopenharmony_ci 114062306a36Sopenharmony_ci /* finally update i_version */ 114162306a36Sopenharmony_ci if (le64_to_cpu(info->version) > ci->i_version) 114262306a36Sopenharmony_ci ci->i_version = le64_to_cpu(info->version); 114362306a36Sopenharmony_ci 114462306a36Sopenharmony_ci inode->i_mapping->a_ops = &ceph_aops; 114562306a36Sopenharmony_ci 114662306a36Sopenharmony_ci switch (inode->i_mode & S_IFMT) { 114762306a36Sopenharmony_ci case S_IFIFO: 114862306a36Sopenharmony_ci case S_IFBLK: 114962306a36Sopenharmony_ci case S_IFCHR: 115062306a36Sopenharmony_ci case S_IFSOCK: 115162306a36Sopenharmony_ci inode->i_blkbits = PAGE_SHIFT; 115262306a36Sopenharmony_ci init_special_inode(inode, inode->i_mode, rdev); 115362306a36Sopenharmony_ci inode->i_op = &ceph_file_iops; 115462306a36Sopenharmony_ci break; 115562306a36Sopenharmony_ci case S_IFREG: 115662306a36Sopenharmony_ci inode->i_op = &ceph_file_iops; 115762306a36Sopenharmony_ci inode->i_fop = &ceph_file_fops; 115862306a36Sopenharmony_ci break; 115962306a36Sopenharmony_ci case S_IFLNK: 116062306a36Sopenharmony_ci if (!ci->i_symlink) { 116162306a36Sopenharmony_ci u32 symlen = iinfo->symlink_len; 116262306a36Sopenharmony_ci char *sym; 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) { 116762306a36Sopenharmony_ci if (symlen != i_size_read(inode)) 116862306a36Sopenharmony_ci pr_err("%s %llx.%llx BAD symlink size %lld\n", 116962306a36Sopenharmony_ci __func__, ceph_vinop(inode), 117062306a36Sopenharmony_ci i_size_read(inode)); 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ci err = decode_encrypted_symlink(iinfo->symlink, 117362306a36Sopenharmony_ci symlen, (u8 **)&sym); 117462306a36Sopenharmony_ci if (err < 0) { 117562306a36Sopenharmony_ci pr_err("%s decoding encrypted symlink failed: %d\n", 117662306a36Sopenharmony_ci __func__, err); 117762306a36Sopenharmony_ci goto out; 117862306a36Sopenharmony_ci } 117962306a36Sopenharmony_ci symlen = err; 118062306a36Sopenharmony_ci i_size_write(inode, symlen); 118162306a36Sopenharmony_ci inode->i_blocks = calc_inode_blocks(symlen); 118262306a36Sopenharmony_ci } else { 118362306a36Sopenharmony_ci if (symlen != i_size_read(inode)) { 118462306a36Sopenharmony_ci pr_err("%s %llx.%llx BAD symlink size %lld\n", 118562306a36Sopenharmony_ci __func__, ceph_vinop(inode), 118662306a36Sopenharmony_ci i_size_read(inode)); 118762306a36Sopenharmony_ci i_size_write(inode, symlen); 118862306a36Sopenharmony_ci inode->i_blocks = calc_inode_blocks(symlen); 118962306a36Sopenharmony_ci } 119062306a36Sopenharmony_ci 119162306a36Sopenharmony_ci err = -ENOMEM; 119262306a36Sopenharmony_ci sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS); 119362306a36Sopenharmony_ci if (!sym) 119462306a36Sopenharmony_ci goto out; 119562306a36Sopenharmony_ci } 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 119862306a36Sopenharmony_ci if (!ci->i_symlink) 119962306a36Sopenharmony_ci ci->i_symlink = sym; 120062306a36Sopenharmony_ci else 120162306a36Sopenharmony_ci kfree(sym); /* lost a race */ 120262306a36Sopenharmony_ci } 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) { 120562306a36Sopenharmony_ci /* 120662306a36Sopenharmony_ci * Encrypted symlinks need to be decrypted before we can 120762306a36Sopenharmony_ci * cache their targets in i_link. Don't touch it here. 120862306a36Sopenharmony_ci */ 120962306a36Sopenharmony_ci inode->i_op = &ceph_encrypted_symlink_iops; 121062306a36Sopenharmony_ci } else { 121162306a36Sopenharmony_ci inode->i_link = ci->i_symlink; 121262306a36Sopenharmony_ci inode->i_op = &ceph_symlink_iops; 121362306a36Sopenharmony_ci } 121462306a36Sopenharmony_ci break; 121562306a36Sopenharmony_ci case S_IFDIR: 121662306a36Sopenharmony_ci inode->i_op = &ceph_dir_iops; 121762306a36Sopenharmony_ci inode->i_fop = &ceph_dir_fops; 121862306a36Sopenharmony_ci break; 121962306a36Sopenharmony_ci default: 122062306a36Sopenharmony_ci pr_err("%s %llx.%llx BAD mode 0%o\n", __func__, 122162306a36Sopenharmony_ci ceph_vinop(inode), inode->i_mode); 122262306a36Sopenharmony_ci } 122362306a36Sopenharmony_ci 122462306a36Sopenharmony_ci /* were we issued a capability? */ 122562306a36Sopenharmony_ci if (info_caps) { 122662306a36Sopenharmony_ci if (ceph_snap(inode) == CEPH_NOSNAP) { 122762306a36Sopenharmony_ci ceph_add_cap(inode, session, 122862306a36Sopenharmony_ci le64_to_cpu(info->cap.cap_id), 122962306a36Sopenharmony_ci info_caps, 123062306a36Sopenharmony_ci le32_to_cpu(info->cap.wanted), 123162306a36Sopenharmony_ci le32_to_cpu(info->cap.seq), 123262306a36Sopenharmony_ci le32_to_cpu(info->cap.mseq), 123362306a36Sopenharmony_ci le64_to_cpu(info->cap.realm), 123462306a36Sopenharmony_ci info->cap.flags, &new_cap); 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_ci /* set dir completion flag? */ 123762306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode) && 123862306a36Sopenharmony_ci ci->i_files == 0 && ci->i_subdirs == 0 && 123962306a36Sopenharmony_ci (info_caps & CEPH_CAP_FILE_SHARED) && 124062306a36Sopenharmony_ci (issued & CEPH_CAP_FILE_EXCL) == 0 && 124162306a36Sopenharmony_ci !__ceph_dir_is_complete(ci)) { 124262306a36Sopenharmony_ci dout(" marking %p complete (empty)\n", inode); 124362306a36Sopenharmony_ci i_size_write(inode, 0); 124462306a36Sopenharmony_ci __ceph_dir_set_complete(ci, 124562306a36Sopenharmony_ci atomic64_read(&ci->i_release_count), 124662306a36Sopenharmony_ci atomic64_read(&ci->i_ordered_count)); 124762306a36Sopenharmony_ci } 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci wake = true; 125062306a36Sopenharmony_ci } else { 125162306a36Sopenharmony_ci dout(" %p got snap_caps %s\n", inode, 125262306a36Sopenharmony_ci ceph_cap_string(info_caps)); 125362306a36Sopenharmony_ci ci->i_snap_caps |= info_caps; 125462306a36Sopenharmony_ci } 125562306a36Sopenharmony_ci } 125662306a36Sopenharmony_ci 125762306a36Sopenharmony_ci if (iinfo->inline_version > 0 && 125862306a36Sopenharmony_ci iinfo->inline_version >= ci->i_inline_version) { 125962306a36Sopenharmony_ci int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; 126062306a36Sopenharmony_ci ci->i_inline_version = iinfo->inline_version; 126162306a36Sopenharmony_ci if (ceph_has_inline_data(ci) && 126262306a36Sopenharmony_ci (locked_page || (info_caps & cache_caps))) 126362306a36Sopenharmony_ci fill_inline = true; 126462306a36Sopenharmony_ci } 126562306a36Sopenharmony_ci 126662306a36Sopenharmony_ci if (cap_fmode >= 0) { 126762306a36Sopenharmony_ci if (!info_caps) 126862306a36Sopenharmony_ci pr_warn("mds issued no caps on %llx.%llx\n", 126962306a36Sopenharmony_ci ceph_vinop(inode)); 127062306a36Sopenharmony_ci __ceph_touch_fmode(ci, mdsc, cap_fmode); 127162306a36Sopenharmony_ci } 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 127462306a36Sopenharmony_ci 127562306a36Sopenharmony_ci ceph_fscache_register_inode_cookie(inode); 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_ci if (fill_inline) 127862306a36Sopenharmony_ci ceph_fill_inline_data(inode, locked_page, 127962306a36Sopenharmony_ci iinfo->inline_data, iinfo->inline_len); 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_ci if (wake) 128262306a36Sopenharmony_ci wake_up_all(&ci->i_cap_wq); 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci /* queue truncate if we saw i_size decrease */ 128562306a36Sopenharmony_ci if (queue_trunc) 128662306a36Sopenharmony_ci ceph_queue_vmtruncate(inode); 128762306a36Sopenharmony_ci 128862306a36Sopenharmony_ci /* populate frag tree */ 128962306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) 129062306a36Sopenharmony_ci ceph_fill_fragtree(inode, &info->fragtree, dirinfo); 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_ci /* update delegation info? */ 129362306a36Sopenharmony_ci if (dirinfo) 129462306a36Sopenharmony_ci ceph_fill_dirfrag(inode, dirinfo); 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci err = 0; 129762306a36Sopenharmony_ciout: 129862306a36Sopenharmony_ci if (new_cap) 129962306a36Sopenharmony_ci ceph_put_cap(mdsc, new_cap); 130062306a36Sopenharmony_ci ceph_buffer_put(old_blob); 130162306a36Sopenharmony_ci ceph_buffer_put(xattr_blob); 130262306a36Sopenharmony_ci ceph_put_string(pool_ns); 130362306a36Sopenharmony_ci return err; 130462306a36Sopenharmony_ci} 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ci/* 130762306a36Sopenharmony_ci * caller should hold session s_mutex and dentry->d_lock. 130862306a36Sopenharmony_ci */ 130962306a36Sopenharmony_cistatic void __update_dentry_lease(struct inode *dir, struct dentry *dentry, 131062306a36Sopenharmony_ci struct ceph_mds_reply_lease *lease, 131162306a36Sopenharmony_ci struct ceph_mds_session *session, 131262306a36Sopenharmony_ci unsigned long from_time, 131362306a36Sopenharmony_ci struct ceph_mds_session **old_lease_session) 131462306a36Sopenharmony_ci{ 131562306a36Sopenharmony_ci struct ceph_dentry_info *di = ceph_dentry(dentry); 131662306a36Sopenharmony_ci unsigned mask = le16_to_cpu(lease->mask); 131762306a36Sopenharmony_ci long unsigned duration = le32_to_cpu(lease->duration_ms); 131862306a36Sopenharmony_ci long unsigned ttl = from_time + (duration * HZ) / 1000; 131962306a36Sopenharmony_ci long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000; 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ci dout("update_dentry_lease %p duration %lu ms ttl %lu\n", 132262306a36Sopenharmony_ci dentry, duration, ttl); 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ci /* only track leases on regular dentries */ 132562306a36Sopenharmony_ci if (ceph_snap(dir) != CEPH_NOSNAP) 132662306a36Sopenharmony_ci return; 132762306a36Sopenharmony_ci 132862306a36Sopenharmony_ci if (mask & CEPH_LEASE_PRIMARY_LINK) 132962306a36Sopenharmony_ci di->flags |= CEPH_DENTRY_PRIMARY_LINK; 133062306a36Sopenharmony_ci else 133162306a36Sopenharmony_ci di->flags &= ~CEPH_DENTRY_PRIMARY_LINK; 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci di->lease_shared_gen = atomic_read(&ceph_inode(dir)->i_shared_gen); 133462306a36Sopenharmony_ci if (!(mask & CEPH_LEASE_VALID)) { 133562306a36Sopenharmony_ci __ceph_dentry_dir_lease_touch(di); 133662306a36Sopenharmony_ci return; 133762306a36Sopenharmony_ci } 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_ci if (di->lease_gen == atomic_read(&session->s_cap_gen) && 134062306a36Sopenharmony_ci time_before(ttl, di->time)) 134162306a36Sopenharmony_ci return; /* we already have a newer lease. */ 134262306a36Sopenharmony_ci 134362306a36Sopenharmony_ci if (di->lease_session && di->lease_session != session) { 134462306a36Sopenharmony_ci *old_lease_session = di->lease_session; 134562306a36Sopenharmony_ci di->lease_session = NULL; 134662306a36Sopenharmony_ci } 134762306a36Sopenharmony_ci 134862306a36Sopenharmony_ci if (!di->lease_session) 134962306a36Sopenharmony_ci di->lease_session = ceph_get_mds_session(session); 135062306a36Sopenharmony_ci di->lease_gen = atomic_read(&session->s_cap_gen); 135162306a36Sopenharmony_ci di->lease_seq = le32_to_cpu(lease->seq); 135262306a36Sopenharmony_ci di->lease_renew_after = half_ttl; 135362306a36Sopenharmony_ci di->lease_renew_from = 0; 135462306a36Sopenharmony_ci di->time = ttl; 135562306a36Sopenharmony_ci 135662306a36Sopenharmony_ci __ceph_dentry_lease_touch(di); 135762306a36Sopenharmony_ci} 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_cistatic inline void update_dentry_lease(struct inode *dir, struct dentry *dentry, 136062306a36Sopenharmony_ci struct ceph_mds_reply_lease *lease, 136162306a36Sopenharmony_ci struct ceph_mds_session *session, 136262306a36Sopenharmony_ci unsigned long from_time) 136362306a36Sopenharmony_ci{ 136462306a36Sopenharmony_ci struct ceph_mds_session *old_lease_session = NULL; 136562306a36Sopenharmony_ci spin_lock(&dentry->d_lock); 136662306a36Sopenharmony_ci __update_dentry_lease(dir, dentry, lease, session, from_time, 136762306a36Sopenharmony_ci &old_lease_session); 136862306a36Sopenharmony_ci spin_unlock(&dentry->d_lock); 136962306a36Sopenharmony_ci ceph_put_mds_session(old_lease_session); 137062306a36Sopenharmony_ci} 137162306a36Sopenharmony_ci 137262306a36Sopenharmony_ci/* 137362306a36Sopenharmony_ci * update dentry lease without having parent inode locked 137462306a36Sopenharmony_ci */ 137562306a36Sopenharmony_cistatic void update_dentry_lease_careful(struct dentry *dentry, 137662306a36Sopenharmony_ci struct ceph_mds_reply_lease *lease, 137762306a36Sopenharmony_ci struct ceph_mds_session *session, 137862306a36Sopenharmony_ci unsigned long from_time, 137962306a36Sopenharmony_ci char *dname, u32 dname_len, 138062306a36Sopenharmony_ci struct ceph_vino *pdvino, 138162306a36Sopenharmony_ci struct ceph_vino *ptvino) 138262306a36Sopenharmony_ci 138362306a36Sopenharmony_ci{ 138462306a36Sopenharmony_ci struct inode *dir; 138562306a36Sopenharmony_ci struct ceph_mds_session *old_lease_session = NULL; 138662306a36Sopenharmony_ci 138762306a36Sopenharmony_ci spin_lock(&dentry->d_lock); 138862306a36Sopenharmony_ci /* make sure dentry's name matches target */ 138962306a36Sopenharmony_ci if (dentry->d_name.len != dname_len || 139062306a36Sopenharmony_ci memcmp(dentry->d_name.name, dname, dname_len)) 139162306a36Sopenharmony_ci goto out_unlock; 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_ci dir = d_inode(dentry->d_parent); 139462306a36Sopenharmony_ci /* make sure parent matches dvino */ 139562306a36Sopenharmony_ci if (!ceph_ino_compare(dir, pdvino)) 139662306a36Sopenharmony_ci goto out_unlock; 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci /* make sure dentry's inode matches target. NULL ptvino means that 139962306a36Sopenharmony_ci * we expect a negative dentry */ 140062306a36Sopenharmony_ci if (ptvino) { 140162306a36Sopenharmony_ci if (d_really_is_negative(dentry)) 140262306a36Sopenharmony_ci goto out_unlock; 140362306a36Sopenharmony_ci if (!ceph_ino_compare(d_inode(dentry), ptvino)) 140462306a36Sopenharmony_ci goto out_unlock; 140562306a36Sopenharmony_ci } else { 140662306a36Sopenharmony_ci if (d_really_is_positive(dentry)) 140762306a36Sopenharmony_ci goto out_unlock; 140862306a36Sopenharmony_ci } 140962306a36Sopenharmony_ci 141062306a36Sopenharmony_ci __update_dentry_lease(dir, dentry, lease, session, 141162306a36Sopenharmony_ci from_time, &old_lease_session); 141262306a36Sopenharmony_ciout_unlock: 141362306a36Sopenharmony_ci spin_unlock(&dentry->d_lock); 141462306a36Sopenharmony_ci ceph_put_mds_session(old_lease_session); 141562306a36Sopenharmony_ci} 141662306a36Sopenharmony_ci 141762306a36Sopenharmony_ci/* 141862306a36Sopenharmony_ci * splice a dentry to an inode. 141962306a36Sopenharmony_ci * caller must hold directory i_rwsem for this to be safe. 142062306a36Sopenharmony_ci */ 142162306a36Sopenharmony_cistatic int splice_dentry(struct dentry **pdn, struct inode *in) 142262306a36Sopenharmony_ci{ 142362306a36Sopenharmony_ci struct dentry *dn = *pdn; 142462306a36Sopenharmony_ci struct dentry *realdn; 142562306a36Sopenharmony_ci 142662306a36Sopenharmony_ci BUG_ON(d_inode(dn)); 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_ci if (S_ISDIR(in->i_mode)) { 142962306a36Sopenharmony_ci /* If inode is directory, d_splice_alias() below will remove 143062306a36Sopenharmony_ci * 'realdn' from its origin parent. We need to ensure that 143162306a36Sopenharmony_ci * origin parent's readdir cache will not reference 'realdn' 143262306a36Sopenharmony_ci */ 143362306a36Sopenharmony_ci realdn = d_find_any_alias(in); 143462306a36Sopenharmony_ci if (realdn) { 143562306a36Sopenharmony_ci struct ceph_dentry_info *di = ceph_dentry(realdn); 143662306a36Sopenharmony_ci spin_lock(&realdn->d_lock); 143762306a36Sopenharmony_ci 143862306a36Sopenharmony_ci realdn->d_op->d_prune(realdn); 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci di->time = jiffies; 144162306a36Sopenharmony_ci di->lease_shared_gen = 0; 144262306a36Sopenharmony_ci di->offset = 0; 144362306a36Sopenharmony_ci 144462306a36Sopenharmony_ci spin_unlock(&realdn->d_lock); 144562306a36Sopenharmony_ci dput(realdn); 144662306a36Sopenharmony_ci } 144762306a36Sopenharmony_ci } 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci /* dn must be unhashed */ 145062306a36Sopenharmony_ci if (!d_unhashed(dn)) 145162306a36Sopenharmony_ci d_drop(dn); 145262306a36Sopenharmony_ci realdn = d_splice_alias(in, dn); 145362306a36Sopenharmony_ci if (IS_ERR(realdn)) { 145462306a36Sopenharmony_ci pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n", 145562306a36Sopenharmony_ci PTR_ERR(realdn), dn, in, ceph_vinop(in)); 145662306a36Sopenharmony_ci return PTR_ERR(realdn); 145762306a36Sopenharmony_ci } 145862306a36Sopenharmony_ci 145962306a36Sopenharmony_ci if (realdn) { 146062306a36Sopenharmony_ci dout("dn %p (%d) spliced with %p (%d) " 146162306a36Sopenharmony_ci "inode %p ino %llx.%llx\n", 146262306a36Sopenharmony_ci dn, d_count(dn), 146362306a36Sopenharmony_ci realdn, d_count(realdn), 146462306a36Sopenharmony_ci d_inode(realdn), ceph_vinop(d_inode(realdn))); 146562306a36Sopenharmony_ci dput(dn); 146662306a36Sopenharmony_ci *pdn = realdn; 146762306a36Sopenharmony_ci } else { 146862306a36Sopenharmony_ci BUG_ON(!ceph_dentry(dn)); 146962306a36Sopenharmony_ci dout("dn %p attached to %p ino %llx.%llx\n", 147062306a36Sopenharmony_ci dn, d_inode(dn), ceph_vinop(d_inode(dn))); 147162306a36Sopenharmony_ci } 147262306a36Sopenharmony_ci return 0; 147362306a36Sopenharmony_ci} 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci/* 147662306a36Sopenharmony_ci * Incorporate results into the local cache. This is either just 147762306a36Sopenharmony_ci * one inode, or a directory, dentry, and possibly linked-to inode (e.g., 147862306a36Sopenharmony_ci * after a lookup). 147962306a36Sopenharmony_ci * 148062306a36Sopenharmony_ci * A reply may contain 148162306a36Sopenharmony_ci * a directory inode along with a dentry. 148262306a36Sopenharmony_ci * and/or a target inode 148362306a36Sopenharmony_ci * 148462306a36Sopenharmony_ci * Called with snap_rwsem (read). 148562306a36Sopenharmony_ci */ 148662306a36Sopenharmony_ciint ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) 148762306a36Sopenharmony_ci{ 148862306a36Sopenharmony_ci struct ceph_mds_session *session = req->r_session; 148962306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; 149062306a36Sopenharmony_ci struct inode *in = NULL; 149162306a36Sopenharmony_ci struct ceph_vino tvino, dvino; 149262306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_sb_to_client(sb); 149362306a36Sopenharmony_ci int err = 0; 149462306a36Sopenharmony_ci 149562306a36Sopenharmony_ci dout("fill_trace %p is_dentry %d is_target %d\n", req, 149662306a36Sopenharmony_ci rinfo->head->is_dentry, rinfo->head->is_target); 149762306a36Sopenharmony_ci 149862306a36Sopenharmony_ci if (!rinfo->head->is_target && !rinfo->head->is_dentry) { 149962306a36Sopenharmony_ci dout("fill_trace reply is empty!\n"); 150062306a36Sopenharmony_ci if (rinfo->head->result == 0 && req->r_parent) 150162306a36Sopenharmony_ci ceph_invalidate_dir_request(req); 150262306a36Sopenharmony_ci return 0; 150362306a36Sopenharmony_ci } 150462306a36Sopenharmony_ci 150562306a36Sopenharmony_ci if (rinfo->head->is_dentry) { 150662306a36Sopenharmony_ci struct inode *dir = req->r_parent; 150762306a36Sopenharmony_ci 150862306a36Sopenharmony_ci if (dir) { 150962306a36Sopenharmony_ci err = ceph_fill_inode(dir, NULL, &rinfo->diri, 151062306a36Sopenharmony_ci rinfo->dirfrag, session, -1, 151162306a36Sopenharmony_ci &req->r_caps_reservation); 151262306a36Sopenharmony_ci if (err < 0) 151362306a36Sopenharmony_ci goto done; 151462306a36Sopenharmony_ci } else { 151562306a36Sopenharmony_ci WARN_ON_ONCE(1); 151662306a36Sopenharmony_ci } 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_ci if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME && 151962306a36Sopenharmony_ci test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && 152062306a36Sopenharmony_ci !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { 152162306a36Sopenharmony_ci bool is_nokey = false; 152262306a36Sopenharmony_ci struct qstr dname; 152362306a36Sopenharmony_ci struct dentry *dn, *parent; 152462306a36Sopenharmony_ci struct fscrypt_str oname = FSTR_INIT(NULL, 0); 152562306a36Sopenharmony_ci struct ceph_fname fname = { .dir = dir, 152662306a36Sopenharmony_ci .name = rinfo->dname, 152762306a36Sopenharmony_ci .ctext = rinfo->altname, 152862306a36Sopenharmony_ci .name_len = rinfo->dname_len, 152962306a36Sopenharmony_ci .ctext_len = rinfo->altname_len }; 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_ci BUG_ON(!rinfo->head->is_target); 153262306a36Sopenharmony_ci BUG_ON(req->r_dentry); 153362306a36Sopenharmony_ci 153462306a36Sopenharmony_ci parent = d_find_any_alias(dir); 153562306a36Sopenharmony_ci BUG_ON(!parent); 153662306a36Sopenharmony_ci 153762306a36Sopenharmony_ci err = ceph_fname_alloc_buffer(dir, &oname); 153862306a36Sopenharmony_ci if (err < 0) { 153962306a36Sopenharmony_ci dput(parent); 154062306a36Sopenharmony_ci goto done; 154162306a36Sopenharmony_ci } 154262306a36Sopenharmony_ci 154362306a36Sopenharmony_ci err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey); 154462306a36Sopenharmony_ci if (err < 0) { 154562306a36Sopenharmony_ci dput(parent); 154662306a36Sopenharmony_ci ceph_fname_free_buffer(dir, &oname); 154762306a36Sopenharmony_ci goto done; 154862306a36Sopenharmony_ci } 154962306a36Sopenharmony_ci dname.name = oname.name; 155062306a36Sopenharmony_ci dname.len = oname.len; 155162306a36Sopenharmony_ci dname.hash = full_name_hash(parent, dname.name, dname.len); 155262306a36Sopenharmony_ci tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); 155362306a36Sopenharmony_ci tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); 155462306a36Sopenharmony_ciretry_lookup: 155562306a36Sopenharmony_ci dn = d_lookup(parent, &dname); 155662306a36Sopenharmony_ci dout("d_lookup on parent=%p name=%.*s got %p\n", 155762306a36Sopenharmony_ci parent, dname.len, dname.name, dn); 155862306a36Sopenharmony_ci 155962306a36Sopenharmony_ci if (!dn) { 156062306a36Sopenharmony_ci dn = d_alloc(parent, &dname); 156162306a36Sopenharmony_ci dout("d_alloc %p '%.*s' = %p\n", parent, 156262306a36Sopenharmony_ci dname.len, dname.name, dn); 156362306a36Sopenharmony_ci if (!dn) { 156462306a36Sopenharmony_ci dput(parent); 156562306a36Sopenharmony_ci ceph_fname_free_buffer(dir, &oname); 156662306a36Sopenharmony_ci err = -ENOMEM; 156762306a36Sopenharmony_ci goto done; 156862306a36Sopenharmony_ci } 156962306a36Sopenharmony_ci if (is_nokey) { 157062306a36Sopenharmony_ci spin_lock(&dn->d_lock); 157162306a36Sopenharmony_ci dn->d_flags |= DCACHE_NOKEY_NAME; 157262306a36Sopenharmony_ci spin_unlock(&dn->d_lock); 157362306a36Sopenharmony_ci } 157462306a36Sopenharmony_ci err = 0; 157562306a36Sopenharmony_ci } else if (d_really_is_positive(dn) && 157662306a36Sopenharmony_ci (ceph_ino(d_inode(dn)) != tvino.ino || 157762306a36Sopenharmony_ci ceph_snap(d_inode(dn)) != tvino.snap)) { 157862306a36Sopenharmony_ci dout(" dn %p points to wrong inode %p\n", 157962306a36Sopenharmony_ci dn, d_inode(dn)); 158062306a36Sopenharmony_ci ceph_dir_clear_ordered(dir); 158162306a36Sopenharmony_ci d_delete(dn); 158262306a36Sopenharmony_ci dput(dn); 158362306a36Sopenharmony_ci goto retry_lookup; 158462306a36Sopenharmony_ci } 158562306a36Sopenharmony_ci ceph_fname_free_buffer(dir, &oname); 158662306a36Sopenharmony_ci 158762306a36Sopenharmony_ci req->r_dentry = dn; 158862306a36Sopenharmony_ci dput(parent); 158962306a36Sopenharmony_ci } 159062306a36Sopenharmony_ci } 159162306a36Sopenharmony_ci 159262306a36Sopenharmony_ci if (rinfo->head->is_target) { 159362306a36Sopenharmony_ci /* Should be filled in by handle_reply */ 159462306a36Sopenharmony_ci BUG_ON(!req->r_target_inode); 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_ci in = req->r_target_inode; 159762306a36Sopenharmony_ci err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti, 159862306a36Sopenharmony_ci NULL, session, 159962306a36Sopenharmony_ci (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && 160062306a36Sopenharmony_ci !test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags) && 160162306a36Sopenharmony_ci rinfo->head->result == 0) ? req->r_fmode : -1, 160262306a36Sopenharmony_ci &req->r_caps_reservation); 160362306a36Sopenharmony_ci if (err < 0) { 160462306a36Sopenharmony_ci pr_err("ceph_fill_inode badness %p %llx.%llx\n", 160562306a36Sopenharmony_ci in, ceph_vinop(in)); 160662306a36Sopenharmony_ci req->r_target_inode = NULL; 160762306a36Sopenharmony_ci if (in->i_state & I_NEW) 160862306a36Sopenharmony_ci discard_new_inode(in); 160962306a36Sopenharmony_ci else 161062306a36Sopenharmony_ci iput(in); 161162306a36Sopenharmony_ci goto done; 161262306a36Sopenharmony_ci } 161362306a36Sopenharmony_ci if (in->i_state & I_NEW) 161462306a36Sopenharmony_ci unlock_new_inode(in); 161562306a36Sopenharmony_ci } 161662306a36Sopenharmony_ci 161762306a36Sopenharmony_ci /* 161862306a36Sopenharmony_ci * ignore null lease/binding on snapdir ENOENT, or else we 161962306a36Sopenharmony_ci * will have trouble splicing in the virtual snapdir later 162062306a36Sopenharmony_ci */ 162162306a36Sopenharmony_ci if (rinfo->head->is_dentry && 162262306a36Sopenharmony_ci !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && 162362306a36Sopenharmony_ci test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && 162462306a36Sopenharmony_ci (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, 162562306a36Sopenharmony_ci fsc->mount_options->snapdir_name, 162662306a36Sopenharmony_ci req->r_dentry->d_name.len))) { 162762306a36Sopenharmony_ci /* 162862306a36Sopenharmony_ci * lookup link rename : null -> possibly existing inode 162962306a36Sopenharmony_ci * mknod symlink mkdir : null -> new inode 163062306a36Sopenharmony_ci * unlink : linked -> null 163162306a36Sopenharmony_ci */ 163262306a36Sopenharmony_ci struct inode *dir = req->r_parent; 163362306a36Sopenharmony_ci struct dentry *dn = req->r_dentry; 163462306a36Sopenharmony_ci bool have_dir_cap, have_lease; 163562306a36Sopenharmony_ci 163662306a36Sopenharmony_ci BUG_ON(!dn); 163762306a36Sopenharmony_ci BUG_ON(!dir); 163862306a36Sopenharmony_ci BUG_ON(d_inode(dn->d_parent) != dir); 163962306a36Sopenharmony_ci 164062306a36Sopenharmony_ci dvino.ino = le64_to_cpu(rinfo->diri.in->ino); 164162306a36Sopenharmony_ci dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); 164262306a36Sopenharmony_ci 164362306a36Sopenharmony_ci BUG_ON(ceph_ino(dir) != dvino.ino); 164462306a36Sopenharmony_ci BUG_ON(ceph_snap(dir) != dvino.snap); 164562306a36Sopenharmony_ci 164662306a36Sopenharmony_ci /* do we have a lease on the whole dir? */ 164762306a36Sopenharmony_ci have_dir_cap = 164862306a36Sopenharmony_ci (le32_to_cpu(rinfo->diri.in->cap.caps) & 164962306a36Sopenharmony_ci CEPH_CAP_FILE_SHARED); 165062306a36Sopenharmony_ci 165162306a36Sopenharmony_ci /* do we have a dn lease? */ 165262306a36Sopenharmony_ci have_lease = have_dir_cap || 165362306a36Sopenharmony_ci le32_to_cpu(rinfo->dlease->duration_ms); 165462306a36Sopenharmony_ci if (!have_lease) 165562306a36Sopenharmony_ci dout("fill_trace no dentry lease or dir cap\n"); 165662306a36Sopenharmony_ci 165762306a36Sopenharmony_ci /* rename? */ 165862306a36Sopenharmony_ci if (req->r_old_dentry && req->r_op == CEPH_MDS_OP_RENAME) { 165962306a36Sopenharmony_ci struct inode *olddir = req->r_old_dentry_dir; 166062306a36Sopenharmony_ci BUG_ON(!olddir); 166162306a36Sopenharmony_ci 166262306a36Sopenharmony_ci dout(" src %p '%pd' dst %p '%pd'\n", 166362306a36Sopenharmony_ci req->r_old_dentry, 166462306a36Sopenharmony_ci req->r_old_dentry, 166562306a36Sopenharmony_ci dn, dn); 166662306a36Sopenharmony_ci dout("fill_trace doing d_move %p -> %p\n", 166762306a36Sopenharmony_ci req->r_old_dentry, dn); 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_ci /* d_move screws up sibling dentries' offsets */ 167062306a36Sopenharmony_ci ceph_dir_clear_ordered(dir); 167162306a36Sopenharmony_ci ceph_dir_clear_ordered(olddir); 167262306a36Sopenharmony_ci 167362306a36Sopenharmony_ci d_move(req->r_old_dentry, dn); 167462306a36Sopenharmony_ci dout(" src %p '%pd' dst %p '%pd'\n", 167562306a36Sopenharmony_ci req->r_old_dentry, 167662306a36Sopenharmony_ci req->r_old_dentry, 167762306a36Sopenharmony_ci dn, dn); 167862306a36Sopenharmony_ci 167962306a36Sopenharmony_ci /* ensure target dentry is invalidated, despite 168062306a36Sopenharmony_ci rehashing bug in vfs_rename_dir */ 168162306a36Sopenharmony_ci ceph_invalidate_dentry_lease(dn); 168262306a36Sopenharmony_ci 168362306a36Sopenharmony_ci dout("dn %p gets new offset %lld\n", req->r_old_dentry, 168462306a36Sopenharmony_ci ceph_dentry(req->r_old_dentry)->offset); 168562306a36Sopenharmony_ci 168662306a36Sopenharmony_ci /* swap r_dentry and r_old_dentry in case that 168762306a36Sopenharmony_ci * splice_dentry() gets called later. This is safe 168862306a36Sopenharmony_ci * because no other place will use them */ 168962306a36Sopenharmony_ci req->r_dentry = req->r_old_dentry; 169062306a36Sopenharmony_ci req->r_old_dentry = dn; 169162306a36Sopenharmony_ci dn = req->r_dentry; 169262306a36Sopenharmony_ci } 169362306a36Sopenharmony_ci 169462306a36Sopenharmony_ci /* null dentry? */ 169562306a36Sopenharmony_ci if (!rinfo->head->is_target) { 169662306a36Sopenharmony_ci dout("fill_trace null dentry\n"); 169762306a36Sopenharmony_ci if (d_really_is_positive(dn)) { 169862306a36Sopenharmony_ci dout("d_delete %p\n", dn); 169962306a36Sopenharmony_ci ceph_dir_clear_ordered(dir); 170062306a36Sopenharmony_ci d_delete(dn); 170162306a36Sopenharmony_ci } else if (have_lease) { 170262306a36Sopenharmony_ci if (d_unhashed(dn)) 170362306a36Sopenharmony_ci d_add(dn, NULL); 170462306a36Sopenharmony_ci } 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci if (!d_unhashed(dn) && have_lease) 170762306a36Sopenharmony_ci update_dentry_lease(dir, dn, 170862306a36Sopenharmony_ci rinfo->dlease, session, 170962306a36Sopenharmony_ci req->r_request_started); 171062306a36Sopenharmony_ci goto done; 171162306a36Sopenharmony_ci } 171262306a36Sopenharmony_ci 171362306a36Sopenharmony_ci /* attach proper inode */ 171462306a36Sopenharmony_ci if (d_really_is_negative(dn)) { 171562306a36Sopenharmony_ci ceph_dir_clear_ordered(dir); 171662306a36Sopenharmony_ci ihold(in); 171762306a36Sopenharmony_ci err = splice_dentry(&req->r_dentry, in); 171862306a36Sopenharmony_ci if (err < 0) 171962306a36Sopenharmony_ci goto done; 172062306a36Sopenharmony_ci dn = req->r_dentry; /* may have spliced */ 172162306a36Sopenharmony_ci } else if (d_really_is_positive(dn) && d_inode(dn) != in) { 172262306a36Sopenharmony_ci dout(" %p links to %p %llx.%llx, not %llx.%llx\n", 172362306a36Sopenharmony_ci dn, d_inode(dn), ceph_vinop(d_inode(dn)), 172462306a36Sopenharmony_ci ceph_vinop(in)); 172562306a36Sopenharmony_ci d_invalidate(dn); 172662306a36Sopenharmony_ci have_lease = false; 172762306a36Sopenharmony_ci } 172862306a36Sopenharmony_ci 172962306a36Sopenharmony_ci if (have_lease) { 173062306a36Sopenharmony_ci update_dentry_lease(dir, dn, 173162306a36Sopenharmony_ci rinfo->dlease, session, 173262306a36Sopenharmony_ci req->r_request_started); 173362306a36Sopenharmony_ci } 173462306a36Sopenharmony_ci dout(" final dn %p\n", dn); 173562306a36Sopenharmony_ci } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || 173662306a36Sopenharmony_ci req->r_op == CEPH_MDS_OP_MKSNAP) && 173762306a36Sopenharmony_ci test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && 173862306a36Sopenharmony_ci !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { 173962306a36Sopenharmony_ci struct inode *dir = req->r_parent; 174062306a36Sopenharmony_ci 174162306a36Sopenharmony_ci /* fill out a snapdir LOOKUPSNAP dentry */ 174262306a36Sopenharmony_ci BUG_ON(!dir); 174362306a36Sopenharmony_ci BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR); 174462306a36Sopenharmony_ci BUG_ON(!req->r_dentry); 174562306a36Sopenharmony_ci dout(" linking snapped dir %p to dn %p\n", in, req->r_dentry); 174662306a36Sopenharmony_ci ceph_dir_clear_ordered(dir); 174762306a36Sopenharmony_ci ihold(in); 174862306a36Sopenharmony_ci err = splice_dentry(&req->r_dentry, in); 174962306a36Sopenharmony_ci if (err < 0) 175062306a36Sopenharmony_ci goto done; 175162306a36Sopenharmony_ci } else if (rinfo->head->is_dentry && req->r_dentry) { 175262306a36Sopenharmony_ci /* parent inode is not locked, be carefull */ 175362306a36Sopenharmony_ci struct ceph_vino *ptvino = NULL; 175462306a36Sopenharmony_ci dvino.ino = le64_to_cpu(rinfo->diri.in->ino); 175562306a36Sopenharmony_ci dvino.snap = le64_to_cpu(rinfo->diri.in->snapid); 175662306a36Sopenharmony_ci if (rinfo->head->is_target) { 175762306a36Sopenharmony_ci tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); 175862306a36Sopenharmony_ci tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); 175962306a36Sopenharmony_ci ptvino = &tvino; 176062306a36Sopenharmony_ci } 176162306a36Sopenharmony_ci update_dentry_lease_careful(req->r_dentry, rinfo->dlease, 176262306a36Sopenharmony_ci session, req->r_request_started, 176362306a36Sopenharmony_ci rinfo->dname, rinfo->dname_len, 176462306a36Sopenharmony_ci &dvino, ptvino); 176562306a36Sopenharmony_ci } 176662306a36Sopenharmony_cidone: 176762306a36Sopenharmony_ci dout("fill_trace done err=%d\n", err); 176862306a36Sopenharmony_ci return err; 176962306a36Sopenharmony_ci} 177062306a36Sopenharmony_ci 177162306a36Sopenharmony_ci/* 177262306a36Sopenharmony_ci * Prepopulate our cache with readdir results, leases, etc. 177362306a36Sopenharmony_ci */ 177462306a36Sopenharmony_cistatic int readdir_prepopulate_inodes_only(struct ceph_mds_request *req, 177562306a36Sopenharmony_ci struct ceph_mds_session *session) 177662306a36Sopenharmony_ci{ 177762306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; 177862306a36Sopenharmony_ci int i, err = 0; 177962306a36Sopenharmony_ci 178062306a36Sopenharmony_ci for (i = 0; i < rinfo->dir_nr; i++) { 178162306a36Sopenharmony_ci struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; 178262306a36Sopenharmony_ci struct ceph_vino vino; 178362306a36Sopenharmony_ci struct inode *in; 178462306a36Sopenharmony_ci int rc; 178562306a36Sopenharmony_ci 178662306a36Sopenharmony_ci vino.ino = le64_to_cpu(rde->inode.in->ino); 178762306a36Sopenharmony_ci vino.snap = le64_to_cpu(rde->inode.in->snapid); 178862306a36Sopenharmony_ci 178962306a36Sopenharmony_ci in = ceph_get_inode(req->r_dentry->d_sb, vino, NULL); 179062306a36Sopenharmony_ci if (IS_ERR(in)) { 179162306a36Sopenharmony_ci err = PTR_ERR(in); 179262306a36Sopenharmony_ci dout("new_inode badness got %d\n", err); 179362306a36Sopenharmony_ci continue; 179462306a36Sopenharmony_ci } 179562306a36Sopenharmony_ci rc = ceph_fill_inode(in, NULL, &rde->inode, NULL, session, 179662306a36Sopenharmony_ci -1, &req->r_caps_reservation); 179762306a36Sopenharmony_ci if (rc < 0) { 179862306a36Sopenharmony_ci pr_err("ceph_fill_inode badness on %p got %d\n", 179962306a36Sopenharmony_ci in, rc); 180062306a36Sopenharmony_ci err = rc; 180162306a36Sopenharmony_ci if (in->i_state & I_NEW) { 180262306a36Sopenharmony_ci ihold(in); 180362306a36Sopenharmony_ci discard_new_inode(in); 180462306a36Sopenharmony_ci } 180562306a36Sopenharmony_ci } else if (in->i_state & I_NEW) { 180662306a36Sopenharmony_ci unlock_new_inode(in); 180762306a36Sopenharmony_ci } 180862306a36Sopenharmony_ci 180962306a36Sopenharmony_ci iput(in); 181062306a36Sopenharmony_ci } 181162306a36Sopenharmony_ci 181262306a36Sopenharmony_ci return err; 181362306a36Sopenharmony_ci} 181462306a36Sopenharmony_ci 181562306a36Sopenharmony_civoid ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl) 181662306a36Sopenharmony_ci{ 181762306a36Sopenharmony_ci if (ctl->page) { 181862306a36Sopenharmony_ci kunmap(ctl->page); 181962306a36Sopenharmony_ci put_page(ctl->page); 182062306a36Sopenharmony_ci ctl->page = NULL; 182162306a36Sopenharmony_ci } 182262306a36Sopenharmony_ci} 182362306a36Sopenharmony_ci 182462306a36Sopenharmony_cistatic int fill_readdir_cache(struct inode *dir, struct dentry *dn, 182562306a36Sopenharmony_ci struct ceph_readdir_cache_control *ctl, 182662306a36Sopenharmony_ci struct ceph_mds_request *req) 182762306a36Sopenharmony_ci{ 182862306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(dir); 182962306a36Sopenharmony_ci unsigned nsize = PAGE_SIZE / sizeof(struct dentry*); 183062306a36Sopenharmony_ci unsigned idx = ctl->index % nsize; 183162306a36Sopenharmony_ci pgoff_t pgoff = ctl->index / nsize; 183262306a36Sopenharmony_ci 183362306a36Sopenharmony_ci if (!ctl->page || pgoff != page_index(ctl->page)) { 183462306a36Sopenharmony_ci ceph_readdir_cache_release(ctl); 183562306a36Sopenharmony_ci if (idx == 0) 183662306a36Sopenharmony_ci ctl->page = grab_cache_page(&dir->i_data, pgoff); 183762306a36Sopenharmony_ci else 183862306a36Sopenharmony_ci ctl->page = find_lock_page(&dir->i_data, pgoff); 183962306a36Sopenharmony_ci if (!ctl->page) { 184062306a36Sopenharmony_ci ctl->index = -1; 184162306a36Sopenharmony_ci return idx == 0 ? -ENOMEM : 0; 184262306a36Sopenharmony_ci } 184362306a36Sopenharmony_ci /* reading/filling the cache are serialized by 184462306a36Sopenharmony_ci * i_rwsem, no need to use page lock */ 184562306a36Sopenharmony_ci unlock_page(ctl->page); 184662306a36Sopenharmony_ci ctl->dentries = kmap(ctl->page); 184762306a36Sopenharmony_ci if (idx == 0) 184862306a36Sopenharmony_ci memset(ctl->dentries, 0, PAGE_SIZE); 184962306a36Sopenharmony_ci } 185062306a36Sopenharmony_ci 185162306a36Sopenharmony_ci if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) && 185262306a36Sopenharmony_ci req->r_dir_ordered_cnt == atomic64_read(&ci->i_ordered_count)) { 185362306a36Sopenharmony_ci dout("readdir cache dn %p idx %d\n", dn, ctl->index); 185462306a36Sopenharmony_ci ctl->dentries[idx] = dn; 185562306a36Sopenharmony_ci ctl->index++; 185662306a36Sopenharmony_ci } else { 185762306a36Sopenharmony_ci dout("disable readdir cache\n"); 185862306a36Sopenharmony_ci ctl->index = -1; 185962306a36Sopenharmony_ci } 186062306a36Sopenharmony_ci return 0; 186162306a36Sopenharmony_ci} 186262306a36Sopenharmony_ci 186362306a36Sopenharmony_ciint ceph_readdir_prepopulate(struct ceph_mds_request *req, 186462306a36Sopenharmony_ci struct ceph_mds_session *session) 186562306a36Sopenharmony_ci{ 186662306a36Sopenharmony_ci struct dentry *parent = req->r_dentry; 186762306a36Sopenharmony_ci struct inode *inode = d_inode(parent); 186862306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 186962306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; 187062306a36Sopenharmony_ci struct qstr dname; 187162306a36Sopenharmony_ci struct dentry *dn; 187262306a36Sopenharmony_ci struct inode *in; 187362306a36Sopenharmony_ci int err = 0, skipped = 0, ret, i; 187462306a36Sopenharmony_ci u32 frag = le32_to_cpu(req->r_args.readdir.frag); 187562306a36Sopenharmony_ci u32 last_hash = 0; 187662306a36Sopenharmony_ci u32 fpos_offset; 187762306a36Sopenharmony_ci struct ceph_readdir_cache_control cache_ctl = {}; 187862306a36Sopenharmony_ci 187962306a36Sopenharmony_ci if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) 188062306a36Sopenharmony_ci return readdir_prepopulate_inodes_only(req, session); 188162306a36Sopenharmony_ci 188262306a36Sopenharmony_ci if (rinfo->hash_order) { 188362306a36Sopenharmony_ci if (req->r_path2) { 188462306a36Sopenharmony_ci last_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, 188562306a36Sopenharmony_ci req->r_path2, 188662306a36Sopenharmony_ci strlen(req->r_path2)); 188762306a36Sopenharmony_ci last_hash = ceph_frag_value(last_hash); 188862306a36Sopenharmony_ci } else if (rinfo->offset_hash) { 188962306a36Sopenharmony_ci /* mds understands offset_hash */ 189062306a36Sopenharmony_ci WARN_ON_ONCE(req->r_readdir_offset != 2); 189162306a36Sopenharmony_ci last_hash = le32_to_cpu(req->r_args.readdir.offset_hash); 189262306a36Sopenharmony_ci } 189362306a36Sopenharmony_ci } 189462306a36Sopenharmony_ci 189562306a36Sopenharmony_ci if (rinfo->dir_dir && 189662306a36Sopenharmony_ci le32_to_cpu(rinfo->dir_dir->frag) != frag) { 189762306a36Sopenharmony_ci dout("readdir_prepopulate got new frag %x -> %x\n", 189862306a36Sopenharmony_ci frag, le32_to_cpu(rinfo->dir_dir->frag)); 189962306a36Sopenharmony_ci frag = le32_to_cpu(rinfo->dir_dir->frag); 190062306a36Sopenharmony_ci if (!rinfo->hash_order) 190162306a36Sopenharmony_ci req->r_readdir_offset = 2; 190262306a36Sopenharmony_ci } 190362306a36Sopenharmony_ci 190462306a36Sopenharmony_ci if (le32_to_cpu(rinfo->head->op) == CEPH_MDS_OP_LSSNAP) { 190562306a36Sopenharmony_ci dout("readdir_prepopulate %d items under SNAPDIR dn %p\n", 190662306a36Sopenharmony_ci rinfo->dir_nr, parent); 190762306a36Sopenharmony_ci } else { 190862306a36Sopenharmony_ci dout("readdir_prepopulate %d items under dn %p\n", 190962306a36Sopenharmony_ci rinfo->dir_nr, parent); 191062306a36Sopenharmony_ci if (rinfo->dir_dir) 191162306a36Sopenharmony_ci ceph_fill_dirfrag(d_inode(parent), rinfo->dir_dir); 191262306a36Sopenharmony_ci 191362306a36Sopenharmony_ci if (ceph_frag_is_leftmost(frag) && 191462306a36Sopenharmony_ci req->r_readdir_offset == 2 && 191562306a36Sopenharmony_ci !(rinfo->hash_order && last_hash)) { 191662306a36Sopenharmony_ci /* note dir version at start of readdir so we can 191762306a36Sopenharmony_ci * tell if any dentries get dropped */ 191862306a36Sopenharmony_ci req->r_dir_release_cnt = 191962306a36Sopenharmony_ci atomic64_read(&ci->i_release_count); 192062306a36Sopenharmony_ci req->r_dir_ordered_cnt = 192162306a36Sopenharmony_ci atomic64_read(&ci->i_ordered_count); 192262306a36Sopenharmony_ci req->r_readdir_cache_idx = 0; 192362306a36Sopenharmony_ci } 192462306a36Sopenharmony_ci } 192562306a36Sopenharmony_ci 192662306a36Sopenharmony_ci cache_ctl.index = req->r_readdir_cache_idx; 192762306a36Sopenharmony_ci fpos_offset = req->r_readdir_offset; 192862306a36Sopenharmony_ci 192962306a36Sopenharmony_ci /* FIXME: release caps/leases if error occurs */ 193062306a36Sopenharmony_ci for (i = 0; i < rinfo->dir_nr; i++) { 193162306a36Sopenharmony_ci struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i; 193262306a36Sopenharmony_ci struct ceph_vino tvino; 193362306a36Sopenharmony_ci 193462306a36Sopenharmony_ci dname.name = rde->name; 193562306a36Sopenharmony_ci dname.len = rde->name_len; 193662306a36Sopenharmony_ci dname.hash = full_name_hash(parent, dname.name, dname.len); 193762306a36Sopenharmony_ci 193862306a36Sopenharmony_ci tvino.ino = le64_to_cpu(rde->inode.in->ino); 193962306a36Sopenharmony_ci tvino.snap = le64_to_cpu(rde->inode.in->snapid); 194062306a36Sopenharmony_ci 194162306a36Sopenharmony_ci if (rinfo->hash_order) { 194262306a36Sopenharmony_ci u32 hash = ceph_frag_value(rde->raw_hash); 194362306a36Sopenharmony_ci if (hash != last_hash) 194462306a36Sopenharmony_ci fpos_offset = 2; 194562306a36Sopenharmony_ci last_hash = hash; 194662306a36Sopenharmony_ci rde->offset = ceph_make_fpos(hash, fpos_offset++, true); 194762306a36Sopenharmony_ci } else { 194862306a36Sopenharmony_ci rde->offset = ceph_make_fpos(frag, fpos_offset++, false); 194962306a36Sopenharmony_ci } 195062306a36Sopenharmony_ci 195162306a36Sopenharmony_ciretry_lookup: 195262306a36Sopenharmony_ci dn = d_lookup(parent, &dname); 195362306a36Sopenharmony_ci dout("d_lookup on parent=%p name=%.*s got %p\n", 195462306a36Sopenharmony_ci parent, dname.len, dname.name, dn); 195562306a36Sopenharmony_ci 195662306a36Sopenharmony_ci if (!dn) { 195762306a36Sopenharmony_ci dn = d_alloc(parent, &dname); 195862306a36Sopenharmony_ci dout("d_alloc %p '%.*s' = %p\n", parent, 195962306a36Sopenharmony_ci dname.len, dname.name, dn); 196062306a36Sopenharmony_ci if (!dn) { 196162306a36Sopenharmony_ci dout("d_alloc badness\n"); 196262306a36Sopenharmony_ci err = -ENOMEM; 196362306a36Sopenharmony_ci goto out; 196462306a36Sopenharmony_ci } 196562306a36Sopenharmony_ci if (rde->is_nokey) { 196662306a36Sopenharmony_ci spin_lock(&dn->d_lock); 196762306a36Sopenharmony_ci dn->d_flags |= DCACHE_NOKEY_NAME; 196862306a36Sopenharmony_ci spin_unlock(&dn->d_lock); 196962306a36Sopenharmony_ci } 197062306a36Sopenharmony_ci } else if (d_really_is_positive(dn) && 197162306a36Sopenharmony_ci (ceph_ino(d_inode(dn)) != tvino.ino || 197262306a36Sopenharmony_ci ceph_snap(d_inode(dn)) != tvino.snap)) { 197362306a36Sopenharmony_ci struct ceph_dentry_info *di = ceph_dentry(dn); 197462306a36Sopenharmony_ci dout(" dn %p points to wrong inode %p\n", 197562306a36Sopenharmony_ci dn, d_inode(dn)); 197662306a36Sopenharmony_ci 197762306a36Sopenharmony_ci spin_lock(&dn->d_lock); 197862306a36Sopenharmony_ci if (di->offset > 0 && 197962306a36Sopenharmony_ci di->lease_shared_gen == 198062306a36Sopenharmony_ci atomic_read(&ci->i_shared_gen)) { 198162306a36Sopenharmony_ci __ceph_dir_clear_ordered(ci); 198262306a36Sopenharmony_ci di->offset = 0; 198362306a36Sopenharmony_ci } 198462306a36Sopenharmony_ci spin_unlock(&dn->d_lock); 198562306a36Sopenharmony_ci 198662306a36Sopenharmony_ci d_delete(dn); 198762306a36Sopenharmony_ci dput(dn); 198862306a36Sopenharmony_ci goto retry_lookup; 198962306a36Sopenharmony_ci } 199062306a36Sopenharmony_ci 199162306a36Sopenharmony_ci /* inode */ 199262306a36Sopenharmony_ci if (d_really_is_positive(dn)) { 199362306a36Sopenharmony_ci in = d_inode(dn); 199462306a36Sopenharmony_ci } else { 199562306a36Sopenharmony_ci in = ceph_get_inode(parent->d_sb, tvino, NULL); 199662306a36Sopenharmony_ci if (IS_ERR(in)) { 199762306a36Sopenharmony_ci dout("new_inode badness\n"); 199862306a36Sopenharmony_ci d_drop(dn); 199962306a36Sopenharmony_ci dput(dn); 200062306a36Sopenharmony_ci err = PTR_ERR(in); 200162306a36Sopenharmony_ci goto out; 200262306a36Sopenharmony_ci } 200362306a36Sopenharmony_ci } 200462306a36Sopenharmony_ci 200562306a36Sopenharmony_ci ret = ceph_fill_inode(in, NULL, &rde->inode, NULL, session, 200662306a36Sopenharmony_ci -1, &req->r_caps_reservation); 200762306a36Sopenharmony_ci if (ret < 0) { 200862306a36Sopenharmony_ci pr_err("ceph_fill_inode badness on %p\n", in); 200962306a36Sopenharmony_ci if (d_really_is_negative(dn)) { 201062306a36Sopenharmony_ci if (in->i_state & I_NEW) { 201162306a36Sopenharmony_ci ihold(in); 201262306a36Sopenharmony_ci discard_new_inode(in); 201362306a36Sopenharmony_ci } 201462306a36Sopenharmony_ci iput(in); 201562306a36Sopenharmony_ci } 201662306a36Sopenharmony_ci d_drop(dn); 201762306a36Sopenharmony_ci err = ret; 201862306a36Sopenharmony_ci goto next_item; 201962306a36Sopenharmony_ci } 202062306a36Sopenharmony_ci if (in->i_state & I_NEW) 202162306a36Sopenharmony_ci unlock_new_inode(in); 202262306a36Sopenharmony_ci 202362306a36Sopenharmony_ci if (d_really_is_negative(dn)) { 202462306a36Sopenharmony_ci if (ceph_security_xattr_deadlock(in)) { 202562306a36Sopenharmony_ci dout(" skip splicing dn %p to inode %p" 202662306a36Sopenharmony_ci " (security xattr deadlock)\n", dn, in); 202762306a36Sopenharmony_ci iput(in); 202862306a36Sopenharmony_ci skipped++; 202962306a36Sopenharmony_ci goto next_item; 203062306a36Sopenharmony_ci } 203162306a36Sopenharmony_ci 203262306a36Sopenharmony_ci err = splice_dentry(&dn, in); 203362306a36Sopenharmony_ci if (err < 0) 203462306a36Sopenharmony_ci goto next_item; 203562306a36Sopenharmony_ci } 203662306a36Sopenharmony_ci 203762306a36Sopenharmony_ci ceph_dentry(dn)->offset = rde->offset; 203862306a36Sopenharmony_ci 203962306a36Sopenharmony_ci update_dentry_lease(d_inode(parent), dn, 204062306a36Sopenharmony_ci rde->lease, req->r_session, 204162306a36Sopenharmony_ci req->r_request_started); 204262306a36Sopenharmony_ci 204362306a36Sopenharmony_ci if (err == 0 && skipped == 0 && cache_ctl.index >= 0) { 204462306a36Sopenharmony_ci ret = fill_readdir_cache(d_inode(parent), dn, 204562306a36Sopenharmony_ci &cache_ctl, req); 204662306a36Sopenharmony_ci if (ret < 0) 204762306a36Sopenharmony_ci err = ret; 204862306a36Sopenharmony_ci } 204962306a36Sopenharmony_cinext_item: 205062306a36Sopenharmony_ci dput(dn); 205162306a36Sopenharmony_ci } 205262306a36Sopenharmony_ciout: 205362306a36Sopenharmony_ci if (err == 0 && skipped == 0) { 205462306a36Sopenharmony_ci set_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags); 205562306a36Sopenharmony_ci req->r_readdir_cache_idx = cache_ctl.index; 205662306a36Sopenharmony_ci } 205762306a36Sopenharmony_ci ceph_readdir_cache_release(&cache_ctl); 205862306a36Sopenharmony_ci dout("readdir_prepopulate done\n"); 205962306a36Sopenharmony_ci return err; 206062306a36Sopenharmony_ci} 206162306a36Sopenharmony_ci 206262306a36Sopenharmony_cibool ceph_inode_set_size(struct inode *inode, loff_t size) 206362306a36Sopenharmony_ci{ 206462306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 206562306a36Sopenharmony_ci bool ret; 206662306a36Sopenharmony_ci 206762306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 206862306a36Sopenharmony_ci dout("set_size %p %llu -> %llu\n", inode, i_size_read(inode), size); 206962306a36Sopenharmony_ci i_size_write(inode, size); 207062306a36Sopenharmony_ci ceph_fscache_update(inode); 207162306a36Sopenharmony_ci inode->i_blocks = calc_inode_blocks(size); 207262306a36Sopenharmony_ci 207362306a36Sopenharmony_ci ret = __ceph_should_report_size(ci); 207462306a36Sopenharmony_ci 207562306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 207662306a36Sopenharmony_ci 207762306a36Sopenharmony_ci return ret; 207862306a36Sopenharmony_ci} 207962306a36Sopenharmony_ci 208062306a36Sopenharmony_civoid ceph_queue_inode_work(struct inode *inode, int work_bit) 208162306a36Sopenharmony_ci{ 208262306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 208362306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 208462306a36Sopenharmony_ci set_bit(work_bit, &ci->i_work_mask); 208562306a36Sopenharmony_ci 208662306a36Sopenharmony_ci ihold(inode); 208762306a36Sopenharmony_ci if (queue_work(fsc->inode_wq, &ci->i_work)) { 208862306a36Sopenharmony_ci dout("queue_inode_work %p, mask=%lx\n", inode, ci->i_work_mask); 208962306a36Sopenharmony_ci } else { 209062306a36Sopenharmony_ci dout("queue_inode_work %p already queued, mask=%lx\n", 209162306a36Sopenharmony_ci inode, ci->i_work_mask); 209262306a36Sopenharmony_ci iput(inode); 209362306a36Sopenharmony_ci } 209462306a36Sopenharmony_ci} 209562306a36Sopenharmony_ci 209662306a36Sopenharmony_cistatic void ceph_do_invalidate_pages(struct inode *inode) 209762306a36Sopenharmony_ci{ 209862306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 209962306a36Sopenharmony_ci u32 orig_gen; 210062306a36Sopenharmony_ci int check = 0; 210162306a36Sopenharmony_ci 210262306a36Sopenharmony_ci ceph_fscache_invalidate(inode, false); 210362306a36Sopenharmony_ci 210462306a36Sopenharmony_ci mutex_lock(&ci->i_truncate_mutex); 210562306a36Sopenharmony_ci 210662306a36Sopenharmony_ci if (ceph_inode_is_shutdown(inode)) { 210762306a36Sopenharmony_ci pr_warn_ratelimited("%s: inode %llx.%llx is shut down\n", 210862306a36Sopenharmony_ci __func__, ceph_vinop(inode)); 210962306a36Sopenharmony_ci mapping_set_error(inode->i_mapping, -EIO); 211062306a36Sopenharmony_ci truncate_pagecache(inode, 0); 211162306a36Sopenharmony_ci mutex_unlock(&ci->i_truncate_mutex); 211262306a36Sopenharmony_ci goto out; 211362306a36Sopenharmony_ci } 211462306a36Sopenharmony_ci 211562306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 211662306a36Sopenharmony_ci dout("invalidate_pages %p gen %d revoking %d\n", inode, 211762306a36Sopenharmony_ci ci->i_rdcache_gen, ci->i_rdcache_revoking); 211862306a36Sopenharmony_ci if (ci->i_rdcache_revoking != ci->i_rdcache_gen) { 211962306a36Sopenharmony_ci if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE)) 212062306a36Sopenharmony_ci check = 1; 212162306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 212262306a36Sopenharmony_ci mutex_unlock(&ci->i_truncate_mutex); 212362306a36Sopenharmony_ci goto out; 212462306a36Sopenharmony_ci } 212562306a36Sopenharmony_ci orig_gen = ci->i_rdcache_gen; 212662306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 212762306a36Sopenharmony_ci 212862306a36Sopenharmony_ci if (invalidate_inode_pages2(inode->i_mapping) < 0) { 212962306a36Sopenharmony_ci pr_err("invalidate_inode_pages2 %llx.%llx failed\n", 213062306a36Sopenharmony_ci ceph_vinop(inode)); 213162306a36Sopenharmony_ci } 213262306a36Sopenharmony_ci 213362306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 213462306a36Sopenharmony_ci if (orig_gen == ci->i_rdcache_gen && 213562306a36Sopenharmony_ci orig_gen == ci->i_rdcache_revoking) { 213662306a36Sopenharmony_ci dout("invalidate_pages %p gen %d successful\n", inode, 213762306a36Sopenharmony_ci ci->i_rdcache_gen); 213862306a36Sopenharmony_ci ci->i_rdcache_revoking--; 213962306a36Sopenharmony_ci check = 1; 214062306a36Sopenharmony_ci } else { 214162306a36Sopenharmony_ci dout("invalidate_pages %p gen %d raced, now %d revoking %d\n", 214262306a36Sopenharmony_ci inode, orig_gen, ci->i_rdcache_gen, 214362306a36Sopenharmony_ci ci->i_rdcache_revoking); 214462306a36Sopenharmony_ci if (__ceph_caps_revoking_other(ci, NULL, CEPH_CAP_FILE_CACHE)) 214562306a36Sopenharmony_ci check = 1; 214662306a36Sopenharmony_ci } 214762306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 214862306a36Sopenharmony_ci mutex_unlock(&ci->i_truncate_mutex); 214962306a36Sopenharmony_ciout: 215062306a36Sopenharmony_ci if (check) 215162306a36Sopenharmony_ci ceph_check_caps(ci, 0); 215262306a36Sopenharmony_ci} 215362306a36Sopenharmony_ci 215462306a36Sopenharmony_ci/* 215562306a36Sopenharmony_ci * Make sure any pending truncation is applied before doing anything 215662306a36Sopenharmony_ci * that may depend on it. 215762306a36Sopenharmony_ci */ 215862306a36Sopenharmony_civoid __ceph_do_pending_vmtruncate(struct inode *inode) 215962306a36Sopenharmony_ci{ 216062306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 216162306a36Sopenharmony_ci u64 to; 216262306a36Sopenharmony_ci int wrbuffer_refs, finish = 0; 216362306a36Sopenharmony_ci 216462306a36Sopenharmony_ci mutex_lock(&ci->i_truncate_mutex); 216562306a36Sopenharmony_ciretry: 216662306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 216762306a36Sopenharmony_ci if (ci->i_truncate_pending == 0) { 216862306a36Sopenharmony_ci dout("%s %p none pending\n", __func__, inode); 216962306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 217062306a36Sopenharmony_ci mutex_unlock(&ci->i_truncate_mutex); 217162306a36Sopenharmony_ci return; 217262306a36Sopenharmony_ci } 217362306a36Sopenharmony_ci 217462306a36Sopenharmony_ci /* 217562306a36Sopenharmony_ci * make sure any dirty snapped pages are flushed before we 217662306a36Sopenharmony_ci * possibly truncate them.. so write AND block! 217762306a36Sopenharmony_ci */ 217862306a36Sopenharmony_ci if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) { 217962306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 218062306a36Sopenharmony_ci dout("%s %p flushing snaps first\n", __func__, inode); 218162306a36Sopenharmony_ci filemap_write_and_wait_range(&inode->i_data, 0, 218262306a36Sopenharmony_ci inode->i_sb->s_maxbytes); 218362306a36Sopenharmony_ci goto retry; 218462306a36Sopenharmony_ci } 218562306a36Sopenharmony_ci 218662306a36Sopenharmony_ci /* there should be no reader or writer */ 218762306a36Sopenharmony_ci WARN_ON_ONCE(ci->i_rd_ref || ci->i_wr_ref); 218862306a36Sopenharmony_ci 218962306a36Sopenharmony_ci to = ci->i_truncate_pagecache_size; 219062306a36Sopenharmony_ci wrbuffer_refs = ci->i_wrbuffer_ref; 219162306a36Sopenharmony_ci dout("%s %p (%d) to %lld\n", __func__, inode, 219262306a36Sopenharmony_ci ci->i_truncate_pending, to); 219362306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 219462306a36Sopenharmony_ci 219562306a36Sopenharmony_ci ceph_fscache_resize(inode, to); 219662306a36Sopenharmony_ci truncate_pagecache(inode, to); 219762306a36Sopenharmony_ci 219862306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 219962306a36Sopenharmony_ci if (to == ci->i_truncate_pagecache_size) { 220062306a36Sopenharmony_ci ci->i_truncate_pending = 0; 220162306a36Sopenharmony_ci finish = 1; 220262306a36Sopenharmony_ci } 220362306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 220462306a36Sopenharmony_ci if (!finish) 220562306a36Sopenharmony_ci goto retry; 220662306a36Sopenharmony_ci 220762306a36Sopenharmony_ci mutex_unlock(&ci->i_truncate_mutex); 220862306a36Sopenharmony_ci 220962306a36Sopenharmony_ci if (wrbuffer_refs == 0) 221062306a36Sopenharmony_ci ceph_check_caps(ci, 0); 221162306a36Sopenharmony_ci 221262306a36Sopenharmony_ci wake_up_all(&ci->i_cap_wq); 221362306a36Sopenharmony_ci} 221462306a36Sopenharmony_ci 221562306a36Sopenharmony_cistatic void ceph_inode_work(struct work_struct *work) 221662306a36Sopenharmony_ci{ 221762306a36Sopenharmony_ci struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info, 221862306a36Sopenharmony_ci i_work); 221962306a36Sopenharmony_ci struct inode *inode = &ci->netfs.inode; 222062306a36Sopenharmony_ci 222162306a36Sopenharmony_ci if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) { 222262306a36Sopenharmony_ci dout("writeback %p\n", inode); 222362306a36Sopenharmony_ci filemap_fdatawrite(&inode->i_data); 222462306a36Sopenharmony_ci } 222562306a36Sopenharmony_ci if (test_and_clear_bit(CEPH_I_WORK_INVALIDATE_PAGES, &ci->i_work_mask)) 222662306a36Sopenharmony_ci ceph_do_invalidate_pages(inode); 222762306a36Sopenharmony_ci 222862306a36Sopenharmony_ci if (test_and_clear_bit(CEPH_I_WORK_VMTRUNCATE, &ci->i_work_mask)) 222962306a36Sopenharmony_ci __ceph_do_pending_vmtruncate(inode); 223062306a36Sopenharmony_ci 223162306a36Sopenharmony_ci if (test_and_clear_bit(CEPH_I_WORK_CHECK_CAPS, &ci->i_work_mask)) 223262306a36Sopenharmony_ci ceph_check_caps(ci, 0); 223362306a36Sopenharmony_ci 223462306a36Sopenharmony_ci if (test_and_clear_bit(CEPH_I_WORK_FLUSH_SNAPS, &ci->i_work_mask)) 223562306a36Sopenharmony_ci ceph_flush_snaps(ci, NULL); 223662306a36Sopenharmony_ci 223762306a36Sopenharmony_ci iput(inode); 223862306a36Sopenharmony_ci} 223962306a36Sopenharmony_ci 224062306a36Sopenharmony_cistatic const char *ceph_encrypted_get_link(struct dentry *dentry, 224162306a36Sopenharmony_ci struct inode *inode, 224262306a36Sopenharmony_ci struct delayed_call *done) 224362306a36Sopenharmony_ci{ 224462306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 224562306a36Sopenharmony_ci 224662306a36Sopenharmony_ci if (!dentry) 224762306a36Sopenharmony_ci return ERR_PTR(-ECHILD); 224862306a36Sopenharmony_ci 224962306a36Sopenharmony_ci return fscrypt_get_symlink(inode, ci->i_symlink, i_size_read(inode), 225062306a36Sopenharmony_ci done); 225162306a36Sopenharmony_ci} 225262306a36Sopenharmony_ci 225362306a36Sopenharmony_cistatic int ceph_encrypted_symlink_getattr(struct mnt_idmap *idmap, 225462306a36Sopenharmony_ci const struct path *path, 225562306a36Sopenharmony_ci struct kstat *stat, u32 request_mask, 225662306a36Sopenharmony_ci unsigned int query_flags) 225762306a36Sopenharmony_ci{ 225862306a36Sopenharmony_ci int ret; 225962306a36Sopenharmony_ci 226062306a36Sopenharmony_ci ret = ceph_getattr(idmap, path, stat, request_mask, query_flags); 226162306a36Sopenharmony_ci if (ret) 226262306a36Sopenharmony_ci return ret; 226362306a36Sopenharmony_ci return fscrypt_symlink_getattr(path, stat); 226462306a36Sopenharmony_ci} 226562306a36Sopenharmony_ci 226662306a36Sopenharmony_ci/* 226762306a36Sopenharmony_ci * symlinks 226862306a36Sopenharmony_ci */ 226962306a36Sopenharmony_cistatic const struct inode_operations ceph_symlink_iops = { 227062306a36Sopenharmony_ci .get_link = simple_get_link, 227162306a36Sopenharmony_ci .setattr = ceph_setattr, 227262306a36Sopenharmony_ci .getattr = ceph_getattr, 227362306a36Sopenharmony_ci .listxattr = ceph_listxattr, 227462306a36Sopenharmony_ci}; 227562306a36Sopenharmony_ci 227662306a36Sopenharmony_cistatic const struct inode_operations ceph_encrypted_symlink_iops = { 227762306a36Sopenharmony_ci .get_link = ceph_encrypted_get_link, 227862306a36Sopenharmony_ci .setattr = ceph_setattr, 227962306a36Sopenharmony_ci .getattr = ceph_encrypted_symlink_getattr, 228062306a36Sopenharmony_ci .listxattr = ceph_listxattr, 228162306a36Sopenharmony_ci}; 228262306a36Sopenharmony_ci 228362306a36Sopenharmony_ci/* 228462306a36Sopenharmony_ci * Transfer the encrypted last block to the MDS and the MDS 228562306a36Sopenharmony_ci * will help update it when truncating a smaller size. 228662306a36Sopenharmony_ci * 228762306a36Sopenharmony_ci * We don't support a PAGE_SIZE that is smaller than the 228862306a36Sopenharmony_ci * CEPH_FSCRYPT_BLOCK_SIZE. 228962306a36Sopenharmony_ci */ 229062306a36Sopenharmony_cistatic int fill_fscrypt_truncate(struct inode *inode, 229162306a36Sopenharmony_ci struct ceph_mds_request *req, 229262306a36Sopenharmony_ci struct iattr *attr) 229362306a36Sopenharmony_ci{ 229462306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 229562306a36Sopenharmony_ci int boff = attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE; 229662306a36Sopenharmony_ci loff_t pos, orig_pos = round_down(attr->ia_size, 229762306a36Sopenharmony_ci CEPH_FSCRYPT_BLOCK_SIZE); 229862306a36Sopenharmony_ci u64 block = orig_pos >> CEPH_FSCRYPT_BLOCK_SHIFT; 229962306a36Sopenharmony_ci struct ceph_pagelist *pagelist = NULL; 230062306a36Sopenharmony_ci struct kvec iov = {0}; 230162306a36Sopenharmony_ci struct iov_iter iter; 230262306a36Sopenharmony_ci struct page *page = NULL; 230362306a36Sopenharmony_ci struct ceph_fscrypt_truncate_size_header header; 230462306a36Sopenharmony_ci int retry_op = 0; 230562306a36Sopenharmony_ci int len = CEPH_FSCRYPT_BLOCK_SIZE; 230662306a36Sopenharmony_ci loff_t i_size = i_size_read(inode); 230762306a36Sopenharmony_ci int got, ret, issued; 230862306a36Sopenharmony_ci u64 objver; 230962306a36Sopenharmony_ci 231062306a36Sopenharmony_ci ret = __ceph_get_caps(inode, NULL, CEPH_CAP_FILE_RD, 0, -1, &got); 231162306a36Sopenharmony_ci if (ret < 0) 231262306a36Sopenharmony_ci return ret; 231362306a36Sopenharmony_ci 231462306a36Sopenharmony_ci issued = __ceph_caps_issued(ci, NULL); 231562306a36Sopenharmony_ci 231662306a36Sopenharmony_ci dout("%s size %lld -> %lld got cap refs on %s, issued %s\n", __func__, 231762306a36Sopenharmony_ci i_size, attr->ia_size, ceph_cap_string(got), 231862306a36Sopenharmony_ci ceph_cap_string(issued)); 231962306a36Sopenharmony_ci 232062306a36Sopenharmony_ci /* Try to writeback the dirty pagecaches */ 232162306a36Sopenharmony_ci if (issued & (CEPH_CAP_FILE_BUFFER)) { 232262306a36Sopenharmony_ci loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1; 232362306a36Sopenharmony_ci 232462306a36Sopenharmony_ci ret = filemap_write_and_wait_range(inode->i_mapping, 232562306a36Sopenharmony_ci orig_pos, lend); 232662306a36Sopenharmony_ci if (ret < 0) 232762306a36Sopenharmony_ci goto out; 232862306a36Sopenharmony_ci } 232962306a36Sopenharmony_ci 233062306a36Sopenharmony_ci page = __page_cache_alloc(GFP_KERNEL); 233162306a36Sopenharmony_ci if (page == NULL) { 233262306a36Sopenharmony_ci ret = -ENOMEM; 233362306a36Sopenharmony_ci goto out; 233462306a36Sopenharmony_ci } 233562306a36Sopenharmony_ci 233662306a36Sopenharmony_ci pagelist = ceph_pagelist_alloc(GFP_KERNEL); 233762306a36Sopenharmony_ci if (!pagelist) { 233862306a36Sopenharmony_ci ret = -ENOMEM; 233962306a36Sopenharmony_ci goto out; 234062306a36Sopenharmony_ci } 234162306a36Sopenharmony_ci 234262306a36Sopenharmony_ci iov.iov_base = kmap_local_page(page); 234362306a36Sopenharmony_ci iov.iov_len = len; 234462306a36Sopenharmony_ci iov_iter_kvec(&iter, READ, &iov, 1, len); 234562306a36Sopenharmony_ci 234662306a36Sopenharmony_ci pos = orig_pos; 234762306a36Sopenharmony_ci ret = __ceph_sync_read(inode, &pos, &iter, &retry_op, &objver); 234862306a36Sopenharmony_ci if (ret < 0) 234962306a36Sopenharmony_ci goto out; 235062306a36Sopenharmony_ci 235162306a36Sopenharmony_ci /* Insert the header first */ 235262306a36Sopenharmony_ci header.ver = 1; 235362306a36Sopenharmony_ci header.compat = 1; 235462306a36Sopenharmony_ci header.change_attr = cpu_to_le64(inode_peek_iversion_raw(inode)); 235562306a36Sopenharmony_ci 235662306a36Sopenharmony_ci /* 235762306a36Sopenharmony_ci * Always set the block_size to CEPH_FSCRYPT_BLOCK_SIZE, 235862306a36Sopenharmony_ci * because in MDS it may need this to do the truncate. 235962306a36Sopenharmony_ci */ 236062306a36Sopenharmony_ci header.block_size = cpu_to_le32(CEPH_FSCRYPT_BLOCK_SIZE); 236162306a36Sopenharmony_ci 236262306a36Sopenharmony_ci /* 236362306a36Sopenharmony_ci * If we hit a hole here, we should just skip filling 236462306a36Sopenharmony_ci * the fscrypt for the request, because once the fscrypt 236562306a36Sopenharmony_ci * is enabled, the file will be split into many blocks 236662306a36Sopenharmony_ci * with the size of CEPH_FSCRYPT_BLOCK_SIZE, if there 236762306a36Sopenharmony_ci * has a hole, the hole size should be multiple of block 236862306a36Sopenharmony_ci * size. 236962306a36Sopenharmony_ci * 237062306a36Sopenharmony_ci * If the Rados object doesn't exist, it will be set to 0. 237162306a36Sopenharmony_ci */ 237262306a36Sopenharmony_ci if (!objver) { 237362306a36Sopenharmony_ci dout("%s hit hole, ppos %lld < size %lld\n", __func__, 237462306a36Sopenharmony_ci pos, i_size); 237562306a36Sopenharmony_ci 237662306a36Sopenharmony_ci header.data_len = cpu_to_le32(8 + 8 + 4); 237762306a36Sopenharmony_ci header.file_offset = 0; 237862306a36Sopenharmony_ci ret = 0; 237962306a36Sopenharmony_ci } else { 238062306a36Sopenharmony_ci header.data_len = cpu_to_le32(8 + 8 + 4 + CEPH_FSCRYPT_BLOCK_SIZE); 238162306a36Sopenharmony_ci header.file_offset = cpu_to_le64(orig_pos); 238262306a36Sopenharmony_ci 238362306a36Sopenharmony_ci dout("%s encrypt block boff/bsize %d/%lu\n", __func__, 238462306a36Sopenharmony_ci boff, CEPH_FSCRYPT_BLOCK_SIZE); 238562306a36Sopenharmony_ci 238662306a36Sopenharmony_ci /* truncate and zero out the extra contents for the last block */ 238762306a36Sopenharmony_ci memset(iov.iov_base + boff, 0, PAGE_SIZE - boff); 238862306a36Sopenharmony_ci 238962306a36Sopenharmony_ci /* encrypt the last block */ 239062306a36Sopenharmony_ci ret = ceph_fscrypt_encrypt_block_inplace(inode, page, 239162306a36Sopenharmony_ci CEPH_FSCRYPT_BLOCK_SIZE, 239262306a36Sopenharmony_ci 0, block, 239362306a36Sopenharmony_ci GFP_KERNEL); 239462306a36Sopenharmony_ci if (ret) 239562306a36Sopenharmony_ci goto out; 239662306a36Sopenharmony_ci } 239762306a36Sopenharmony_ci 239862306a36Sopenharmony_ci /* Insert the header */ 239962306a36Sopenharmony_ci ret = ceph_pagelist_append(pagelist, &header, sizeof(header)); 240062306a36Sopenharmony_ci if (ret) 240162306a36Sopenharmony_ci goto out; 240262306a36Sopenharmony_ci 240362306a36Sopenharmony_ci if (header.block_size) { 240462306a36Sopenharmony_ci /* Append the last block contents to pagelist */ 240562306a36Sopenharmony_ci ret = ceph_pagelist_append(pagelist, iov.iov_base, 240662306a36Sopenharmony_ci CEPH_FSCRYPT_BLOCK_SIZE); 240762306a36Sopenharmony_ci if (ret) 240862306a36Sopenharmony_ci goto out; 240962306a36Sopenharmony_ci } 241062306a36Sopenharmony_ci req->r_pagelist = pagelist; 241162306a36Sopenharmony_ciout: 241262306a36Sopenharmony_ci dout("%s %p size dropping cap refs on %s\n", __func__, 241362306a36Sopenharmony_ci inode, ceph_cap_string(got)); 241462306a36Sopenharmony_ci ceph_put_cap_refs(ci, got); 241562306a36Sopenharmony_ci if (iov.iov_base) 241662306a36Sopenharmony_ci kunmap_local(iov.iov_base); 241762306a36Sopenharmony_ci if (page) 241862306a36Sopenharmony_ci __free_pages(page, 0); 241962306a36Sopenharmony_ci if (ret && pagelist) 242062306a36Sopenharmony_ci ceph_pagelist_release(pagelist); 242162306a36Sopenharmony_ci return ret; 242262306a36Sopenharmony_ci} 242362306a36Sopenharmony_ci 242462306a36Sopenharmony_ciint __ceph_setattr(struct inode *inode, struct iattr *attr, 242562306a36Sopenharmony_ci struct ceph_iattr *cia) 242662306a36Sopenharmony_ci{ 242762306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 242862306a36Sopenharmony_ci unsigned int ia_valid = attr->ia_valid; 242962306a36Sopenharmony_ci struct ceph_mds_request *req; 243062306a36Sopenharmony_ci struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; 243162306a36Sopenharmony_ci struct ceph_cap_flush *prealloc_cf; 243262306a36Sopenharmony_ci loff_t isize = i_size_read(inode); 243362306a36Sopenharmony_ci int issued; 243462306a36Sopenharmony_ci int release = 0, dirtied = 0; 243562306a36Sopenharmony_ci int mask = 0; 243662306a36Sopenharmony_ci int err = 0; 243762306a36Sopenharmony_ci int inode_dirty_flags = 0; 243862306a36Sopenharmony_ci bool lock_snap_rwsem = false; 243962306a36Sopenharmony_ci bool fill_fscrypt; 244062306a36Sopenharmony_ci int truncate_retry = 20; /* The RMW will take around 50ms */ 244162306a36Sopenharmony_ci 244262306a36Sopenharmony_ciretry: 244362306a36Sopenharmony_ci prealloc_cf = ceph_alloc_cap_flush(); 244462306a36Sopenharmony_ci if (!prealloc_cf) 244562306a36Sopenharmony_ci return -ENOMEM; 244662306a36Sopenharmony_ci 244762306a36Sopenharmony_ci req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETATTR, 244862306a36Sopenharmony_ci USE_AUTH_MDS); 244962306a36Sopenharmony_ci if (IS_ERR(req)) { 245062306a36Sopenharmony_ci ceph_free_cap_flush(prealloc_cf); 245162306a36Sopenharmony_ci return PTR_ERR(req); 245262306a36Sopenharmony_ci } 245362306a36Sopenharmony_ci 245462306a36Sopenharmony_ci fill_fscrypt = false; 245562306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 245662306a36Sopenharmony_ci issued = __ceph_caps_issued(ci, NULL); 245762306a36Sopenharmony_ci 245862306a36Sopenharmony_ci if (!ci->i_head_snapc && 245962306a36Sopenharmony_ci (issued & (CEPH_CAP_ANY_EXCL | CEPH_CAP_FILE_WR))) { 246062306a36Sopenharmony_ci lock_snap_rwsem = true; 246162306a36Sopenharmony_ci if (!down_read_trylock(&mdsc->snap_rwsem)) { 246262306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 246362306a36Sopenharmony_ci down_read(&mdsc->snap_rwsem); 246462306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 246562306a36Sopenharmony_ci issued = __ceph_caps_issued(ci, NULL); 246662306a36Sopenharmony_ci } 246762306a36Sopenharmony_ci } 246862306a36Sopenharmony_ci 246962306a36Sopenharmony_ci dout("setattr %p issued %s\n", inode, ceph_cap_string(issued)); 247062306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_FS_ENCRYPTION) 247162306a36Sopenharmony_ci if (cia && cia->fscrypt_auth) { 247262306a36Sopenharmony_ci u32 len = ceph_fscrypt_auth_len(cia->fscrypt_auth); 247362306a36Sopenharmony_ci 247462306a36Sopenharmony_ci if (len > sizeof(*cia->fscrypt_auth)) { 247562306a36Sopenharmony_ci err = -EINVAL; 247662306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 247762306a36Sopenharmony_ci goto out; 247862306a36Sopenharmony_ci } 247962306a36Sopenharmony_ci 248062306a36Sopenharmony_ci dout("setattr %llx:%llx fscrypt_auth len %u to %u)\n", 248162306a36Sopenharmony_ci ceph_vinop(inode), ci->fscrypt_auth_len, len); 248262306a36Sopenharmony_ci 248362306a36Sopenharmony_ci /* It should never be re-set once set */ 248462306a36Sopenharmony_ci WARN_ON_ONCE(ci->fscrypt_auth); 248562306a36Sopenharmony_ci 248662306a36Sopenharmony_ci if (issued & CEPH_CAP_AUTH_EXCL) { 248762306a36Sopenharmony_ci dirtied |= CEPH_CAP_AUTH_EXCL; 248862306a36Sopenharmony_ci kfree(ci->fscrypt_auth); 248962306a36Sopenharmony_ci ci->fscrypt_auth = (u8 *)cia->fscrypt_auth; 249062306a36Sopenharmony_ci ci->fscrypt_auth_len = len; 249162306a36Sopenharmony_ci } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 249262306a36Sopenharmony_ci ci->fscrypt_auth_len != len || 249362306a36Sopenharmony_ci memcmp(ci->fscrypt_auth, cia->fscrypt_auth, len)) { 249462306a36Sopenharmony_ci req->r_fscrypt_auth = cia->fscrypt_auth; 249562306a36Sopenharmony_ci mask |= CEPH_SETATTR_FSCRYPT_AUTH; 249662306a36Sopenharmony_ci release |= CEPH_CAP_AUTH_SHARED; 249762306a36Sopenharmony_ci } 249862306a36Sopenharmony_ci cia->fscrypt_auth = NULL; 249962306a36Sopenharmony_ci } 250062306a36Sopenharmony_ci#else 250162306a36Sopenharmony_ci if (cia && cia->fscrypt_auth) { 250262306a36Sopenharmony_ci err = -EINVAL; 250362306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 250462306a36Sopenharmony_ci goto out; 250562306a36Sopenharmony_ci } 250662306a36Sopenharmony_ci#endif /* CONFIG_FS_ENCRYPTION */ 250762306a36Sopenharmony_ci 250862306a36Sopenharmony_ci if (ia_valid & ATTR_UID) { 250962306a36Sopenharmony_ci dout("setattr %p uid %d -> %d\n", inode, 251062306a36Sopenharmony_ci from_kuid(&init_user_ns, inode->i_uid), 251162306a36Sopenharmony_ci from_kuid(&init_user_ns, attr->ia_uid)); 251262306a36Sopenharmony_ci if (issued & CEPH_CAP_AUTH_EXCL) { 251362306a36Sopenharmony_ci inode->i_uid = attr->ia_uid; 251462306a36Sopenharmony_ci dirtied |= CEPH_CAP_AUTH_EXCL; 251562306a36Sopenharmony_ci } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 251662306a36Sopenharmony_ci !uid_eq(attr->ia_uid, inode->i_uid)) { 251762306a36Sopenharmony_ci req->r_args.setattr.uid = cpu_to_le32( 251862306a36Sopenharmony_ci from_kuid(&init_user_ns, attr->ia_uid)); 251962306a36Sopenharmony_ci mask |= CEPH_SETATTR_UID; 252062306a36Sopenharmony_ci release |= CEPH_CAP_AUTH_SHARED; 252162306a36Sopenharmony_ci } 252262306a36Sopenharmony_ci } 252362306a36Sopenharmony_ci if (ia_valid & ATTR_GID) { 252462306a36Sopenharmony_ci dout("setattr %p gid %d -> %d\n", inode, 252562306a36Sopenharmony_ci from_kgid(&init_user_ns, inode->i_gid), 252662306a36Sopenharmony_ci from_kgid(&init_user_ns, attr->ia_gid)); 252762306a36Sopenharmony_ci if (issued & CEPH_CAP_AUTH_EXCL) { 252862306a36Sopenharmony_ci inode->i_gid = attr->ia_gid; 252962306a36Sopenharmony_ci dirtied |= CEPH_CAP_AUTH_EXCL; 253062306a36Sopenharmony_ci } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 253162306a36Sopenharmony_ci !gid_eq(attr->ia_gid, inode->i_gid)) { 253262306a36Sopenharmony_ci req->r_args.setattr.gid = cpu_to_le32( 253362306a36Sopenharmony_ci from_kgid(&init_user_ns, attr->ia_gid)); 253462306a36Sopenharmony_ci mask |= CEPH_SETATTR_GID; 253562306a36Sopenharmony_ci release |= CEPH_CAP_AUTH_SHARED; 253662306a36Sopenharmony_ci } 253762306a36Sopenharmony_ci } 253862306a36Sopenharmony_ci if (ia_valid & ATTR_MODE) { 253962306a36Sopenharmony_ci dout("setattr %p mode 0%o -> 0%o\n", inode, inode->i_mode, 254062306a36Sopenharmony_ci attr->ia_mode); 254162306a36Sopenharmony_ci if (issued & CEPH_CAP_AUTH_EXCL) { 254262306a36Sopenharmony_ci inode->i_mode = attr->ia_mode; 254362306a36Sopenharmony_ci dirtied |= CEPH_CAP_AUTH_EXCL; 254462306a36Sopenharmony_ci } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || 254562306a36Sopenharmony_ci attr->ia_mode != inode->i_mode) { 254662306a36Sopenharmony_ci inode->i_mode = attr->ia_mode; 254762306a36Sopenharmony_ci req->r_args.setattr.mode = cpu_to_le32(attr->ia_mode); 254862306a36Sopenharmony_ci mask |= CEPH_SETATTR_MODE; 254962306a36Sopenharmony_ci release |= CEPH_CAP_AUTH_SHARED; 255062306a36Sopenharmony_ci } 255162306a36Sopenharmony_ci } 255262306a36Sopenharmony_ci 255362306a36Sopenharmony_ci if (ia_valid & ATTR_ATIME) { 255462306a36Sopenharmony_ci dout("setattr %p atime %lld.%ld -> %lld.%ld\n", inode, 255562306a36Sopenharmony_ci inode->i_atime.tv_sec, inode->i_atime.tv_nsec, 255662306a36Sopenharmony_ci attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); 255762306a36Sopenharmony_ci if (issued & CEPH_CAP_FILE_EXCL) { 255862306a36Sopenharmony_ci ci->i_time_warp_seq++; 255962306a36Sopenharmony_ci inode->i_atime = attr->ia_atime; 256062306a36Sopenharmony_ci dirtied |= CEPH_CAP_FILE_EXCL; 256162306a36Sopenharmony_ci } else if ((issued & CEPH_CAP_FILE_WR) && 256262306a36Sopenharmony_ci timespec64_compare(&inode->i_atime, 256362306a36Sopenharmony_ci &attr->ia_atime) < 0) { 256462306a36Sopenharmony_ci inode->i_atime = attr->ia_atime; 256562306a36Sopenharmony_ci dirtied |= CEPH_CAP_FILE_WR; 256662306a36Sopenharmony_ci } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || 256762306a36Sopenharmony_ci !timespec64_equal(&inode->i_atime, &attr->ia_atime)) { 256862306a36Sopenharmony_ci ceph_encode_timespec64(&req->r_args.setattr.atime, 256962306a36Sopenharmony_ci &attr->ia_atime); 257062306a36Sopenharmony_ci mask |= CEPH_SETATTR_ATIME; 257162306a36Sopenharmony_ci release |= CEPH_CAP_FILE_SHARED | 257262306a36Sopenharmony_ci CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; 257362306a36Sopenharmony_ci } 257462306a36Sopenharmony_ci } 257562306a36Sopenharmony_ci if (ia_valid & ATTR_SIZE) { 257662306a36Sopenharmony_ci dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size); 257762306a36Sopenharmony_ci /* 257862306a36Sopenharmony_ci * Only when the new size is smaller and not aligned to 257962306a36Sopenharmony_ci * CEPH_FSCRYPT_BLOCK_SIZE will the RMW is needed. 258062306a36Sopenharmony_ci */ 258162306a36Sopenharmony_ci if (IS_ENCRYPTED(inode) && attr->ia_size < isize && 258262306a36Sopenharmony_ci (attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE)) { 258362306a36Sopenharmony_ci mask |= CEPH_SETATTR_SIZE; 258462306a36Sopenharmony_ci release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL | 258562306a36Sopenharmony_ci CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; 258662306a36Sopenharmony_ci set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags); 258762306a36Sopenharmony_ci mask |= CEPH_SETATTR_FSCRYPT_FILE; 258862306a36Sopenharmony_ci req->r_args.setattr.size = 258962306a36Sopenharmony_ci cpu_to_le64(round_up(attr->ia_size, 259062306a36Sopenharmony_ci CEPH_FSCRYPT_BLOCK_SIZE)); 259162306a36Sopenharmony_ci req->r_args.setattr.old_size = 259262306a36Sopenharmony_ci cpu_to_le64(round_up(isize, 259362306a36Sopenharmony_ci CEPH_FSCRYPT_BLOCK_SIZE)); 259462306a36Sopenharmony_ci req->r_fscrypt_file = attr->ia_size; 259562306a36Sopenharmony_ci fill_fscrypt = true; 259662306a36Sopenharmony_ci } else if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) { 259762306a36Sopenharmony_ci if (attr->ia_size > isize) { 259862306a36Sopenharmony_ci i_size_write(inode, attr->ia_size); 259962306a36Sopenharmony_ci inode->i_blocks = calc_inode_blocks(attr->ia_size); 260062306a36Sopenharmony_ci ci->i_reported_size = attr->ia_size; 260162306a36Sopenharmony_ci dirtied |= CEPH_CAP_FILE_EXCL; 260262306a36Sopenharmony_ci ia_valid |= ATTR_MTIME; 260362306a36Sopenharmony_ci } 260462306a36Sopenharmony_ci } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || 260562306a36Sopenharmony_ci attr->ia_size != isize) { 260662306a36Sopenharmony_ci mask |= CEPH_SETATTR_SIZE; 260762306a36Sopenharmony_ci release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL | 260862306a36Sopenharmony_ci CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; 260962306a36Sopenharmony_ci if (IS_ENCRYPTED(inode) && attr->ia_size) { 261062306a36Sopenharmony_ci set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags); 261162306a36Sopenharmony_ci mask |= CEPH_SETATTR_FSCRYPT_FILE; 261262306a36Sopenharmony_ci req->r_args.setattr.size = 261362306a36Sopenharmony_ci cpu_to_le64(round_up(attr->ia_size, 261462306a36Sopenharmony_ci CEPH_FSCRYPT_BLOCK_SIZE)); 261562306a36Sopenharmony_ci req->r_args.setattr.old_size = 261662306a36Sopenharmony_ci cpu_to_le64(round_up(isize, 261762306a36Sopenharmony_ci CEPH_FSCRYPT_BLOCK_SIZE)); 261862306a36Sopenharmony_ci req->r_fscrypt_file = attr->ia_size; 261962306a36Sopenharmony_ci } else { 262062306a36Sopenharmony_ci req->r_args.setattr.size = cpu_to_le64(attr->ia_size); 262162306a36Sopenharmony_ci req->r_args.setattr.old_size = cpu_to_le64(isize); 262262306a36Sopenharmony_ci req->r_fscrypt_file = 0; 262362306a36Sopenharmony_ci } 262462306a36Sopenharmony_ci } 262562306a36Sopenharmony_ci } 262662306a36Sopenharmony_ci if (ia_valid & ATTR_MTIME) { 262762306a36Sopenharmony_ci dout("setattr %p mtime %lld.%ld -> %lld.%ld\n", inode, 262862306a36Sopenharmony_ci inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, 262962306a36Sopenharmony_ci attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); 263062306a36Sopenharmony_ci if (issued & CEPH_CAP_FILE_EXCL) { 263162306a36Sopenharmony_ci ci->i_time_warp_seq++; 263262306a36Sopenharmony_ci inode->i_mtime = attr->ia_mtime; 263362306a36Sopenharmony_ci dirtied |= CEPH_CAP_FILE_EXCL; 263462306a36Sopenharmony_ci } else if ((issued & CEPH_CAP_FILE_WR) && 263562306a36Sopenharmony_ci timespec64_compare(&inode->i_mtime, 263662306a36Sopenharmony_ci &attr->ia_mtime) < 0) { 263762306a36Sopenharmony_ci inode->i_mtime = attr->ia_mtime; 263862306a36Sopenharmony_ci dirtied |= CEPH_CAP_FILE_WR; 263962306a36Sopenharmony_ci } else if ((issued & CEPH_CAP_FILE_SHARED) == 0 || 264062306a36Sopenharmony_ci !timespec64_equal(&inode->i_mtime, &attr->ia_mtime)) { 264162306a36Sopenharmony_ci ceph_encode_timespec64(&req->r_args.setattr.mtime, 264262306a36Sopenharmony_ci &attr->ia_mtime); 264362306a36Sopenharmony_ci mask |= CEPH_SETATTR_MTIME; 264462306a36Sopenharmony_ci release |= CEPH_CAP_FILE_SHARED | 264562306a36Sopenharmony_ci CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR; 264662306a36Sopenharmony_ci } 264762306a36Sopenharmony_ci } 264862306a36Sopenharmony_ci 264962306a36Sopenharmony_ci /* these do nothing */ 265062306a36Sopenharmony_ci if (ia_valid & ATTR_CTIME) { 265162306a36Sopenharmony_ci bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME| 265262306a36Sopenharmony_ci ATTR_MODE|ATTR_UID|ATTR_GID)) == 0; 265362306a36Sopenharmony_ci dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode, 265462306a36Sopenharmony_ci inode_get_ctime(inode).tv_sec, 265562306a36Sopenharmony_ci inode_get_ctime(inode).tv_nsec, 265662306a36Sopenharmony_ci attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec, 265762306a36Sopenharmony_ci only ? "ctime only" : "ignored"); 265862306a36Sopenharmony_ci if (only) { 265962306a36Sopenharmony_ci /* 266062306a36Sopenharmony_ci * if kernel wants to dirty ctime but nothing else, 266162306a36Sopenharmony_ci * we need to choose a cap to dirty under, or do 266262306a36Sopenharmony_ci * a almost-no-op setattr 266362306a36Sopenharmony_ci */ 266462306a36Sopenharmony_ci if (issued & CEPH_CAP_AUTH_EXCL) 266562306a36Sopenharmony_ci dirtied |= CEPH_CAP_AUTH_EXCL; 266662306a36Sopenharmony_ci else if (issued & CEPH_CAP_FILE_EXCL) 266762306a36Sopenharmony_ci dirtied |= CEPH_CAP_FILE_EXCL; 266862306a36Sopenharmony_ci else if (issued & CEPH_CAP_XATTR_EXCL) 266962306a36Sopenharmony_ci dirtied |= CEPH_CAP_XATTR_EXCL; 267062306a36Sopenharmony_ci else 267162306a36Sopenharmony_ci mask |= CEPH_SETATTR_CTIME; 267262306a36Sopenharmony_ci } 267362306a36Sopenharmony_ci } 267462306a36Sopenharmony_ci if (ia_valid & ATTR_FILE) 267562306a36Sopenharmony_ci dout("setattr %p ATTR_FILE ... hrm!\n", inode); 267662306a36Sopenharmony_ci 267762306a36Sopenharmony_ci if (dirtied) { 267862306a36Sopenharmony_ci inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied, 267962306a36Sopenharmony_ci &prealloc_cf); 268062306a36Sopenharmony_ci inode_set_ctime_to_ts(inode, attr->ia_ctime); 268162306a36Sopenharmony_ci inode_inc_iversion_raw(inode); 268262306a36Sopenharmony_ci } 268362306a36Sopenharmony_ci 268462306a36Sopenharmony_ci release &= issued; 268562306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 268662306a36Sopenharmony_ci if (lock_snap_rwsem) { 268762306a36Sopenharmony_ci up_read(&mdsc->snap_rwsem); 268862306a36Sopenharmony_ci lock_snap_rwsem = false; 268962306a36Sopenharmony_ci } 269062306a36Sopenharmony_ci 269162306a36Sopenharmony_ci if (inode_dirty_flags) 269262306a36Sopenharmony_ci __mark_inode_dirty(inode, inode_dirty_flags); 269362306a36Sopenharmony_ci 269462306a36Sopenharmony_ci if (mask) { 269562306a36Sopenharmony_ci req->r_inode = inode; 269662306a36Sopenharmony_ci ihold(inode); 269762306a36Sopenharmony_ci req->r_inode_drop = release; 269862306a36Sopenharmony_ci req->r_args.setattr.mask = cpu_to_le32(mask); 269962306a36Sopenharmony_ci req->r_num_caps = 1; 270062306a36Sopenharmony_ci req->r_stamp = attr->ia_ctime; 270162306a36Sopenharmony_ci if (fill_fscrypt) { 270262306a36Sopenharmony_ci err = fill_fscrypt_truncate(inode, req, attr); 270362306a36Sopenharmony_ci if (err) 270462306a36Sopenharmony_ci goto out; 270562306a36Sopenharmony_ci } 270662306a36Sopenharmony_ci 270762306a36Sopenharmony_ci /* 270862306a36Sopenharmony_ci * The truncate request will return -EAGAIN when the 270962306a36Sopenharmony_ci * last block has been updated just before the MDS 271062306a36Sopenharmony_ci * successfully gets the xlock for the FILE lock. To 271162306a36Sopenharmony_ci * avoid corrupting the file contents we need to retry 271262306a36Sopenharmony_ci * it. 271362306a36Sopenharmony_ci */ 271462306a36Sopenharmony_ci err = ceph_mdsc_do_request(mdsc, NULL, req); 271562306a36Sopenharmony_ci if (err == -EAGAIN && truncate_retry--) { 271662306a36Sopenharmony_ci dout("setattr %p result=%d (%s locally, %d remote), retry it!\n", 271762306a36Sopenharmony_ci inode, err, ceph_cap_string(dirtied), mask); 271862306a36Sopenharmony_ci ceph_mdsc_put_request(req); 271962306a36Sopenharmony_ci ceph_free_cap_flush(prealloc_cf); 272062306a36Sopenharmony_ci goto retry; 272162306a36Sopenharmony_ci } 272262306a36Sopenharmony_ci } 272362306a36Sopenharmony_ciout: 272462306a36Sopenharmony_ci dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, 272562306a36Sopenharmony_ci ceph_cap_string(dirtied), mask); 272662306a36Sopenharmony_ci 272762306a36Sopenharmony_ci ceph_mdsc_put_request(req); 272862306a36Sopenharmony_ci ceph_free_cap_flush(prealloc_cf); 272962306a36Sopenharmony_ci 273062306a36Sopenharmony_ci if (err >= 0 && (mask & CEPH_SETATTR_SIZE)) 273162306a36Sopenharmony_ci __ceph_do_pending_vmtruncate(inode); 273262306a36Sopenharmony_ci 273362306a36Sopenharmony_ci return err; 273462306a36Sopenharmony_ci} 273562306a36Sopenharmony_ci 273662306a36Sopenharmony_ci/* 273762306a36Sopenharmony_ci * setattr 273862306a36Sopenharmony_ci */ 273962306a36Sopenharmony_ciint ceph_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 274062306a36Sopenharmony_ci struct iattr *attr) 274162306a36Sopenharmony_ci{ 274262306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 274362306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_inode_to_client(inode); 274462306a36Sopenharmony_ci int err; 274562306a36Sopenharmony_ci 274662306a36Sopenharmony_ci if (ceph_snap(inode) != CEPH_NOSNAP) 274762306a36Sopenharmony_ci return -EROFS; 274862306a36Sopenharmony_ci 274962306a36Sopenharmony_ci if (ceph_inode_is_shutdown(inode)) 275062306a36Sopenharmony_ci return -ESTALE; 275162306a36Sopenharmony_ci 275262306a36Sopenharmony_ci err = fscrypt_prepare_setattr(dentry, attr); 275362306a36Sopenharmony_ci if (err) 275462306a36Sopenharmony_ci return err; 275562306a36Sopenharmony_ci 275662306a36Sopenharmony_ci err = setattr_prepare(&nop_mnt_idmap, dentry, attr); 275762306a36Sopenharmony_ci if (err != 0) 275862306a36Sopenharmony_ci return err; 275962306a36Sopenharmony_ci 276062306a36Sopenharmony_ci if ((attr->ia_valid & ATTR_SIZE) && 276162306a36Sopenharmony_ci attr->ia_size > max(i_size_read(inode), fsc->max_file_size)) 276262306a36Sopenharmony_ci return -EFBIG; 276362306a36Sopenharmony_ci 276462306a36Sopenharmony_ci if ((attr->ia_valid & ATTR_SIZE) && 276562306a36Sopenharmony_ci ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size)) 276662306a36Sopenharmony_ci return -EDQUOT; 276762306a36Sopenharmony_ci 276862306a36Sopenharmony_ci err = __ceph_setattr(inode, attr, NULL); 276962306a36Sopenharmony_ci 277062306a36Sopenharmony_ci if (err >= 0 && (attr->ia_valid & ATTR_MODE)) 277162306a36Sopenharmony_ci err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode); 277262306a36Sopenharmony_ci 277362306a36Sopenharmony_ci return err; 277462306a36Sopenharmony_ci} 277562306a36Sopenharmony_ci 277662306a36Sopenharmony_ciint ceph_try_to_choose_auth_mds(struct inode *inode, int mask) 277762306a36Sopenharmony_ci{ 277862306a36Sopenharmony_ci int issued = ceph_caps_issued(ceph_inode(inode)); 277962306a36Sopenharmony_ci 278062306a36Sopenharmony_ci /* 278162306a36Sopenharmony_ci * If any 'x' caps is issued we can just choose the auth MDS 278262306a36Sopenharmony_ci * instead of the random replica MDSes. Because only when the 278362306a36Sopenharmony_ci * Locker is in LOCK_EXEC state will the loner client could 278462306a36Sopenharmony_ci * get the 'x' caps. And if we send the getattr requests to 278562306a36Sopenharmony_ci * any replica MDS it must auth pin and tries to rdlock from 278662306a36Sopenharmony_ci * the auth MDS, and then the auth MDS need to do the Locker 278762306a36Sopenharmony_ci * state transition to LOCK_SYNC. And after that the lock state 278862306a36Sopenharmony_ci * will change back. 278962306a36Sopenharmony_ci * 279062306a36Sopenharmony_ci * This cost much when doing the Locker state transition and 279162306a36Sopenharmony_ci * usually will need to revoke caps from clients. 279262306a36Sopenharmony_ci * 279362306a36Sopenharmony_ci * And for the 'Xs' caps for getxattr we will also choose the 279462306a36Sopenharmony_ci * auth MDS, because the MDS side code is buggy due to setxattr 279562306a36Sopenharmony_ci * won't notify the replica MDSes when the values changed and 279662306a36Sopenharmony_ci * the replica MDS will return the old values. Though we will 279762306a36Sopenharmony_ci * fix it in MDS code, but this still makes sense for old ceph. 279862306a36Sopenharmony_ci */ 279962306a36Sopenharmony_ci if (((mask & CEPH_CAP_ANY_SHARED) && (issued & CEPH_CAP_ANY_EXCL)) 280062306a36Sopenharmony_ci || (mask & (CEPH_STAT_RSTAT | CEPH_STAT_CAP_XATTR))) 280162306a36Sopenharmony_ci return USE_AUTH_MDS; 280262306a36Sopenharmony_ci else 280362306a36Sopenharmony_ci return USE_ANY_MDS; 280462306a36Sopenharmony_ci} 280562306a36Sopenharmony_ci 280662306a36Sopenharmony_ci/* 280762306a36Sopenharmony_ci * Verify that we have a lease on the given mask. If not, 280862306a36Sopenharmony_ci * do a getattr against an mds. 280962306a36Sopenharmony_ci */ 281062306a36Sopenharmony_ciint __ceph_do_getattr(struct inode *inode, struct page *locked_page, 281162306a36Sopenharmony_ci int mask, bool force) 281262306a36Sopenharmony_ci{ 281362306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); 281462306a36Sopenharmony_ci struct ceph_mds_client *mdsc = fsc->mdsc; 281562306a36Sopenharmony_ci struct ceph_mds_request *req; 281662306a36Sopenharmony_ci int mode; 281762306a36Sopenharmony_ci int err; 281862306a36Sopenharmony_ci 281962306a36Sopenharmony_ci if (ceph_snap(inode) == CEPH_SNAPDIR) { 282062306a36Sopenharmony_ci dout("do_getattr inode %p SNAPDIR\n", inode); 282162306a36Sopenharmony_ci return 0; 282262306a36Sopenharmony_ci } 282362306a36Sopenharmony_ci 282462306a36Sopenharmony_ci dout("do_getattr inode %p mask %s mode 0%o\n", 282562306a36Sopenharmony_ci inode, ceph_cap_string(mask), inode->i_mode); 282662306a36Sopenharmony_ci if (!force && ceph_caps_issued_mask_metric(ceph_inode(inode), mask, 1)) 282762306a36Sopenharmony_ci return 0; 282862306a36Sopenharmony_ci 282962306a36Sopenharmony_ci mode = ceph_try_to_choose_auth_mds(inode, mask); 283062306a36Sopenharmony_ci req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode); 283162306a36Sopenharmony_ci if (IS_ERR(req)) 283262306a36Sopenharmony_ci return PTR_ERR(req); 283362306a36Sopenharmony_ci req->r_inode = inode; 283462306a36Sopenharmony_ci ihold(inode); 283562306a36Sopenharmony_ci req->r_num_caps = 1; 283662306a36Sopenharmony_ci req->r_args.getattr.mask = cpu_to_le32(mask); 283762306a36Sopenharmony_ci req->r_locked_page = locked_page; 283862306a36Sopenharmony_ci err = ceph_mdsc_do_request(mdsc, NULL, req); 283962306a36Sopenharmony_ci if (locked_page && err == 0) { 284062306a36Sopenharmony_ci u64 inline_version = req->r_reply_info.targeti.inline_version; 284162306a36Sopenharmony_ci if (inline_version == 0) { 284262306a36Sopenharmony_ci /* the reply is supposed to contain inline data */ 284362306a36Sopenharmony_ci err = -EINVAL; 284462306a36Sopenharmony_ci } else if (inline_version == CEPH_INLINE_NONE || 284562306a36Sopenharmony_ci inline_version == 1) { 284662306a36Sopenharmony_ci err = -ENODATA; 284762306a36Sopenharmony_ci } else { 284862306a36Sopenharmony_ci err = req->r_reply_info.targeti.inline_len; 284962306a36Sopenharmony_ci } 285062306a36Sopenharmony_ci } 285162306a36Sopenharmony_ci ceph_mdsc_put_request(req); 285262306a36Sopenharmony_ci dout("do_getattr result=%d\n", err); 285362306a36Sopenharmony_ci return err; 285462306a36Sopenharmony_ci} 285562306a36Sopenharmony_ci 285662306a36Sopenharmony_ciint ceph_do_getvxattr(struct inode *inode, const char *name, void *value, 285762306a36Sopenharmony_ci size_t size) 285862306a36Sopenharmony_ci{ 285962306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); 286062306a36Sopenharmony_ci struct ceph_mds_client *mdsc = fsc->mdsc; 286162306a36Sopenharmony_ci struct ceph_mds_request *req; 286262306a36Sopenharmony_ci int mode = USE_AUTH_MDS; 286362306a36Sopenharmony_ci int err; 286462306a36Sopenharmony_ci char *xattr_value; 286562306a36Sopenharmony_ci size_t xattr_value_len; 286662306a36Sopenharmony_ci 286762306a36Sopenharmony_ci req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETVXATTR, mode); 286862306a36Sopenharmony_ci if (IS_ERR(req)) { 286962306a36Sopenharmony_ci err = -ENOMEM; 287062306a36Sopenharmony_ci goto out; 287162306a36Sopenharmony_ci } 287262306a36Sopenharmony_ci 287362306a36Sopenharmony_ci req->r_feature_needed = CEPHFS_FEATURE_OP_GETVXATTR; 287462306a36Sopenharmony_ci req->r_path2 = kstrdup(name, GFP_NOFS); 287562306a36Sopenharmony_ci if (!req->r_path2) { 287662306a36Sopenharmony_ci err = -ENOMEM; 287762306a36Sopenharmony_ci goto put; 287862306a36Sopenharmony_ci } 287962306a36Sopenharmony_ci 288062306a36Sopenharmony_ci ihold(inode); 288162306a36Sopenharmony_ci req->r_inode = inode; 288262306a36Sopenharmony_ci err = ceph_mdsc_do_request(mdsc, NULL, req); 288362306a36Sopenharmony_ci if (err < 0) 288462306a36Sopenharmony_ci goto put; 288562306a36Sopenharmony_ci 288662306a36Sopenharmony_ci xattr_value = req->r_reply_info.xattr_info.xattr_value; 288762306a36Sopenharmony_ci xattr_value_len = req->r_reply_info.xattr_info.xattr_value_len; 288862306a36Sopenharmony_ci 288962306a36Sopenharmony_ci dout("do_getvxattr xattr_value_len:%zu, size:%zu\n", xattr_value_len, size); 289062306a36Sopenharmony_ci 289162306a36Sopenharmony_ci err = (int)xattr_value_len; 289262306a36Sopenharmony_ci if (size == 0) 289362306a36Sopenharmony_ci goto put; 289462306a36Sopenharmony_ci 289562306a36Sopenharmony_ci if (xattr_value_len > size) { 289662306a36Sopenharmony_ci err = -ERANGE; 289762306a36Sopenharmony_ci goto put; 289862306a36Sopenharmony_ci } 289962306a36Sopenharmony_ci 290062306a36Sopenharmony_ci memcpy(value, xattr_value, xattr_value_len); 290162306a36Sopenharmony_ciput: 290262306a36Sopenharmony_ci ceph_mdsc_put_request(req); 290362306a36Sopenharmony_ciout: 290462306a36Sopenharmony_ci dout("do_getvxattr result=%d\n", err); 290562306a36Sopenharmony_ci return err; 290662306a36Sopenharmony_ci} 290762306a36Sopenharmony_ci 290862306a36Sopenharmony_ci 290962306a36Sopenharmony_ci/* 291062306a36Sopenharmony_ci * Check inode permissions. We verify we have a valid value for 291162306a36Sopenharmony_ci * the AUTH cap, then call the generic handler. 291262306a36Sopenharmony_ci */ 291362306a36Sopenharmony_ciint ceph_permission(struct mnt_idmap *idmap, struct inode *inode, 291462306a36Sopenharmony_ci int mask) 291562306a36Sopenharmony_ci{ 291662306a36Sopenharmony_ci int err; 291762306a36Sopenharmony_ci 291862306a36Sopenharmony_ci if (mask & MAY_NOT_BLOCK) 291962306a36Sopenharmony_ci return -ECHILD; 292062306a36Sopenharmony_ci 292162306a36Sopenharmony_ci err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED, false); 292262306a36Sopenharmony_ci 292362306a36Sopenharmony_ci if (!err) 292462306a36Sopenharmony_ci err = generic_permission(&nop_mnt_idmap, inode, mask); 292562306a36Sopenharmony_ci return err; 292662306a36Sopenharmony_ci} 292762306a36Sopenharmony_ci 292862306a36Sopenharmony_ci/* Craft a mask of needed caps given a set of requested statx attrs. */ 292962306a36Sopenharmony_cistatic int statx_to_caps(u32 want, umode_t mode) 293062306a36Sopenharmony_ci{ 293162306a36Sopenharmony_ci int mask = 0; 293262306a36Sopenharmony_ci 293362306a36Sopenharmony_ci if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME|STATX_CHANGE_COOKIE)) 293462306a36Sopenharmony_ci mask |= CEPH_CAP_AUTH_SHARED; 293562306a36Sopenharmony_ci 293662306a36Sopenharmony_ci if (want & (STATX_NLINK|STATX_CTIME|STATX_CHANGE_COOKIE)) { 293762306a36Sopenharmony_ci /* 293862306a36Sopenharmony_ci * The link count for directories depends on inode->i_subdirs, 293962306a36Sopenharmony_ci * and that is only updated when Fs caps are held. 294062306a36Sopenharmony_ci */ 294162306a36Sopenharmony_ci if (S_ISDIR(mode)) 294262306a36Sopenharmony_ci mask |= CEPH_CAP_FILE_SHARED; 294362306a36Sopenharmony_ci else 294462306a36Sopenharmony_ci mask |= CEPH_CAP_LINK_SHARED; 294562306a36Sopenharmony_ci } 294662306a36Sopenharmony_ci 294762306a36Sopenharmony_ci if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|STATX_BLOCKS|STATX_CHANGE_COOKIE)) 294862306a36Sopenharmony_ci mask |= CEPH_CAP_FILE_SHARED; 294962306a36Sopenharmony_ci 295062306a36Sopenharmony_ci if (want & (STATX_CTIME|STATX_CHANGE_COOKIE)) 295162306a36Sopenharmony_ci mask |= CEPH_CAP_XATTR_SHARED; 295262306a36Sopenharmony_ci 295362306a36Sopenharmony_ci return mask; 295462306a36Sopenharmony_ci} 295562306a36Sopenharmony_ci 295662306a36Sopenharmony_ci/* 295762306a36Sopenharmony_ci * Get all the attributes. If we have sufficient caps for the requested attrs, 295862306a36Sopenharmony_ci * then we can avoid talking to the MDS at all. 295962306a36Sopenharmony_ci */ 296062306a36Sopenharmony_ciint ceph_getattr(struct mnt_idmap *idmap, const struct path *path, 296162306a36Sopenharmony_ci struct kstat *stat, u32 request_mask, unsigned int flags) 296262306a36Sopenharmony_ci{ 296362306a36Sopenharmony_ci struct inode *inode = d_inode(path->dentry); 296462306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 296562306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 296662306a36Sopenharmony_ci u32 valid_mask = STATX_BASIC_STATS; 296762306a36Sopenharmony_ci int err = 0; 296862306a36Sopenharmony_ci 296962306a36Sopenharmony_ci if (ceph_inode_is_shutdown(inode)) 297062306a36Sopenharmony_ci return -ESTALE; 297162306a36Sopenharmony_ci 297262306a36Sopenharmony_ci /* Skip the getattr altogether if we're asked not to sync */ 297362306a36Sopenharmony_ci if ((flags & AT_STATX_SYNC_TYPE) != AT_STATX_DONT_SYNC) { 297462306a36Sopenharmony_ci err = ceph_do_getattr(inode, 297562306a36Sopenharmony_ci statx_to_caps(request_mask, inode->i_mode), 297662306a36Sopenharmony_ci flags & AT_STATX_FORCE_SYNC); 297762306a36Sopenharmony_ci if (err) 297862306a36Sopenharmony_ci return err; 297962306a36Sopenharmony_ci } 298062306a36Sopenharmony_ci 298162306a36Sopenharmony_ci generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); 298262306a36Sopenharmony_ci stat->ino = ceph_present_inode(inode); 298362306a36Sopenharmony_ci 298462306a36Sopenharmony_ci /* 298562306a36Sopenharmony_ci * btime on newly-allocated inodes is 0, so if this is still set to 298662306a36Sopenharmony_ci * that, then assume that it's not valid. 298762306a36Sopenharmony_ci */ 298862306a36Sopenharmony_ci if (ci->i_btime.tv_sec || ci->i_btime.tv_nsec) { 298962306a36Sopenharmony_ci stat->btime = ci->i_btime; 299062306a36Sopenharmony_ci valid_mask |= STATX_BTIME; 299162306a36Sopenharmony_ci } 299262306a36Sopenharmony_ci 299362306a36Sopenharmony_ci if (request_mask & STATX_CHANGE_COOKIE) { 299462306a36Sopenharmony_ci stat->change_cookie = inode_peek_iversion_raw(inode); 299562306a36Sopenharmony_ci valid_mask |= STATX_CHANGE_COOKIE; 299662306a36Sopenharmony_ci } 299762306a36Sopenharmony_ci 299862306a36Sopenharmony_ci if (ceph_snap(inode) == CEPH_NOSNAP) 299962306a36Sopenharmony_ci stat->dev = sb->s_dev; 300062306a36Sopenharmony_ci else 300162306a36Sopenharmony_ci stat->dev = ci->i_snapid_map ? ci->i_snapid_map->dev : 0; 300262306a36Sopenharmony_ci 300362306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) { 300462306a36Sopenharmony_ci if (ceph_test_mount_opt(ceph_sb_to_client(sb), RBYTES)) { 300562306a36Sopenharmony_ci stat->size = ci->i_rbytes; 300662306a36Sopenharmony_ci } else if (ceph_snap(inode) == CEPH_SNAPDIR) { 300762306a36Sopenharmony_ci struct ceph_inode_info *pci; 300862306a36Sopenharmony_ci struct ceph_snap_realm *realm; 300962306a36Sopenharmony_ci struct inode *parent; 301062306a36Sopenharmony_ci 301162306a36Sopenharmony_ci parent = ceph_lookup_inode(sb, ceph_ino(inode)); 301262306a36Sopenharmony_ci if (IS_ERR(parent)) 301362306a36Sopenharmony_ci return PTR_ERR(parent); 301462306a36Sopenharmony_ci 301562306a36Sopenharmony_ci pci = ceph_inode(parent); 301662306a36Sopenharmony_ci spin_lock(&pci->i_ceph_lock); 301762306a36Sopenharmony_ci realm = pci->i_snap_realm; 301862306a36Sopenharmony_ci if (realm) 301962306a36Sopenharmony_ci stat->size = realm->num_snaps; 302062306a36Sopenharmony_ci else 302162306a36Sopenharmony_ci stat->size = 0; 302262306a36Sopenharmony_ci spin_unlock(&pci->i_ceph_lock); 302362306a36Sopenharmony_ci iput(parent); 302462306a36Sopenharmony_ci } else { 302562306a36Sopenharmony_ci stat->size = ci->i_files + ci->i_subdirs; 302662306a36Sopenharmony_ci } 302762306a36Sopenharmony_ci stat->blocks = 0; 302862306a36Sopenharmony_ci stat->blksize = 65536; 302962306a36Sopenharmony_ci /* 303062306a36Sopenharmony_ci * Some applications rely on the number of st_nlink 303162306a36Sopenharmony_ci * value on directories to be either 0 (if unlinked) 303262306a36Sopenharmony_ci * or 2 + number of subdirectories. 303362306a36Sopenharmony_ci */ 303462306a36Sopenharmony_ci if (stat->nlink == 1) 303562306a36Sopenharmony_ci /* '.' + '..' + subdirs */ 303662306a36Sopenharmony_ci stat->nlink = 1 + 1 + ci->i_subdirs; 303762306a36Sopenharmony_ci } 303862306a36Sopenharmony_ci 303962306a36Sopenharmony_ci stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC; 304062306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) 304162306a36Sopenharmony_ci stat->attributes |= STATX_ATTR_ENCRYPTED; 304262306a36Sopenharmony_ci stat->attributes_mask |= (STATX_ATTR_CHANGE_MONOTONIC | 304362306a36Sopenharmony_ci STATX_ATTR_ENCRYPTED); 304462306a36Sopenharmony_ci 304562306a36Sopenharmony_ci stat->result_mask = request_mask & valid_mask; 304662306a36Sopenharmony_ci return err; 304762306a36Sopenharmony_ci} 304862306a36Sopenharmony_ci 304962306a36Sopenharmony_civoid ceph_inode_shutdown(struct inode *inode) 305062306a36Sopenharmony_ci{ 305162306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 305262306a36Sopenharmony_ci struct rb_node *p; 305362306a36Sopenharmony_ci int iputs = 0; 305462306a36Sopenharmony_ci bool invalidate = false; 305562306a36Sopenharmony_ci 305662306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 305762306a36Sopenharmony_ci ci->i_ceph_flags |= CEPH_I_SHUTDOWN; 305862306a36Sopenharmony_ci p = rb_first(&ci->i_caps); 305962306a36Sopenharmony_ci while (p) { 306062306a36Sopenharmony_ci struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); 306162306a36Sopenharmony_ci 306262306a36Sopenharmony_ci p = rb_next(p); 306362306a36Sopenharmony_ci iputs += ceph_purge_inode_cap(inode, cap, &invalidate); 306462306a36Sopenharmony_ci } 306562306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 306662306a36Sopenharmony_ci 306762306a36Sopenharmony_ci if (invalidate) 306862306a36Sopenharmony_ci ceph_queue_invalidate(inode); 306962306a36Sopenharmony_ci while (iputs--) 307062306a36Sopenharmony_ci iput(inode); 307162306a36Sopenharmony_ci} 3072