162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci#include <linux/ceph/ceph_debug.h> 362306a36Sopenharmony_ci 462306a36Sopenharmony_ci#include <linux/fs.h> 562306a36Sopenharmony_ci#include <linux/wait.h> 662306a36Sopenharmony_ci#include <linux/slab.h> 762306a36Sopenharmony_ci#include <linux/gfp.h> 862306a36Sopenharmony_ci#include <linux/sched.h> 962306a36Sopenharmony_ci#include <linux/debugfs.h> 1062306a36Sopenharmony_ci#include <linux/seq_file.h> 1162306a36Sopenharmony_ci#include <linux/ratelimit.h> 1262306a36Sopenharmony_ci#include <linux/bits.h> 1362306a36Sopenharmony_ci#include <linux/ktime.h> 1462306a36Sopenharmony_ci#include <linux/bitmap.h> 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ci#include "super.h" 1762306a36Sopenharmony_ci#include "mds_client.h" 1862306a36Sopenharmony_ci#include "crypto.h" 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci#include <linux/ceph/ceph_features.h> 2162306a36Sopenharmony_ci#include <linux/ceph/messenger.h> 2262306a36Sopenharmony_ci#include <linux/ceph/decode.h> 2362306a36Sopenharmony_ci#include <linux/ceph/pagelist.h> 2462306a36Sopenharmony_ci#include <linux/ceph/auth.h> 2562306a36Sopenharmony_ci#include <linux/ceph/debugfs.h> 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci#define RECONNECT_MAX_SIZE (INT_MAX - PAGE_SIZE) 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci/* 3062306a36Sopenharmony_ci * A cluster of MDS (metadata server) daemons is responsible for 3162306a36Sopenharmony_ci * managing the file system namespace (the directory hierarchy and 3262306a36Sopenharmony_ci * inodes) and for coordinating shared access to storage. Metadata is 3362306a36Sopenharmony_ci * partitioning hierarchically across a number of servers, and that 3462306a36Sopenharmony_ci * partition varies over time as the cluster adjusts the distribution 3562306a36Sopenharmony_ci * in order to balance load. 
 *
 * The MDS client is primarily responsible for managing synchronous
 * metadata requests for operations like open, unlink, and so forth.
 * If there is an MDS failure, we find out about it when we (possibly
 * request and) receive a new MDS map, and can resubmit affected
 * requests.
 *
 * For the most part, though, we take advantage of a lossless
 * communications channel to the MDS, and do not need to worry about
 * timing out or resubmitting requests.
 *
 * We maintain a stateful "session" with each MDS we interact with.
 * Within each session, we send periodic heartbeat messages to ensure
 * any capabilities or leases we have been issued remain valid.  If
 * the session times out and goes stale, our leases and capabilities
 * are no longer valid.
5262306a36Sopenharmony_ci */ 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_cistruct ceph_reconnect_state { 5562306a36Sopenharmony_ci struct ceph_mds_session *session; 5662306a36Sopenharmony_ci int nr_caps, nr_realms; 5762306a36Sopenharmony_ci struct ceph_pagelist *pagelist; 5862306a36Sopenharmony_ci unsigned msg_version; 5962306a36Sopenharmony_ci bool allow_multi; 6062306a36Sopenharmony_ci}; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_cistatic void __wake_requests(struct ceph_mds_client *mdsc, 6362306a36Sopenharmony_ci struct list_head *head); 6462306a36Sopenharmony_cistatic void ceph_cap_release_work(struct work_struct *work); 6562306a36Sopenharmony_cistatic void ceph_cap_reclaim_work(struct work_struct *work); 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_cistatic const struct ceph_connection_operations mds_con_ops; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci/* 7162306a36Sopenharmony_ci * mds reply parsing 7262306a36Sopenharmony_ci */ 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_cistatic int parse_reply_info_quota(void **p, void *end, 7562306a36Sopenharmony_ci struct ceph_mds_reply_info_in *info) 7662306a36Sopenharmony_ci{ 7762306a36Sopenharmony_ci u8 struct_v, struct_compat; 7862306a36Sopenharmony_ci u32 struct_len; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci ceph_decode_8_safe(p, end, struct_v, bad); 8162306a36Sopenharmony_ci ceph_decode_8_safe(p, end, struct_compat, bad); 8262306a36Sopenharmony_ci /* struct_v is expected to be >= 1. we only 8362306a36Sopenharmony_ci * understand encoding with struct_compat == 1. 
*/ 8462306a36Sopenharmony_ci if (!struct_v || struct_compat != 1) 8562306a36Sopenharmony_ci goto bad; 8662306a36Sopenharmony_ci ceph_decode_32_safe(p, end, struct_len, bad); 8762306a36Sopenharmony_ci ceph_decode_need(p, end, struct_len, bad); 8862306a36Sopenharmony_ci end = *p + struct_len; 8962306a36Sopenharmony_ci ceph_decode_64_safe(p, end, info->max_bytes, bad); 9062306a36Sopenharmony_ci ceph_decode_64_safe(p, end, info->max_files, bad); 9162306a36Sopenharmony_ci *p = end; 9262306a36Sopenharmony_ci return 0; 9362306a36Sopenharmony_cibad: 9462306a36Sopenharmony_ci return -EIO; 9562306a36Sopenharmony_ci} 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci/* 9862306a36Sopenharmony_ci * parse individual inode info 9962306a36Sopenharmony_ci */ 10062306a36Sopenharmony_cistatic int parse_reply_info_in(void **p, void *end, 10162306a36Sopenharmony_ci struct ceph_mds_reply_info_in *info, 10262306a36Sopenharmony_ci u64 features) 10362306a36Sopenharmony_ci{ 10462306a36Sopenharmony_ci int err = 0; 10562306a36Sopenharmony_ci u8 struct_v = 0; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci if (features == (u64)-1) { 10862306a36Sopenharmony_ci u32 struct_len; 10962306a36Sopenharmony_ci u8 struct_compat; 11062306a36Sopenharmony_ci ceph_decode_8_safe(p, end, struct_v, bad); 11162306a36Sopenharmony_ci ceph_decode_8_safe(p, end, struct_compat, bad); 11262306a36Sopenharmony_ci /* struct_v is expected to be >= 1. we only understand 11362306a36Sopenharmony_ci * encoding with struct_compat == 1. 
*/ 11462306a36Sopenharmony_ci if (!struct_v || struct_compat != 1) 11562306a36Sopenharmony_ci goto bad; 11662306a36Sopenharmony_ci ceph_decode_32_safe(p, end, struct_len, bad); 11762306a36Sopenharmony_ci ceph_decode_need(p, end, struct_len, bad); 11862306a36Sopenharmony_ci end = *p + struct_len; 11962306a36Sopenharmony_ci } 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ci ceph_decode_need(p, end, sizeof(struct ceph_mds_reply_inode), bad); 12262306a36Sopenharmony_ci info->in = *p; 12362306a36Sopenharmony_ci *p += sizeof(struct ceph_mds_reply_inode) + 12462306a36Sopenharmony_ci sizeof(*info->in->fragtree.splits) * 12562306a36Sopenharmony_ci le32_to_cpu(info->in->fragtree.nsplits); 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci ceph_decode_32_safe(p, end, info->symlink_len, bad); 12862306a36Sopenharmony_ci ceph_decode_need(p, end, info->symlink_len, bad); 12962306a36Sopenharmony_ci info->symlink = *p; 13062306a36Sopenharmony_ci *p += info->symlink_len; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci ceph_decode_copy_safe(p, end, &info->dir_layout, 13362306a36Sopenharmony_ci sizeof(info->dir_layout), bad); 13462306a36Sopenharmony_ci ceph_decode_32_safe(p, end, info->xattr_len, bad); 13562306a36Sopenharmony_ci ceph_decode_need(p, end, info->xattr_len, bad); 13662306a36Sopenharmony_ci info->xattr_data = *p; 13762306a36Sopenharmony_ci *p += info->xattr_len; 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci if (features == (u64)-1) { 14062306a36Sopenharmony_ci /* inline data */ 14162306a36Sopenharmony_ci ceph_decode_64_safe(p, end, info->inline_version, bad); 14262306a36Sopenharmony_ci ceph_decode_32_safe(p, end, info->inline_len, bad); 14362306a36Sopenharmony_ci ceph_decode_need(p, end, info->inline_len, bad); 14462306a36Sopenharmony_ci info->inline_data = *p; 14562306a36Sopenharmony_ci *p += info->inline_len; 14662306a36Sopenharmony_ci /* quota */ 14762306a36Sopenharmony_ci err = parse_reply_info_quota(p, end, info); 14862306a36Sopenharmony_ci if 
(err < 0) 14962306a36Sopenharmony_ci goto out_bad; 15062306a36Sopenharmony_ci /* pool namespace */ 15162306a36Sopenharmony_ci ceph_decode_32_safe(p, end, info->pool_ns_len, bad); 15262306a36Sopenharmony_ci if (info->pool_ns_len > 0) { 15362306a36Sopenharmony_ci ceph_decode_need(p, end, info->pool_ns_len, bad); 15462306a36Sopenharmony_ci info->pool_ns_data = *p; 15562306a36Sopenharmony_ci *p += info->pool_ns_len; 15662306a36Sopenharmony_ci } 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci /* btime */ 15962306a36Sopenharmony_ci ceph_decode_need(p, end, sizeof(info->btime), bad); 16062306a36Sopenharmony_ci ceph_decode_copy(p, &info->btime, sizeof(info->btime)); 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci /* change attribute */ 16362306a36Sopenharmony_ci ceph_decode_64_safe(p, end, info->change_attr, bad); 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci /* dir pin */ 16662306a36Sopenharmony_ci if (struct_v >= 2) { 16762306a36Sopenharmony_ci ceph_decode_32_safe(p, end, info->dir_pin, bad); 16862306a36Sopenharmony_ci } else { 16962306a36Sopenharmony_ci info->dir_pin = -ENODATA; 17062306a36Sopenharmony_ci } 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci /* snapshot birth time, remains zero for v<=2 */ 17362306a36Sopenharmony_ci if (struct_v >= 3) { 17462306a36Sopenharmony_ci ceph_decode_need(p, end, sizeof(info->snap_btime), bad); 17562306a36Sopenharmony_ci ceph_decode_copy(p, &info->snap_btime, 17662306a36Sopenharmony_ci sizeof(info->snap_btime)); 17762306a36Sopenharmony_ci } else { 17862306a36Sopenharmony_ci memset(&info->snap_btime, 0, sizeof(info->snap_btime)); 17962306a36Sopenharmony_ci } 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci /* snapshot count, remains zero for v<=3 */ 18262306a36Sopenharmony_ci if (struct_v >= 4) { 18362306a36Sopenharmony_ci ceph_decode_64_safe(p, end, info->rsnaps, bad); 18462306a36Sopenharmony_ci } else { 18562306a36Sopenharmony_ci info->rsnaps = 0; 18662306a36Sopenharmony_ci } 
18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci if (struct_v >= 5) { 18962306a36Sopenharmony_ci u32 alen; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci ceph_decode_32_safe(p, end, alen, bad); 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci while (alen--) { 19462306a36Sopenharmony_ci u32 len; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci /* key */ 19762306a36Sopenharmony_ci ceph_decode_32_safe(p, end, len, bad); 19862306a36Sopenharmony_ci ceph_decode_skip_n(p, end, len, bad); 19962306a36Sopenharmony_ci /* value */ 20062306a36Sopenharmony_ci ceph_decode_32_safe(p, end, len, bad); 20162306a36Sopenharmony_ci ceph_decode_skip_n(p, end, len, bad); 20262306a36Sopenharmony_ci } 20362306a36Sopenharmony_ci } 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci /* fscrypt flag -- ignore */ 20662306a36Sopenharmony_ci if (struct_v >= 6) 20762306a36Sopenharmony_ci ceph_decode_skip_8(p, end, bad); 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_ci info->fscrypt_auth = NULL; 21062306a36Sopenharmony_ci info->fscrypt_auth_len = 0; 21162306a36Sopenharmony_ci info->fscrypt_file = NULL; 21262306a36Sopenharmony_ci info->fscrypt_file_len = 0; 21362306a36Sopenharmony_ci if (struct_v >= 7) { 21462306a36Sopenharmony_ci ceph_decode_32_safe(p, end, info->fscrypt_auth_len, bad); 21562306a36Sopenharmony_ci if (info->fscrypt_auth_len) { 21662306a36Sopenharmony_ci info->fscrypt_auth = kmalloc(info->fscrypt_auth_len, 21762306a36Sopenharmony_ci GFP_KERNEL); 21862306a36Sopenharmony_ci if (!info->fscrypt_auth) 21962306a36Sopenharmony_ci return -ENOMEM; 22062306a36Sopenharmony_ci ceph_decode_copy_safe(p, end, info->fscrypt_auth, 22162306a36Sopenharmony_ci info->fscrypt_auth_len, bad); 22262306a36Sopenharmony_ci } 22362306a36Sopenharmony_ci ceph_decode_32_safe(p, end, info->fscrypt_file_len, bad); 22462306a36Sopenharmony_ci if (info->fscrypt_file_len) { 22562306a36Sopenharmony_ci info->fscrypt_file = kmalloc(info->fscrypt_file_len, 22662306a36Sopenharmony_ci GFP_KERNEL); 
22762306a36Sopenharmony_ci if (!info->fscrypt_file) 22862306a36Sopenharmony_ci return -ENOMEM; 22962306a36Sopenharmony_ci ceph_decode_copy_safe(p, end, info->fscrypt_file, 23062306a36Sopenharmony_ci info->fscrypt_file_len, bad); 23162306a36Sopenharmony_ci } 23262306a36Sopenharmony_ci } 23362306a36Sopenharmony_ci *p = end; 23462306a36Sopenharmony_ci } else { 23562306a36Sopenharmony_ci /* legacy (unversioned) struct */ 23662306a36Sopenharmony_ci if (features & CEPH_FEATURE_MDS_INLINE_DATA) { 23762306a36Sopenharmony_ci ceph_decode_64_safe(p, end, info->inline_version, bad); 23862306a36Sopenharmony_ci ceph_decode_32_safe(p, end, info->inline_len, bad); 23962306a36Sopenharmony_ci ceph_decode_need(p, end, info->inline_len, bad); 24062306a36Sopenharmony_ci info->inline_data = *p; 24162306a36Sopenharmony_ci *p += info->inline_len; 24262306a36Sopenharmony_ci } else 24362306a36Sopenharmony_ci info->inline_version = CEPH_INLINE_NONE; 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci if (features & CEPH_FEATURE_MDS_QUOTA) { 24662306a36Sopenharmony_ci err = parse_reply_info_quota(p, end, info); 24762306a36Sopenharmony_ci if (err < 0) 24862306a36Sopenharmony_ci goto out_bad; 24962306a36Sopenharmony_ci } else { 25062306a36Sopenharmony_ci info->max_bytes = 0; 25162306a36Sopenharmony_ci info->max_files = 0; 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci info->pool_ns_len = 0; 25562306a36Sopenharmony_ci info->pool_ns_data = NULL; 25662306a36Sopenharmony_ci if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) { 25762306a36Sopenharmony_ci ceph_decode_32_safe(p, end, info->pool_ns_len, bad); 25862306a36Sopenharmony_ci if (info->pool_ns_len > 0) { 25962306a36Sopenharmony_ci ceph_decode_need(p, end, info->pool_ns_len, bad); 26062306a36Sopenharmony_ci info->pool_ns_data = *p; 26162306a36Sopenharmony_ci *p += info->pool_ns_len; 26262306a36Sopenharmony_ci } 26362306a36Sopenharmony_ci } 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci if (features & 
CEPH_FEATURE_FS_BTIME) { 26662306a36Sopenharmony_ci ceph_decode_need(p, end, sizeof(info->btime), bad); 26762306a36Sopenharmony_ci ceph_decode_copy(p, &info->btime, sizeof(info->btime)); 26862306a36Sopenharmony_ci ceph_decode_64_safe(p, end, info->change_attr, bad); 26962306a36Sopenharmony_ci } 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci info->dir_pin = -ENODATA; 27262306a36Sopenharmony_ci /* info->snap_btime and info->rsnaps remain zero */ 27362306a36Sopenharmony_ci } 27462306a36Sopenharmony_ci return 0; 27562306a36Sopenharmony_cibad: 27662306a36Sopenharmony_ci err = -EIO; 27762306a36Sopenharmony_ciout_bad: 27862306a36Sopenharmony_ci return err; 27962306a36Sopenharmony_ci} 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_cistatic int parse_reply_info_dir(void **p, void *end, 28262306a36Sopenharmony_ci struct ceph_mds_reply_dirfrag **dirfrag, 28362306a36Sopenharmony_ci u64 features) 28462306a36Sopenharmony_ci{ 28562306a36Sopenharmony_ci if (features == (u64)-1) { 28662306a36Sopenharmony_ci u8 struct_v, struct_compat; 28762306a36Sopenharmony_ci u32 struct_len; 28862306a36Sopenharmony_ci ceph_decode_8_safe(p, end, struct_v, bad); 28962306a36Sopenharmony_ci ceph_decode_8_safe(p, end, struct_compat, bad); 29062306a36Sopenharmony_ci /* struct_v is expected to be >= 1. we only understand 29162306a36Sopenharmony_ci * encoding whose struct_compat == 1. 
*/ 29262306a36Sopenharmony_ci if (!struct_v || struct_compat != 1) 29362306a36Sopenharmony_ci goto bad; 29462306a36Sopenharmony_ci ceph_decode_32_safe(p, end, struct_len, bad); 29562306a36Sopenharmony_ci ceph_decode_need(p, end, struct_len, bad); 29662306a36Sopenharmony_ci end = *p + struct_len; 29762306a36Sopenharmony_ci } 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci ceph_decode_need(p, end, sizeof(**dirfrag), bad); 30062306a36Sopenharmony_ci *dirfrag = *p; 30162306a36Sopenharmony_ci *p += sizeof(**dirfrag) + sizeof(u32) * le32_to_cpu((*dirfrag)->ndist); 30262306a36Sopenharmony_ci if (unlikely(*p > end)) 30362306a36Sopenharmony_ci goto bad; 30462306a36Sopenharmony_ci if (features == (u64)-1) 30562306a36Sopenharmony_ci *p = end; 30662306a36Sopenharmony_ci return 0; 30762306a36Sopenharmony_cibad: 30862306a36Sopenharmony_ci return -EIO; 30962306a36Sopenharmony_ci} 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_cistatic int parse_reply_info_lease(void **p, void *end, 31262306a36Sopenharmony_ci struct ceph_mds_reply_lease **lease, 31362306a36Sopenharmony_ci u64 features, u32 *altname_len, u8 **altname) 31462306a36Sopenharmony_ci{ 31562306a36Sopenharmony_ci u8 struct_v; 31662306a36Sopenharmony_ci u32 struct_len; 31762306a36Sopenharmony_ci void *lend; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci if (features == (u64)-1) { 32062306a36Sopenharmony_ci u8 struct_compat; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci ceph_decode_8_safe(p, end, struct_v, bad); 32362306a36Sopenharmony_ci ceph_decode_8_safe(p, end, struct_compat, bad); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci /* struct_v is expected to be >= 1. we only understand 32662306a36Sopenharmony_ci * encoding whose struct_compat == 1. 
*/ 32762306a36Sopenharmony_ci if (!struct_v || struct_compat != 1) 32862306a36Sopenharmony_ci goto bad; 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci ceph_decode_32_safe(p, end, struct_len, bad); 33162306a36Sopenharmony_ci } else { 33262306a36Sopenharmony_ci struct_len = sizeof(**lease); 33362306a36Sopenharmony_ci *altname_len = 0; 33462306a36Sopenharmony_ci *altname = NULL; 33562306a36Sopenharmony_ci } 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci lend = *p + struct_len; 33862306a36Sopenharmony_ci ceph_decode_need(p, end, struct_len, bad); 33962306a36Sopenharmony_ci *lease = *p; 34062306a36Sopenharmony_ci *p += sizeof(**lease); 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci if (features == (u64)-1) { 34362306a36Sopenharmony_ci if (struct_v >= 2) { 34462306a36Sopenharmony_ci ceph_decode_32_safe(p, end, *altname_len, bad); 34562306a36Sopenharmony_ci ceph_decode_need(p, end, *altname_len, bad); 34662306a36Sopenharmony_ci *altname = *p; 34762306a36Sopenharmony_ci *p += *altname_len; 34862306a36Sopenharmony_ci } else { 34962306a36Sopenharmony_ci *altname = NULL; 35062306a36Sopenharmony_ci *altname_len = 0; 35162306a36Sopenharmony_ci } 35262306a36Sopenharmony_ci } 35362306a36Sopenharmony_ci *p = lend; 35462306a36Sopenharmony_ci return 0; 35562306a36Sopenharmony_cibad: 35662306a36Sopenharmony_ci return -EIO; 35762306a36Sopenharmony_ci} 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_ci/* 36062306a36Sopenharmony_ci * parse a normal reply, which may contain a (dir+)dentry and/or a 36162306a36Sopenharmony_ci * target inode. 
36262306a36Sopenharmony_ci */ 36362306a36Sopenharmony_cistatic int parse_reply_info_trace(void **p, void *end, 36462306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *info, 36562306a36Sopenharmony_ci u64 features) 36662306a36Sopenharmony_ci{ 36762306a36Sopenharmony_ci int err; 36862306a36Sopenharmony_ci 36962306a36Sopenharmony_ci if (info->head->is_dentry) { 37062306a36Sopenharmony_ci err = parse_reply_info_in(p, end, &info->diri, features); 37162306a36Sopenharmony_ci if (err < 0) 37262306a36Sopenharmony_ci goto out_bad; 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci err = parse_reply_info_dir(p, end, &info->dirfrag, features); 37562306a36Sopenharmony_ci if (err < 0) 37662306a36Sopenharmony_ci goto out_bad; 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci ceph_decode_32_safe(p, end, info->dname_len, bad); 37962306a36Sopenharmony_ci ceph_decode_need(p, end, info->dname_len, bad); 38062306a36Sopenharmony_ci info->dname = *p; 38162306a36Sopenharmony_ci *p += info->dname_len; 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci err = parse_reply_info_lease(p, end, &info->dlease, features, 38462306a36Sopenharmony_ci &info->altname_len, &info->altname); 38562306a36Sopenharmony_ci if (err < 0) 38662306a36Sopenharmony_ci goto out_bad; 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_ci if (info->head->is_target) { 39062306a36Sopenharmony_ci err = parse_reply_info_in(p, end, &info->targeti, features); 39162306a36Sopenharmony_ci if (err < 0) 39262306a36Sopenharmony_ci goto out_bad; 39362306a36Sopenharmony_ci } 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci if (unlikely(*p != end)) 39662306a36Sopenharmony_ci goto bad; 39762306a36Sopenharmony_ci return 0; 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_cibad: 40062306a36Sopenharmony_ci err = -EIO; 40162306a36Sopenharmony_ciout_bad: 40262306a36Sopenharmony_ci pr_err("problem parsing mds trace %d\n", err); 40362306a36Sopenharmony_ci return err; 
40462306a36Sopenharmony_ci} 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci/* 40762306a36Sopenharmony_ci * parse readdir results 40862306a36Sopenharmony_ci */ 40962306a36Sopenharmony_cistatic int parse_reply_info_readdir(void **p, void *end, 41062306a36Sopenharmony_ci struct ceph_mds_request *req, 41162306a36Sopenharmony_ci u64 features) 41262306a36Sopenharmony_ci{ 41362306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *info = &req->r_reply_info; 41462306a36Sopenharmony_ci u32 num, i = 0; 41562306a36Sopenharmony_ci int err; 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci err = parse_reply_info_dir(p, end, &info->dir_dir, features); 41862306a36Sopenharmony_ci if (err < 0) 41962306a36Sopenharmony_ci goto out_bad; 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci ceph_decode_need(p, end, sizeof(num) + 2, bad); 42262306a36Sopenharmony_ci num = ceph_decode_32(p); 42362306a36Sopenharmony_ci { 42462306a36Sopenharmony_ci u16 flags = ceph_decode_16(p); 42562306a36Sopenharmony_ci info->dir_end = !!(flags & CEPH_READDIR_FRAG_END); 42662306a36Sopenharmony_ci info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE); 42762306a36Sopenharmony_ci info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER); 42862306a36Sopenharmony_ci info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH); 42962306a36Sopenharmony_ci } 43062306a36Sopenharmony_ci if (num == 0) 43162306a36Sopenharmony_ci goto done; 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ci BUG_ON(!info->dir_entries); 43462306a36Sopenharmony_ci if ((unsigned long)(info->dir_entries + num) > 43562306a36Sopenharmony_ci (unsigned long)info->dir_entries + info->dir_buf_size) { 43662306a36Sopenharmony_ci pr_err("dir contents are larger than expected\n"); 43762306a36Sopenharmony_ci WARN_ON(1); 43862306a36Sopenharmony_ci goto bad; 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci info->dir_nr = num; 44262306a36Sopenharmony_ci while (num) { 44362306a36Sopenharmony_ci struct inode 
*inode = d_inode(req->r_dentry); 44462306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 44562306a36Sopenharmony_ci struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i; 44662306a36Sopenharmony_ci struct fscrypt_str tname = FSTR_INIT(NULL, 0); 44762306a36Sopenharmony_ci struct fscrypt_str oname = FSTR_INIT(NULL, 0); 44862306a36Sopenharmony_ci struct ceph_fname fname; 44962306a36Sopenharmony_ci u32 altname_len, _name_len; 45062306a36Sopenharmony_ci u8 *altname, *_name; 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci /* dentry */ 45362306a36Sopenharmony_ci ceph_decode_32_safe(p, end, _name_len, bad); 45462306a36Sopenharmony_ci ceph_decode_need(p, end, _name_len, bad); 45562306a36Sopenharmony_ci _name = *p; 45662306a36Sopenharmony_ci *p += _name_len; 45762306a36Sopenharmony_ci dout("parsed dir dname '%.*s'\n", _name_len, _name); 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci if (info->hash_order) 46062306a36Sopenharmony_ci rde->raw_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash, 46162306a36Sopenharmony_ci _name, _name_len); 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci /* dentry lease */ 46462306a36Sopenharmony_ci err = parse_reply_info_lease(p, end, &rde->lease, features, 46562306a36Sopenharmony_ci &altname_len, &altname); 46662306a36Sopenharmony_ci if (err) 46762306a36Sopenharmony_ci goto out_bad; 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci /* 47062306a36Sopenharmony_ci * Try to dencrypt the dentry names and update them 47162306a36Sopenharmony_ci * in the ceph_mds_reply_dir_entry struct. 
47262306a36Sopenharmony_ci */ 47362306a36Sopenharmony_ci fname.dir = inode; 47462306a36Sopenharmony_ci fname.name = _name; 47562306a36Sopenharmony_ci fname.name_len = _name_len; 47662306a36Sopenharmony_ci fname.ctext = altname; 47762306a36Sopenharmony_ci fname.ctext_len = altname_len; 47862306a36Sopenharmony_ci /* 47962306a36Sopenharmony_ci * The _name_len maybe larger than altname_len, such as 48062306a36Sopenharmony_ci * when the human readable name length is in range of 48162306a36Sopenharmony_ci * (CEPH_NOHASH_NAME_MAX, CEPH_NOHASH_NAME_MAX + SHA256_DIGEST_SIZE), 48262306a36Sopenharmony_ci * then the copy in ceph_fname_to_usr will corrupt the 48362306a36Sopenharmony_ci * data if there has no encryption key. 48462306a36Sopenharmony_ci * 48562306a36Sopenharmony_ci * Just set the no_copy flag and then if there has no 48662306a36Sopenharmony_ci * encryption key the oname.name will be assigned to 48762306a36Sopenharmony_ci * _name always. 48862306a36Sopenharmony_ci */ 48962306a36Sopenharmony_ci fname.no_copy = true; 49062306a36Sopenharmony_ci if (altname_len == 0) { 49162306a36Sopenharmony_ci /* 49262306a36Sopenharmony_ci * Set tname to _name, and this will be used 49362306a36Sopenharmony_ci * to do the base64_decode in-place. It's 49462306a36Sopenharmony_ci * safe because the decoded string should 49562306a36Sopenharmony_ci * always be shorter, which is 3/4 of origin 49662306a36Sopenharmony_ci * string. 49762306a36Sopenharmony_ci */ 49862306a36Sopenharmony_ci tname.name = _name; 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci /* 50162306a36Sopenharmony_ci * Set oname to _name too, and this will be 50262306a36Sopenharmony_ci * used to do the dencryption in-place. 
50362306a36Sopenharmony_ci */ 50462306a36Sopenharmony_ci oname.name = _name; 50562306a36Sopenharmony_ci oname.len = _name_len; 50662306a36Sopenharmony_ci } else { 50762306a36Sopenharmony_ci /* 50862306a36Sopenharmony_ci * This will do the decryption only in-place 50962306a36Sopenharmony_ci * from altname cryptext directly. 51062306a36Sopenharmony_ci */ 51162306a36Sopenharmony_ci oname.name = altname; 51262306a36Sopenharmony_ci oname.len = altname_len; 51362306a36Sopenharmony_ci } 51462306a36Sopenharmony_ci rde->is_nokey = false; 51562306a36Sopenharmony_ci err = ceph_fname_to_usr(&fname, &tname, &oname, &rde->is_nokey); 51662306a36Sopenharmony_ci if (err) { 51762306a36Sopenharmony_ci pr_err("%s unable to decode %.*s, got %d\n", __func__, 51862306a36Sopenharmony_ci _name_len, _name, err); 51962306a36Sopenharmony_ci goto out_bad; 52062306a36Sopenharmony_ci } 52162306a36Sopenharmony_ci rde->name = oname.name; 52262306a36Sopenharmony_ci rde->name_len = oname.len; 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci /* inode */ 52562306a36Sopenharmony_ci err = parse_reply_info_in(p, end, &rde->inode, features); 52662306a36Sopenharmony_ci if (err < 0) 52762306a36Sopenharmony_ci goto out_bad; 52862306a36Sopenharmony_ci /* ceph_readdir_prepopulate() will update it */ 52962306a36Sopenharmony_ci rde->offset = 0; 53062306a36Sopenharmony_ci i++; 53162306a36Sopenharmony_ci num--; 53262306a36Sopenharmony_ci } 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_cidone: 53562306a36Sopenharmony_ci /* Skip over any unrecognized fields */ 53662306a36Sopenharmony_ci *p = end; 53762306a36Sopenharmony_ci return 0; 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_cibad: 54062306a36Sopenharmony_ci err = -EIO; 54162306a36Sopenharmony_ciout_bad: 54262306a36Sopenharmony_ci pr_err("problem parsing dir contents %d\n", err); 54362306a36Sopenharmony_ci return err; 54462306a36Sopenharmony_ci} 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci/* 54762306a36Sopenharmony_ci * parse fcntl F_GETLK 
results 54862306a36Sopenharmony_ci */ 54962306a36Sopenharmony_cistatic int parse_reply_info_filelock(void **p, void *end, 55062306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *info, 55162306a36Sopenharmony_ci u64 features) 55262306a36Sopenharmony_ci{ 55362306a36Sopenharmony_ci if (*p + sizeof(*info->filelock_reply) > end) 55462306a36Sopenharmony_ci goto bad; 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci info->filelock_reply = *p; 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci /* Skip over any unrecognized fields */ 55962306a36Sopenharmony_ci *p = end; 56062306a36Sopenharmony_ci return 0; 56162306a36Sopenharmony_cibad: 56262306a36Sopenharmony_ci return -EIO; 56362306a36Sopenharmony_ci} 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci#if BITS_PER_LONG == 64 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci#define DELEGATED_INO_AVAILABLE xa_mk_value(1) 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_cistatic int ceph_parse_deleg_inos(void **p, void *end, 57162306a36Sopenharmony_ci struct ceph_mds_session *s) 57262306a36Sopenharmony_ci{ 57362306a36Sopenharmony_ci u32 sets; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci ceph_decode_32_safe(p, end, sets, bad); 57662306a36Sopenharmony_ci dout("got %u sets of delegated inodes\n", sets); 57762306a36Sopenharmony_ci while (sets--) { 57862306a36Sopenharmony_ci u64 start, len; 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci ceph_decode_64_safe(p, end, start, bad); 58162306a36Sopenharmony_ci ceph_decode_64_safe(p, end, len, bad); 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci /* Don't accept a delegation of system inodes */ 58462306a36Sopenharmony_ci if (start < CEPH_INO_SYSTEM_BASE) { 58562306a36Sopenharmony_ci pr_warn_ratelimited("ceph: ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n", 58662306a36Sopenharmony_ci start, len); 58762306a36Sopenharmony_ci continue; 58862306a36Sopenharmony_ci } 58962306a36Sopenharmony_ci while 
(len--) { 59062306a36Sopenharmony_ci int err = xa_insert(&s->s_delegated_inos, start++, 59162306a36Sopenharmony_ci DELEGATED_INO_AVAILABLE, 59262306a36Sopenharmony_ci GFP_KERNEL); 59362306a36Sopenharmony_ci if (!err) { 59462306a36Sopenharmony_ci dout("added delegated inode 0x%llx\n", 59562306a36Sopenharmony_ci start - 1); 59662306a36Sopenharmony_ci } else if (err == -EBUSY) { 59762306a36Sopenharmony_ci pr_warn("MDS delegated inode 0x%llx more than once.\n", 59862306a36Sopenharmony_ci start - 1); 59962306a36Sopenharmony_ci } else { 60062306a36Sopenharmony_ci return err; 60162306a36Sopenharmony_ci } 60262306a36Sopenharmony_ci } 60362306a36Sopenharmony_ci } 60462306a36Sopenharmony_ci return 0; 60562306a36Sopenharmony_cibad: 60662306a36Sopenharmony_ci return -EIO; 60762306a36Sopenharmony_ci} 60862306a36Sopenharmony_ci 60962306a36Sopenharmony_ciu64 ceph_get_deleg_ino(struct ceph_mds_session *s) 61062306a36Sopenharmony_ci{ 61162306a36Sopenharmony_ci unsigned long ino; 61262306a36Sopenharmony_ci void *val; 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci xa_for_each(&s->s_delegated_inos, ino, val) { 61562306a36Sopenharmony_ci val = xa_erase(&s->s_delegated_inos, ino); 61662306a36Sopenharmony_ci if (val == DELEGATED_INO_AVAILABLE) 61762306a36Sopenharmony_ci return ino; 61862306a36Sopenharmony_ci } 61962306a36Sopenharmony_ci return 0; 62062306a36Sopenharmony_ci} 62162306a36Sopenharmony_ci 62262306a36Sopenharmony_ciint ceph_restore_deleg_ino(struct ceph_mds_session *s, u64 ino) 62362306a36Sopenharmony_ci{ 62462306a36Sopenharmony_ci return xa_insert(&s->s_delegated_inos, ino, DELEGATED_INO_AVAILABLE, 62562306a36Sopenharmony_ci GFP_KERNEL); 62662306a36Sopenharmony_ci} 62762306a36Sopenharmony_ci#else /* BITS_PER_LONG == 64 */ 62862306a36Sopenharmony_ci/* 62962306a36Sopenharmony_ci * FIXME: xarrays can't handle 64-bit indexes on a 32-bit arch. For now, just 63062306a36Sopenharmony_ci * ignore delegated_inos on 32 bit arch. 
Maybe eventually add xarrays for top 63162306a36Sopenharmony_ci * and bottom words? 63262306a36Sopenharmony_ci */ 63362306a36Sopenharmony_cistatic int ceph_parse_deleg_inos(void **p, void *end, 63462306a36Sopenharmony_ci struct ceph_mds_session *s) 63562306a36Sopenharmony_ci{ 63662306a36Sopenharmony_ci u32 sets; 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ci ceph_decode_32_safe(p, end, sets, bad); 63962306a36Sopenharmony_ci if (sets) 64062306a36Sopenharmony_ci ceph_decode_skip_n(p, end, sets * 2 * sizeof(__le64), bad); 64162306a36Sopenharmony_ci return 0; 64262306a36Sopenharmony_cibad: 64362306a36Sopenharmony_ci return -EIO; 64462306a36Sopenharmony_ci} 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ciu64 ceph_get_deleg_ino(struct ceph_mds_session *s) 64762306a36Sopenharmony_ci{ 64862306a36Sopenharmony_ci return 0; 64962306a36Sopenharmony_ci} 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ciint ceph_restore_deleg_ino(struct ceph_mds_session *s, u64 ino) 65262306a36Sopenharmony_ci{ 65362306a36Sopenharmony_ci return 0; 65462306a36Sopenharmony_ci} 65562306a36Sopenharmony_ci#endif /* BITS_PER_LONG == 64 */ 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci/* 65862306a36Sopenharmony_ci * parse create results 65962306a36Sopenharmony_ci */ 66062306a36Sopenharmony_cistatic int parse_reply_info_create(void **p, void *end, 66162306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *info, 66262306a36Sopenharmony_ci u64 features, struct ceph_mds_session *s) 66362306a36Sopenharmony_ci{ 66462306a36Sopenharmony_ci int ret; 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci if (features == (u64)-1 || 66762306a36Sopenharmony_ci (features & CEPH_FEATURE_REPLY_CREATE_INODE)) { 66862306a36Sopenharmony_ci if (*p == end) { 66962306a36Sopenharmony_ci /* Malformed reply? 
*/ 67062306a36Sopenharmony_ci info->has_create_ino = false; 67162306a36Sopenharmony_ci } else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) { 67262306a36Sopenharmony_ci info->has_create_ino = true; 67362306a36Sopenharmony_ci /* struct_v, struct_compat, and len */ 67462306a36Sopenharmony_ci ceph_decode_skip_n(p, end, 2 + sizeof(u32), bad); 67562306a36Sopenharmony_ci ceph_decode_64_safe(p, end, info->ino, bad); 67662306a36Sopenharmony_ci ret = ceph_parse_deleg_inos(p, end, s); 67762306a36Sopenharmony_ci if (ret) 67862306a36Sopenharmony_ci return ret; 67962306a36Sopenharmony_ci } else { 68062306a36Sopenharmony_ci /* legacy */ 68162306a36Sopenharmony_ci ceph_decode_64_safe(p, end, info->ino, bad); 68262306a36Sopenharmony_ci info->has_create_ino = true; 68362306a36Sopenharmony_ci } 68462306a36Sopenharmony_ci } else { 68562306a36Sopenharmony_ci if (*p != end) 68662306a36Sopenharmony_ci goto bad; 68762306a36Sopenharmony_ci } 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ci /* Skip over any unrecognized fields */ 69062306a36Sopenharmony_ci *p = end; 69162306a36Sopenharmony_ci return 0; 69262306a36Sopenharmony_cibad: 69362306a36Sopenharmony_ci return -EIO; 69462306a36Sopenharmony_ci} 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_cistatic int parse_reply_info_getvxattr(void **p, void *end, 69762306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *info, 69862306a36Sopenharmony_ci u64 features) 69962306a36Sopenharmony_ci{ 70062306a36Sopenharmony_ci u32 value_len; 70162306a36Sopenharmony_ci 70262306a36Sopenharmony_ci ceph_decode_skip_8(p, end, bad); /* skip current version: 1 */ 70362306a36Sopenharmony_ci ceph_decode_skip_8(p, end, bad); /* skip first version: 1 */ 70462306a36Sopenharmony_ci ceph_decode_skip_32(p, end, bad); /* skip payload length */ 70562306a36Sopenharmony_ci 70662306a36Sopenharmony_ci ceph_decode_32_safe(p, end, value_len, bad); 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci if (value_len == end - *p) { 
70962306a36Sopenharmony_ci info->xattr_info.xattr_value = *p; 71062306a36Sopenharmony_ci info->xattr_info.xattr_value_len = value_len; 71162306a36Sopenharmony_ci *p = end; 71262306a36Sopenharmony_ci return value_len; 71362306a36Sopenharmony_ci } 71462306a36Sopenharmony_cibad: 71562306a36Sopenharmony_ci return -EIO; 71662306a36Sopenharmony_ci} 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci/* 71962306a36Sopenharmony_ci * parse extra results 72062306a36Sopenharmony_ci */ 72162306a36Sopenharmony_cistatic int parse_reply_info_extra(void **p, void *end, 72262306a36Sopenharmony_ci struct ceph_mds_request *req, 72362306a36Sopenharmony_ci u64 features, struct ceph_mds_session *s) 72462306a36Sopenharmony_ci{ 72562306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *info = &req->r_reply_info; 72662306a36Sopenharmony_ci u32 op = le32_to_cpu(info->head->op); 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci if (op == CEPH_MDS_OP_GETFILELOCK) 72962306a36Sopenharmony_ci return parse_reply_info_filelock(p, end, info, features); 73062306a36Sopenharmony_ci else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP) 73162306a36Sopenharmony_ci return parse_reply_info_readdir(p, end, req, features); 73262306a36Sopenharmony_ci else if (op == CEPH_MDS_OP_CREATE) 73362306a36Sopenharmony_ci return parse_reply_info_create(p, end, info, features, s); 73462306a36Sopenharmony_ci else if (op == CEPH_MDS_OP_GETVXATTR) 73562306a36Sopenharmony_ci return parse_reply_info_getvxattr(p, end, info, features); 73662306a36Sopenharmony_ci else 73762306a36Sopenharmony_ci return -EIO; 73862306a36Sopenharmony_ci} 73962306a36Sopenharmony_ci 74062306a36Sopenharmony_ci/* 74162306a36Sopenharmony_ci * parse entire mds reply 74262306a36Sopenharmony_ci */ 74362306a36Sopenharmony_cistatic int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg, 74462306a36Sopenharmony_ci struct ceph_mds_request *req, u64 features) 74562306a36Sopenharmony_ci{ 74662306a36Sopenharmony_ci struct 
ceph_mds_reply_info_parsed *info = &req->r_reply_info; 74762306a36Sopenharmony_ci void *p, *end; 74862306a36Sopenharmony_ci u32 len; 74962306a36Sopenharmony_ci int err; 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci info->head = msg->front.iov_base; 75262306a36Sopenharmony_ci p = msg->front.iov_base + sizeof(struct ceph_mds_reply_head); 75362306a36Sopenharmony_ci end = p + msg->front.iov_len - sizeof(struct ceph_mds_reply_head); 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci /* trace */ 75662306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, len, bad); 75762306a36Sopenharmony_ci if (len > 0) { 75862306a36Sopenharmony_ci ceph_decode_need(&p, end, len, bad); 75962306a36Sopenharmony_ci err = parse_reply_info_trace(&p, p+len, info, features); 76062306a36Sopenharmony_ci if (err < 0) 76162306a36Sopenharmony_ci goto out_bad; 76262306a36Sopenharmony_ci } 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci /* extra */ 76562306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, len, bad); 76662306a36Sopenharmony_ci if (len > 0) { 76762306a36Sopenharmony_ci ceph_decode_need(&p, end, len, bad); 76862306a36Sopenharmony_ci err = parse_reply_info_extra(&p, p+len, req, features, s); 76962306a36Sopenharmony_ci if (err < 0) 77062306a36Sopenharmony_ci goto out_bad; 77162306a36Sopenharmony_ci } 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci /* snap blob */ 77462306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, len, bad); 77562306a36Sopenharmony_ci info->snapblob_len = len; 77662306a36Sopenharmony_ci info->snapblob = p; 77762306a36Sopenharmony_ci p += len; 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci if (p != end) 78062306a36Sopenharmony_ci goto bad; 78162306a36Sopenharmony_ci return 0; 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_cibad: 78462306a36Sopenharmony_ci err = -EIO; 78562306a36Sopenharmony_ciout_bad: 78662306a36Sopenharmony_ci pr_err("mds parse_reply err %d\n", err); 78762306a36Sopenharmony_ci ceph_msg_dump(msg); 78862306a36Sopenharmony_ci 
return err; 78962306a36Sopenharmony_ci} 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_cistatic void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) 79262306a36Sopenharmony_ci{ 79362306a36Sopenharmony_ci int i; 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ci kfree(info->diri.fscrypt_auth); 79662306a36Sopenharmony_ci kfree(info->diri.fscrypt_file); 79762306a36Sopenharmony_ci kfree(info->targeti.fscrypt_auth); 79862306a36Sopenharmony_ci kfree(info->targeti.fscrypt_file); 79962306a36Sopenharmony_ci if (!info->dir_entries) 80062306a36Sopenharmony_ci return; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci for (i = 0; i < info->dir_nr; i++) { 80362306a36Sopenharmony_ci struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i; 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_ci kfree(rde->inode.fscrypt_auth); 80662306a36Sopenharmony_ci kfree(rde->inode.fscrypt_file); 80762306a36Sopenharmony_ci } 80862306a36Sopenharmony_ci free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size)); 80962306a36Sopenharmony_ci} 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci/* 81262306a36Sopenharmony_ci * In async unlink case the kclient won't wait for the first reply 81362306a36Sopenharmony_ci * from MDS and just drop all the links and unhash the dentry and then 81462306a36Sopenharmony_ci * succeeds immediately. 81562306a36Sopenharmony_ci * 81662306a36Sopenharmony_ci * For any new create/link/rename,etc requests followed by using the 81762306a36Sopenharmony_ci * same file names we must wait for the first reply of the inflight 81862306a36Sopenharmony_ci * unlink request, or the MDS possibly will fail these following 81962306a36Sopenharmony_ci * requests with -EEXIST if the inflight async unlink request was 82062306a36Sopenharmony_ci * delayed for some reasons. 
82162306a36Sopenharmony_ci * 82262306a36Sopenharmony_ci * And the worst case is that for the none async openc request it will 82362306a36Sopenharmony_ci * successfully open the file if the CDentry hasn't been unlinked yet, 82462306a36Sopenharmony_ci * but later the previous delayed async unlink request will remove the 82562306a36Sopenharmony_ci * CDenty. That means the just created file is possiblly deleted later 82662306a36Sopenharmony_ci * by accident. 82762306a36Sopenharmony_ci * 82862306a36Sopenharmony_ci * We need to wait for the inflight async unlink requests to finish 82962306a36Sopenharmony_ci * when creating new files/directories by using the same file names. 83062306a36Sopenharmony_ci */ 83162306a36Sopenharmony_ciint ceph_wait_on_conflict_unlink(struct dentry *dentry) 83262306a36Sopenharmony_ci{ 83362306a36Sopenharmony_ci struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); 83462306a36Sopenharmony_ci struct dentry *pdentry = dentry->d_parent; 83562306a36Sopenharmony_ci struct dentry *udentry, *found = NULL; 83662306a36Sopenharmony_ci struct ceph_dentry_info *di; 83762306a36Sopenharmony_ci struct qstr dname; 83862306a36Sopenharmony_ci u32 hash = dentry->d_name.hash; 83962306a36Sopenharmony_ci int err; 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci dname.name = dentry->d_name.name; 84262306a36Sopenharmony_ci dname.len = dentry->d_name.len; 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_ci rcu_read_lock(); 84562306a36Sopenharmony_ci hash_for_each_possible_rcu(fsc->async_unlink_conflict, di, 84662306a36Sopenharmony_ci hnode, hash) { 84762306a36Sopenharmony_ci udentry = di->dentry; 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ci spin_lock(&udentry->d_lock); 85062306a36Sopenharmony_ci if (udentry->d_name.hash != hash) 85162306a36Sopenharmony_ci goto next; 85262306a36Sopenharmony_ci if (unlikely(udentry->d_parent != pdentry)) 85362306a36Sopenharmony_ci goto next; 85462306a36Sopenharmony_ci if (!hash_hashed(&di->hnode)) 
85562306a36Sopenharmony_ci goto next; 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci if (!test_bit(CEPH_DENTRY_ASYNC_UNLINK_BIT, &di->flags)) 85862306a36Sopenharmony_ci pr_warn("%s dentry %p:%pd async unlink bit is not set\n", 85962306a36Sopenharmony_ci __func__, dentry, dentry); 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_ci if (!d_same_name(udentry, pdentry, &dname)) 86262306a36Sopenharmony_ci goto next; 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci found = dget_dlock(udentry); 86562306a36Sopenharmony_ci spin_unlock(&udentry->d_lock); 86662306a36Sopenharmony_ci break; 86762306a36Sopenharmony_cinext: 86862306a36Sopenharmony_ci spin_unlock(&udentry->d_lock); 86962306a36Sopenharmony_ci } 87062306a36Sopenharmony_ci rcu_read_unlock(); 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci if (likely(!found)) 87362306a36Sopenharmony_ci return 0; 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci dout("%s dentry %p:%pd conflict with old %p:%pd\n", __func__, 87662306a36Sopenharmony_ci dentry, dentry, found, found); 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci err = wait_on_bit(&di->flags, CEPH_DENTRY_ASYNC_UNLINK_BIT, 87962306a36Sopenharmony_ci TASK_KILLABLE); 88062306a36Sopenharmony_ci dput(found); 88162306a36Sopenharmony_ci return err; 88262306a36Sopenharmony_ci} 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci/* 88662306a36Sopenharmony_ci * sessions 88762306a36Sopenharmony_ci */ 88862306a36Sopenharmony_ciconst char *ceph_session_state_name(int s) 88962306a36Sopenharmony_ci{ 89062306a36Sopenharmony_ci switch (s) { 89162306a36Sopenharmony_ci case CEPH_MDS_SESSION_NEW: return "new"; 89262306a36Sopenharmony_ci case CEPH_MDS_SESSION_OPENING: return "opening"; 89362306a36Sopenharmony_ci case CEPH_MDS_SESSION_OPEN: return "open"; 89462306a36Sopenharmony_ci case CEPH_MDS_SESSION_HUNG: return "hung"; 89562306a36Sopenharmony_ci case CEPH_MDS_SESSION_CLOSING: return "closing"; 89662306a36Sopenharmony_ci case 
CEPH_MDS_SESSION_CLOSED: return "closed"; 89762306a36Sopenharmony_ci case CEPH_MDS_SESSION_RESTARTING: return "restarting"; 89862306a36Sopenharmony_ci case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting"; 89962306a36Sopenharmony_ci case CEPH_MDS_SESSION_REJECTED: return "rejected"; 90062306a36Sopenharmony_ci default: return "???"; 90162306a36Sopenharmony_ci } 90262306a36Sopenharmony_ci} 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_cistruct ceph_mds_session *ceph_get_mds_session(struct ceph_mds_session *s) 90562306a36Sopenharmony_ci{ 90662306a36Sopenharmony_ci if (refcount_inc_not_zero(&s->s_ref)) 90762306a36Sopenharmony_ci return s; 90862306a36Sopenharmony_ci return NULL; 90962306a36Sopenharmony_ci} 91062306a36Sopenharmony_ci 91162306a36Sopenharmony_civoid ceph_put_mds_session(struct ceph_mds_session *s) 91262306a36Sopenharmony_ci{ 91362306a36Sopenharmony_ci if (IS_ERR_OR_NULL(s)) 91462306a36Sopenharmony_ci return; 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci if (refcount_dec_and_test(&s->s_ref)) { 91762306a36Sopenharmony_ci if (s->s_auth.authorizer) 91862306a36Sopenharmony_ci ceph_auth_destroy_authorizer(s->s_auth.authorizer); 91962306a36Sopenharmony_ci WARN_ON(mutex_is_locked(&s->s_mutex)); 92062306a36Sopenharmony_ci xa_destroy(&s->s_delegated_inos); 92162306a36Sopenharmony_ci kfree(s); 92262306a36Sopenharmony_ci } 92362306a36Sopenharmony_ci} 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci/* 92662306a36Sopenharmony_ci * called under mdsc->mutex 92762306a36Sopenharmony_ci */ 92862306a36Sopenharmony_cistruct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, 92962306a36Sopenharmony_ci int mds) 93062306a36Sopenharmony_ci{ 93162306a36Sopenharmony_ci if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) 93262306a36Sopenharmony_ci return NULL; 93362306a36Sopenharmony_ci return ceph_get_mds_session(mdsc->sessions[mds]); 93462306a36Sopenharmony_ci} 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_cistatic bool 
__have_session(struct ceph_mds_client *mdsc, int mds) 93762306a36Sopenharmony_ci{ 93862306a36Sopenharmony_ci if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) 93962306a36Sopenharmony_ci return false; 94062306a36Sopenharmony_ci else 94162306a36Sopenharmony_ci return true; 94262306a36Sopenharmony_ci} 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_cistatic int __verify_registered_session(struct ceph_mds_client *mdsc, 94562306a36Sopenharmony_ci struct ceph_mds_session *s) 94662306a36Sopenharmony_ci{ 94762306a36Sopenharmony_ci if (s->s_mds >= mdsc->max_sessions || 94862306a36Sopenharmony_ci mdsc->sessions[s->s_mds] != s) 94962306a36Sopenharmony_ci return -ENOENT; 95062306a36Sopenharmony_ci return 0; 95162306a36Sopenharmony_ci} 95262306a36Sopenharmony_ci 95362306a36Sopenharmony_ci/* 95462306a36Sopenharmony_ci * create+register a new session for given mds. 95562306a36Sopenharmony_ci * called under mdsc->mutex. 95662306a36Sopenharmony_ci */ 95762306a36Sopenharmony_cistatic struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, 95862306a36Sopenharmony_ci int mds) 95962306a36Sopenharmony_ci{ 96062306a36Sopenharmony_ci struct ceph_mds_session *s; 96162306a36Sopenharmony_ci 96262306a36Sopenharmony_ci if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) 96362306a36Sopenharmony_ci return ERR_PTR(-EIO); 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_ci if (mds >= mdsc->mdsmap->possible_max_rank) 96662306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 96762306a36Sopenharmony_ci 96862306a36Sopenharmony_ci s = kzalloc(sizeof(*s), GFP_NOFS); 96962306a36Sopenharmony_ci if (!s) 97062306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 97162306a36Sopenharmony_ci 97262306a36Sopenharmony_ci if (mds >= mdsc->max_sessions) { 97362306a36Sopenharmony_ci int newmax = 1 << get_count_order(mds + 1); 97462306a36Sopenharmony_ci struct ceph_mds_session **sa; 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci dout("%s: realloc to %d\n", __func__, newmax); 
97762306a36Sopenharmony_ci sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); 97862306a36Sopenharmony_ci if (!sa) 97962306a36Sopenharmony_ci goto fail_realloc; 98062306a36Sopenharmony_ci if (mdsc->sessions) { 98162306a36Sopenharmony_ci memcpy(sa, mdsc->sessions, 98262306a36Sopenharmony_ci mdsc->max_sessions * sizeof(void *)); 98362306a36Sopenharmony_ci kfree(mdsc->sessions); 98462306a36Sopenharmony_ci } 98562306a36Sopenharmony_ci mdsc->sessions = sa; 98662306a36Sopenharmony_ci mdsc->max_sessions = newmax; 98762306a36Sopenharmony_ci } 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_ci dout("%s: mds%d\n", __func__, mds); 99062306a36Sopenharmony_ci s->s_mdsc = mdsc; 99162306a36Sopenharmony_ci s->s_mds = mds; 99262306a36Sopenharmony_ci s->s_state = CEPH_MDS_SESSION_NEW; 99362306a36Sopenharmony_ci mutex_init(&s->s_mutex); 99462306a36Sopenharmony_ci 99562306a36Sopenharmony_ci ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr); 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci atomic_set(&s->s_cap_gen, 1); 99862306a36Sopenharmony_ci s->s_cap_ttl = jiffies - 1; 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_ci spin_lock_init(&s->s_cap_lock); 100162306a36Sopenharmony_ci INIT_LIST_HEAD(&s->s_caps); 100262306a36Sopenharmony_ci refcount_set(&s->s_ref, 1); 100362306a36Sopenharmony_ci INIT_LIST_HEAD(&s->s_waiting); 100462306a36Sopenharmony_ci INIT_LIST_HEAD(&s->s_unsafe); 100562306a36Sopenharmony_ci xa_init(&s->s_delegated_inos); 100662306a36Sopenharmony_ci INIT_LIST_HEAD(&s->s_cap_releases); 100762306a36Sopenharmony_ci INIT_WORK(&s->s_cap_release_work, ceph_cap_release_work); 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci INIT_LIST_HEAD(&s->s_cap_dirty); 101062306a36Sopenharmony_ci INIT_LIST_HEAD(&s->s_cap_flushing); 101162306a36Sopenharmony_ci 101262306a36Sopenharmony_ci mdsc->sessions[mds] = s; 101362306a36Sopenharmony_ci atomic_inc(&mdsc->num_sessions); 101462306a36Sopenharmony_ci refcount_inc(&s->s_ref); /* one ref to sessions[], one to 
caller */ 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_ci ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds, 101762306a36Sopenharmony_ci ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci return s; 102062306a36Sopenharmony_ci 102162306a36Sopenharmony_cifail_realloc: 102262306a36Sopenharmony_ci kfree(s); 102362306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 102462306a36Sopenharmony_ci} 102562306a36Sopenharmony_ci 102662306a36Sopenharmony_ci/* 102762306a36Sopenharmony_ci * called under mdsc->mutex 102862306a36Sopenharmony_ci */ 102962306a36Sopenharmony_cistatic void __unregister_session(struct ceph_mds_client *mdsc, 103062306a36Sopenharmony_ci struct ceph_mds_session *s) 103162306a36Sopenharmony_ci{ 103262306a36Sopenharmony_ci dout("__unregister_session mds%d %p\n", s->s_mds, s); 103362306a36Sopenharmony_ci BUG_ON(mdsc->sessions[s->s_mds] != s); 103462306a36Sopenharmony_ci mdsc->sessions[s->s_mds] = NULL; 103562306a36Sopenharmony_ci ceph_con_close(&s->s_con); 103662306a36Sopenharmony_ci ceph_put_mds_session(s); 103762306a36Sopenharmony_ci atomic_dec(&mdsc->num_sessions); 103862306a36Sopenharmony_ci} 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci/* 104162306a36Sopenharmony_ci * drop session refs in request. 
104262306a36Sopenharmony_ci * 104362306a36Sopenharmony_ci * should be last request ref, or hold mdsc->mutex 104462306a36Sopenharmony_ci */ 104562306a36Sopenharmony_cistatic void put_request_session(struct ceph_mds_request *req) 104662306a36Sopenharmony_ci{ 104762306a36Sopenharmony_ci if (req->r_session) { 104862306a36Sopenharmony_ci ceph_put_mds_session(req->r_session); 104962306a36Sopenharmony_ci req->r_session = NULL; 105062306a36Sopenharmony_ci } 105162306a36Sopenharmony_ci} 105262306a36Sopenharmony_ci 105362306a36Sopenharmony_civoid ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc, 105462306a36Sopenharmony_ci void (*cb)(struct ceph_mds_session *), 105562306a36Sopenharmony_ci bool check_state) 105662306a36Sopenharmony_ci{ 105762306a36Sopenharmony_ci int mds; 105862306a36Sopenharmony_ci 105962306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 106062306a36Sopenharmony_ci for (mds = 0; mds < mdsc->max_sessions; ++mds) { 106162306a36Sopenharmony_ci struct ceph_mds_session *s; 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci s = __ceph_lookup_mds_session(mdsc, mds); 106462306a36Sopenharmony_ci if (!s) 106562306a36Sopenharmony_ci continue; 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci if (check_state && !check_session_state(s)) { 106862306a36Sopenharmony_ci ceph_put_mds_session(s); 106962306a36Sopenharmony_ci continue; 107062306a36Sopenharmony_ci } 107162306a36Sopenharmony_ci 107262306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 107362306a36Sopenharmony_ci cb(s); 107462306a36Sopenharmony_ci ceph_put_mds_session(s); 107562306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 107662306a36Sopenharmony_ci } 107762306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 107862306a36Sopenharmony_ci} 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_civoid ceph_mdsc_release_request(struct kref *kref) 108162306a36Sopenharmony_ci{ 108262306a36Sopenharmony_ci struct ceph_mds_request *req = container_of(kref, 108362306a36Sopenharmony_ci struct ceph_mds_request, 
108462306a36Sopenharmony_ci r_kref); 108562306a36Sopenharmony_ci ceph_mdsc_release_dir_caps_no_check(req); 108662306a36Sopenharmony_ci destroy_reply_info(&req->r_reply_info); 108762306a36Sopenharmony_ci if (req->r_request) 108862306a36Sopenharmony_ci ceph_msg_put(req->r_request); 108962306a36Sopenharmony_ci if (req->r_reply) 109062306a36Sopenharmony_ci ceph_msg_put(req->r_reply); 109162306a36Sopenharmony_ci if (req->r_inode) { 109262306a36Sopenharmony_ci ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 109362306a36Sopenharmony_ci iput(req->r_inode); 109462306a36Sopenharmony_ci } 109562306a36Sopenharmony_ci if (req->r_parent) { 109662306a36Sopenharmony_ci ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); 109762306a36Sopenharmony_ci iput(req->r_parent); 109862306a36Sopenharmony_ci } 109962306a36Sopenharmony_ci iput(req->r_target_inode); 110062306a36Sopenharmony_ci iput(req->r_new_inode); 110162306a36Sopenharmony_ci if (req->r_dentry) 110262306a36Sopenharmony_ci dput(req->r_dentry); 110362306a36Sopenharmony_ci if (req->r_old_dentry) 110462306a36Sopenharmony_ci dput(req->r_old_dentry); 110562306a36Sopenharmony_ci if (req->r_old_dentry_dir) { 110662306a36Sopenharmony_ci /* 110762306a36Sopenharmony_ci * track (and drop pins for) r_old_dentry_dir 110862306a36Sopenharmony_ci * separately, since r_old_dentry's d_parent may have 110962306a36Sopenharmony_ci * changed between the dir mutex being dropped and 111062306a36Sopenharmony_ci * this request being freed. 
111162306a36Sopenharmony_ci */ 111262306a36Sopenharmony_ci ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), 111362306a36Sopenharmony_ci CEPH_CAP_PIN); 111462306a36Sopenharmony_ci iput(req->r_old_dentry_dir); 111562306a36Sopenharmony_ci } 111662306a36Sopenharmony_ci kfree(req->r_path1); 111762306a36Sopenharmony_ci kfree(req->r_path2); 111862306a36Sopenharmony_ci put_cred(req->r_cred); 111962306a36Sopenharmony_ci if (req->r_pagelist) 112062306a36Sopenharmony_ci ceph_pagelist_release(req->r_pagelist); 112162306a36Sopenharmony_ci kfree(req->r_fscrypt_auth); 112262306a36Sopenharmony_ci kfree(req->r_altname); 112362306a36Sopenharmony_ci put_request_session(req); 112462306a36Sopenharmony_ci ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation); 112562306a36Sopenharmony_ci WARN_ON_ONCE(!list_empty(&req->r_wait)); 112662306a36Sopenharmony_ci kmem_cache_free(ceph_mds_request_cachep, req); 112762306a36Sopenharmony_ci} 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_ciDEFINE_RB_FUNCS(request, struct ceph_mds_request, r_tid, r_node) 113062306a36Sopenharmony_ci 113162306a36Sopenharmony_ci/* 113262306a36Sopenharmony_ci * lookup session, bump ref if found. 113362306a36Sopenharmony_ci * 113462306a36Sopenharmony_ci * called under mdsc->mutex. 113562306a36Sopenharmony_ci */ 113662306a36Sopenharmony_cistatic struct ceph_mds_request * 113762306a36Sopenharmony_cilookup_get_request(struct ceph_mds_client *mdsc, u64 tid) 113862306a36Sopenharmony_ci{ 113962306a36Sopenharmony_ci struct ceph_mds_request *req; 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci req = lookup_request(&mdsc->request_tree, tid); 114262306a36Sopenharmony_ci if (req) 114362306a36Sopenharmony_ci ceph_mdsc_get_request(req); 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_ci return req; 114662306a36Sopenharmony_ci} 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci/* 114962306a36Sopenharmony_ci * Register an in-flight request, and assign a tid. 
Link to directory 115062306a36Sopenharmony_ci * are modifying (if any). 115162306a36Sopenharmony_ci * 115262306a36Sopenharmony_ci * Called under mdsc->mutex. 115362306a36Sopenharmony_ci */ 115462306a36Sopenharmony_cistatic void __register_request(struct ceph_mds_client *mdsc, 115562306a36Sopenharmony_ci struct ceph_mds_request *req, 115662306a36Sopenharmony_ci struct inode *dir) 115762306a36Sopenharmony_ci{ 115862306a36Sopenharmony_ci int ret = 0; 115962306a36Sopenharmony_ci 116062306a36Sopenharmony_ci req->r_tid = ++mdsc->last_tid; 116162306a36Sopenharmony_ci if (req->r_num_caps) { 116262306a36Sopenharmony_ci ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation, 116362306a36Sopenharmony_ci req->r_num_caps); 116462306a36Sopenharmony_ci if (ret < 0) { 116562306a36Sopenharmony_ci pr_err("__register_request %p " 116662306a36Sopenharmony_ci "failed to reserve caps: %d\n", req, ret); 116762306a36Sopenharmony_ci /* set req->r_err to fail early from __do_request */ 116862306a36Sopenharmony_ci req->r_err = ret; 116962306a36Sopenharmony_ci return; 117062306a36Sopenharmony_ci } 117162306a36Sopenharmony_ci } 117262306a36Sopenharmony_ci dout("__register_request %p tid %lld\n", req, req->r_tid); 117362306a36Sopenharmony_ci ceph_mdsc_get_request(req); 117462306a36Sopenharmony_ci insert_request(&mdsc->request_tree, req); 117562306a36Sopenharmony_ci 117662306a36Sopenharmony_ci req->r_cred = get_current_cred(); 117762306a36Sopenharmony_ci 117862306a36Sopenharmony_ci if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK) 117962306a36Sopenharmony_ci mdsc->oldest_tid = req->r_tid; 118062306a36Sopenharmony_ci 118162306a36Sopenharmony_ci if (dir) { 118262306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(dir); 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ci ihold(dir); 118562306a36Sopenharmony_ci req->r_unsafe_dir = dir; 118662306a36Sopenharmony_ci spin_lock(&ci->i_unsafe_lock); 118762306a36Sopenharmony_ci list_add_tail(&req->r_unsafe_dir_item, 
&ci->i_unsafe_dirops); 118862306a36Sopenharmony_ci spin_unlock(&ci->i_unsafe_lock); 118962306a36Sopenharmony_ci } 119062306a36Sopenharmony_ci} 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_cistatic void __unregister_request(struct ceph_mds_client *mdsc, 119362306a36Sopenharmony_ci struct ceph_mds_request *req) 119462306a36Sopenharmony_ci{ 119562306a36Sopenharmony_ci dout("__unregister_request %p tid %lld\n", req, req->r_tid); 119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci /* Never leave an unregistered request on an unsafe list! */ 119862306a36Sopenharmony_ci list_del_init(&req->r_unsafe_item); 119962306a36Sopenharmony_ci 120062306a36Sopenharmony_ci if (req->r_tid == mdsc->oldest_tid) { 120162306a36Sopenharmony_ci struct rb_node *p = rb_next(&req->r_node); 120262306a36Sopenharmony_ci mdsc->oldest_tid = 0; 120362306a36Sopenharmony_ci while (p) { 120462306a36Sopenharmony_ci struct ceph_mds_request *next_req = 120562306a36Sopenharmony_ci rb_entry(p, struct ceph_mds_request, r_node); 120662306a36Sopenharmony_ci if (next_req->r_op != CEPH_MDS_OP_SETFILELOCK) { 120762306a36Sopenharmony_ci mdsc->oldest_tid = next_req->r_tid; 120862306a36Sopenharmony_ci break; 120962306a36Sopenharmony_ci } 121062306a36Sopenharmony_ci p = rb_next(p); 121162306a36Sopenharmony_ci } 121262306a36Sopenharmony_ci } 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_ci erase_request(&mdsc->request_tree, req); 121562306a36Sopenharmony_ci 121662306a36Sopenharmony_ci if (req->r_unsafe_dir) { 121762306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); 121862306a36Sopenharmony_ci spin_lock(&ci->i_unsafe_lock); 121962306a36Sopenharmony_ci list_del_init(&req->r_unsafe_dir_item); 122062306a36Sopenharmony_ci spin_unlock(&ci->i_unsafe_lock); 122162306a36Sopenharmony_ci } 122262306a36Sopenharmony_ci if (req->r_target_inode && 122362306a36Sopenharmony_ci test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { 122462306a36Sopenharmony_ci struct ceph_inode_info *ci = 
ceph_inode(req->r_target_inode); 122562306a36Sopenharmony_ci spin_lock(&ci->i_unsafe_lock); 122662306a36Sopenharmony_ci list_del_init(&req->r_unsafe_target_item); 122762306a36Sopenharmony_ci spin_unlock(&ci->i_unsafe_lock); 122862306a36Sopenharmony_ci } 122962306a36Sopenharmony_ci 123062306a36Sopenharmony_ci if (req->r_unsafe_dir) { 123162306a36Sopenharmony_ci iput(req->r_unsafe_dir); 123262306a36Sopenharmony_ci req->r_unsafe_dir = NULL; 123362306a36Sopenharmony_ci } 123462306a36Sopenharmony_ci 123562306a36Sopenharmony_ci complete_all(&req->r_safe_completion); 123662306a36Sopenharmony_ci 123762306a36Sopenharmony_ci ceph_mdsc_put_request(req); 123862306a36Sopenharmony_ci} 123962306a36Sopenharmony_ci 124062306a36Sopenharmony_ci/* 124162306a36Sopenharmony_ci * Walk back up the dentry tree until we hit a dentry representing a 124262306a36Sopenharmony_ci * non-snapshot inode. We do this using the rcu_read_lock (which must be held 124362306a36Sopenharmony_ci * when calling this) to ensure that the objects won't disappear while we're 124462306a36Sopenharmony_ci * working with them. Once we hit a candidate dentry, we attempt to take a 124562306a36Sopenharmony_ci * reference to it, and return that as the result. 
124662306a36Sopenharmony_ci */ 124762306a36Sopenharmony_cistatic struct inode *get_nonsnap_parent(struct dentry *dentry) 124862306a36Sopenharmony_ci{ 124962306a36Sopenharmony_ci struct inode *inode = NULL; 125062306a36Sopenharmony_ci 125162306a36Sopenharmony_ci while (dentry && !IS_ROOT(dentry)) { 125262306a36Sopenharmony_ci inode = d_inode_rcu(dentry); 125362306a36Sopenharmony_ci if (!inode || ceph_snap(inode) == CEPH_NOSNAP) 125462306a36Sopenharmony_ci break; 125562306a36Sopenharmony_ci dentry = dentry->d_parent; 125662306a36Sopenharmony_ci } 125762306a36Sopenharmony_ci if (inode) 125862306a36Sopenharmony_ci inode = igrab(inode); 125962306a36Sopenharmony_ci return inode; 126062306a36Sopenharmony_ci} 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci/* 126362306a36Sopenharmony_ci * Choose mds to send request to next. If there is a hint set in the 126462306a36Sopenharmony_ci * request (e.g., due to a prior forward hint from the mds), use that. 126562306a36Sopenharmony_ci * Otherwise, consult frag tree and/or caps to identify the 126662306a36Sopenharmony_ci * appropriate mds. If all else fails, choose randomly. 126762306a36Sopenharmony_ci * 126862306a36Sopenharmony_ci * Called under mdsc->mutex. 
126962306a36Sopenharmony_ci */ 127062306a36Sopenharmony_cistatic int __choose_mds(struct ceph_mds_client *mdsc, 127162306a36Sopenharmony_ci struct ceph_mds_request *req, 127262306a36Sopenharmony_ci bool *random) 127362306a36Sopenharmony_ci{ 127462306a36Sopenharmony_ci struct inode *inode; 127562306a36Sopenharmony_ci struct ceph_inode_info *ci; 127662306a36Sopenharmony_ci struct ceph_cap *cap; 127762306a36Sopenharmony_ci int mode = req->r_direct_mode; 127862306a36Sopenharmony_ci int mds = -1; 127962306a36Sopenharmony_ci u32 hash = req->r_direct_hash; 128062306a36Sopenharmony_ci bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci if (random) 128362306a36Sopenharmony_ci *random = false; 128462306a36Sopenharmony_ci 128562306a36Sopenharmony_ci /* 128662306a36Sopenharmony_ci * is there a specific mds we should try? ignore hint if we have 128762306a36Sopenharmony_ci * no session and the mds is not up (active or recovering). 
128862306a36Sopenharmony_ci */ 128962306a36Sopenharmony_ci if (req->r_resend_mds >= 0 && 129062306a36Sopenharmony_ci (__have_session(mdsc, req->r_resend_mds) || 129162306a36Sopenharmony_ci ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) { 129262306a36Sopenharmony_ci dout("%s using resend_mds mds%d\n", __func__, 129362306a36Sopenharmony_ci req->r_resend_mds); 129462306a36Sopenharmony_ci return req->r_resend_mds; 129562306a36Sopenharmony_ci } 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ci if (mode == USE_RANDOM_MDS) 129862306a36Sopenharmony_ci goto random; 129962306a36Sopenharmony_ci 130062306a36Sopenharmony_ci inode = NULL; 130162306a36Sopenharmony_ci if (req->r_inode) { 130262306a36Sopenharmony_ci if (ceph_snap(req->r_inode) != CEPH_SNAPDIR) { 130362306a36Sopenharmony_ci inode = req->r_inode; 130462306a36Sopenharmony_ci ihold(inode); 130562306a36Sopenharmony_ci } else { 130662306a36Sopenharmony_ci /* req->r_dentry is non-null for LSSNAP request */ 130762306a36Sopenharmony_ci rcu_read_lock(); 130862306a36Sopenharmony_ci inode = get_nonsnap_parent(req->r_dentry); 130962306a36Sopenharmony_ci rcu_read_unlock(); 131062306a36Sopenharmony_ci dout("%s using snapdir's parent %p\n", __func__, inode); 131162306a36Sopenharmony_ci } 131262306a36Sopenharmony_ci } else if (req->r_dentry) { 131362306a36Sopenharmony_ci /* ignore race with rename; old or new d_parent is okay */ 131462306a36Sopenharmony_ci struct dentry *parent; 131562306a36Sopenharmony_ci struct inode *dir; 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci rcu_read_lock(); 131862306a36Sopenharmony_ci parent = READ_ONCE(req->r_dentry->d_parent); 131962306a36Sopenharmony_ci dir = req->r_parent ? 
: d_inode_rcu(parent); 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_ci if (!dir || dir->i_sb != mdsc->fsc->sb) { 132262306a36Sopenharmony_ci /* not this fs or parent went negative */ 132362306a36Sopenharmony_ci inode = d_inode(req->r_dentry); 132462306a36Sopenharmony_ci if (inode) 132562306a36Sopenharmony_ci ihold(inode); 132662306a36Sopenharmony_ci } else if (ceph_snap(dir) != CEPH_NOSNAP) { 132762306a36Sopenharmony_ci /* direct snapped/virtual snapdir requests 132862306a36Sopenharmony_ci * based on parent dir inode */ 132962306a36Sopenharmony_ci inode = get_nonsnap_parent(parent); 133062306a36Sopenharmony_ci dout("%s using nonsnap parent %p\n", __func__, inode); 133162306a36Sopenharmony_ci } else { 133262306a36Sopenharmony_ci /* dentry target */ 133362306a36Sopenharmony_ci inode = d_inode(req->r_dentry); 133462306a36Sopenharmony_ci if (!inode || mode == USE_AUTH_MDS) { 133562306a36Sopenharmony_ci /* dir + name */ 133662306a36Sopenharmony_ci inode = igrab(dir); 133762306a36Sopenharmony_ci hash = ceph_dentry_hash(dir, req->r_dentry); 133862306a36Sopenharmony_ci is_hash = true; 133962306a36Sopenharmony_ci } else { 134062306a36Sopenharmony_ci ihold(inode); 134162306a36Sopenharmony_ci } 134262306a36Sopenharmony_ci } 134362306a36Sopenharmony_ci rcu_read_unlock(); 134462306a36Sopenharmony_ci } 134562306a36Sopenharmony_ci 134662306a36Sopenharmony_ci dout("%s %p is_hash=%d (0x%x) mode %d\n", __func__, inode, (int)is_hash, 134762306a36Sopenharmony_ci hash, mode); 134862306a36Sopenharmony_ci if (!inode) 134962306a36Sopenharmony_ci goto random; 135062306a36Sopenharmony_ci ci = ceph_inode(inode); 135162306a36Sopenharmony_ci 135262306a36Sopenharmony_ci if (is_hash && S_ISDIR(inode->i_mode)) { 135362306a36Sopenharmony_ci struct ceph_inode_frag frag; 135462306a36Sopenharmony_ci int found; 135562306a36Sopenharmony_ci 135662306a36Sopenharmony_ci ceph_choose_frag(ci, hash, &frag, &found); 135762306a36Sopenharmony_ci if (found) { 135862306a36Sopenharmony_ci if (mode == 
USE_ANY_MDS && frag.ndist > 0) { 135962306a36Sopenharmony_ci u8 r; 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci /* choose a random replica */ 136262306a36Sopenharmony_ci get_random_bytes(&r, 1); 136362306a36Sopenharmony_ci r %= frag.ndist; 136462306a36Sopenharmony_ci mds = frag.dist[r]; 136562306a36Sopenharmony_ci dout("%s %p %llx.%llx frag %u mds%d (%d/%d)\n", 136662306a36Sopenharmony_ci __func__, inode, ceph_vinop(inode), 136762306a36Sopenharmony_ci frag.frag, mds, (int)r, frag.ndist); 136862306a36Sopenharmony_ci if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= 136962306a36Sopenharmony_ci CEPH_MDS_STATE_ACTIVE && 137062306a36Sopenharmony_ci !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) 137162306a36Sopenharmony_ci goto out; 137262306a36Sopenharmony_ci } 137362306a36Sopenharmony_ci 137462306a36Sopenharmony_ci /* since this file/dir wasn't known to be 137562306a36Sopenharmony_ci * replicated, then we want to look for the 137662306a36Sopenharmony_ci * authoritative mds. */ 137762306a36Sopenharmony_ci if (frag.mds >= 0) { 137862306a36Sopenharmony_ci /* choose auth mds */ 137962306a36Sopenharmony_ci mds = frag.mds; 138062306a36Sopenharmony_ci dout("%s %p %llx.%llx frag %u mds%d (auth)\n", 138162306a36Sopenharmony_ci __func__, inode, ceph_vinop(inode), 138262306a36Sopenharmony_ci frag.frag, mds); 138362306a36Sopenharmony_ci if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= 138462306a36Sopenharmony_ci CEPH_MDS_STATE_ACTIVE) { 138562306a36Sopenharmony_ci if (!ceph_mdsmap_is_laggy(mdsc->mdsmap, 138662306a36Sopenharmony_ci mds)) 138762306a36Sopenharmony_ci goto out; 138862306a36Sopenharmony_ci } 138962306a36Sopenharmony_ci } 139062306a36Sopenharmony_ci mode = USE_AUTH_MDS; 139162306a36Sopenharmony_ci } 139262306a36Sopenharmony_ci } 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 139562306a36Sopenharmony_ci cap = NULL; 139662306a36Sopenharmony_ci if (mode == USE_AUTH_MDS) 139762306a36Sopenharmony_ci cap = ci->i_auth_cap; 
139862306a36Sopenharmony_ci if (!cap && !RB_EMPTY_ROOT(&ci->i_caps)) 139962306a36Sopenharmony_ci cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); 140062306a36Sopenharmony_ci if (!cap) { 140162306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 140262306a36Sopenharmony_ci iput(inode); 140362306a36Sopenharmony_ci goto random; 140462306a36Sopenharmony_ci } 140562306a36Sopenharmony_ci mds = cap->session->s_mds; 140662306a36Sopenharmony_ci dout("%s %p %llx.%llx mds%d (%scap %p)\n", __func__, 140762306a36Sopenharmony_ci inode, ceph_vinop(inode), mds, 140862306a36Sopenharmony_ci cap == ci->i_auth_cap ? "auth " : "", cap); 140962306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 141062306a36Sopenharmony_ciout: 141162306a36Sopenharmony_ci iput(inode); 141262306a36Sopenharmony_ci return mds; 141362306a36Sopenharmony_ci 141462306a36Sopenharmony_cirandom: 141562306a36Sopenharmony_ci if (random) 141662306a36Sopenharmony_ci *random = true; 141762306a36Sopenharmony_ci 141862306a36Sopenharmony_ci mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap); 141962306a36Sopenharmony_ci dout("%s chose random mds%d\n", __func__, mds); 142062306a36Sopenharmony_ci return mds; 142162306a36Sopenharmony_ci} 142262306a36Sopenharmony_ci 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci/* 142562306a36Sopenharmony_ci * session messages 142662306a36Sopenharmony_ci */ 142762306a36Sopenharmony_cistruct ceph_msg *ceph_create_session_msg(u32 op, u64 seq) 142862306a36Sopenharmony_ci{ 142962306a36Sopenharmony_ci struct ceph_msg *msg; 143062306a36Sopenharmony_ci struct ceph_mds_session_head *h; 143162306a36Sopenharmony_ci 143262306a36Sopenharmony_ci msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS, 143362306a36Sopenharmony_ci false); 143462306a36Sopenharmony_ci if (!msg) { 143562306a36Sopenharmony_ci pr_err("ENOMEM creating session %s msg\n", 143662306a36Sopenharmony_ci ceph_session_op_name(op)); 143762306a36Sopenharmony_ci return NULL; 143862306a36Sopenharmony_ci } 
143962306a36Sopenharmony_ci h = msg->front.iov_base; 144062306a36Sopenharmony_ci h->op = cpu_to_le32(op); 144162306a36Sopenharmony_ci h->seq = cpu_to_le64(seq); 144262306a36Sopenharmony_ci 144362306a36Sopenharmony_ci return msg; 144462306a36Sopenharmony_ci} 144562306a36Sopenharmony_ci 144662306a36Sopenharmony_cistatic const unsigned char feature_bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED; 144762306a36Sopenharmony_ci#define FEATURE_BYTES(c) (DIV_ROUND_UP((size_t)feature_bits[c - 1] + 1, 64) * 8) 144862306a36Sopenharmony_cistatic int encode_supported_features(void **p, void *end) 144962306a36Sopenharmony_ci{ 145062306a36Sopenharmony_ci static const size_t count = ARRAY_SIZE(feature_bits); 145162306a36Sopenharmony_ci 145262306a36Sopenharmony_ci if (count > 0) { 145362306a36Sopenharmony_ci size_t i; 145462306a36Sopenharmony_ci size_t size = FEATURE_BYTES(count); 145562306a36Sopenharmony_ci unsigned long bit; 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci if (WARN_ON_ONCE(*p + 4 + size > end)) 145862306a36Sopenharmony_ci return -ERANGE; 145962306a36Sopenharmony_ci 146062306a36Sopenharmony_ci ceph_encode_32(p, size); 146162306a36Sopenharmony_ci memset(*p, 0, size); 146262306a36Sopenharmony_ci for (i = 0; i < count; i++) { 146362306a36Sopenharmony_ci bit = feature_bits[i]; 146462306a36Sopenharmony_ci ((unsigned char *)(*p))[bit / 8] |= BIT(bit % 8); 146562306a36Sopenharmony_ci } 146662306a36Sopenharmony_ci *p += size; 146762306a36Sopenharmony_ci } else { 146862306a36Sopenharmony_ci if (WARN_ON_ONCE(*p + 4 > end)) 146962306a36Sopenharmony_ci return -ERANGE; 147062306a36Sopenharmony_ci 147162306a36Sopenharmony_ci ceph_encode_32(p, 0); 147262306a36Sopenharmony_ci } 147362306a36Sopenharmony_ci 147462306a36Sopenharmony_ci return 0; 147562306a36Sopenharmony_ci} 147662306a36Sopenharmony_ci 147762306a36Sopenharmony_cistatic const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED; 147862306a36Sopenharmony_ci#define METRIC_BYTES(cnt) 
(DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8) 147962306a36Sopenharmony_cistatic int encode_metric_spec(void **p, void *end) 148062306a36Sopenharmony_ci{ 148162306a36Sopenharmony_ci static const size_t count = ARRAY_SIZE(metric_bits); 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_ci /* header */ 148462306a36Sopenharmony_ci if (WARN_ON_ONCE(*p + 2 > end)) 148562306a36Sopenharmony_ci return -ERANGE; 148662306a36Sopenharmony_ci 148762306a36Sopenharmony_ci ceph_encode_8(p, 1); /* version */ 148862306a36Sopenharmony_ci ceph_encode_8(p, 1); /* compat */ 148962306a36Sopenharmony_ci 149062306a36Sopenharmony_ci if (count > 0) { 149162306a36Sopenharmony_ci size_t i; 149262306a36Sopenharmony_ci size_t size = METRIC_BYTES(count); 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_ci if (WARN_ON_ONCE(*p + 4 + 4 + size > end)) 149562306a36Sopenharmony_ci return -ERANGE; 149662306a36Sopenharmony_ci 149762306a36Sopenharmony_ci /* metric spec info length */ 149862306a36Sopenharmony_ci ceph_encode_32(p, 4 + size); 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci /* metric spec */ 150162306a36Sopenharmony_ci ceph_encode_32(p, size); 150262306a36Sopenharmony_ci memset(*p, 0, size); 150362306a36Sopenharmony_ci for (i = 0; i < count; i++) 150462306a36Sopenharmony_ci ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8); 150562306a36Sopenharmony_ci *p += size; 150662306a36Sopenharmony_ci } else { 150762306a36Sopenharmony_ci if (WARN_ON_ONCE(*p + 4 + 4 > end)) 150862306a36Sopenharmony_ci return -ERANGE; 150962306a36Sopenharmony_ci 151062306a36Sopenharmony_ci /* metric spec info length */ 151162306a36Sopenharmony_ci ceph_encode_32(p, 4); 151262306a36Sopenharmony_ci /* metric spec */ 151362306a36Sopenharmony_ci ceph_encode_32(p, 0); 151462306a36Sopenharmony_ci } 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci return 0; 151762306a36Sopenharmony_ci} 151862306a36Sopenharmony_ci 151962306a36Sopenharmony_ci/* 152062306a36Sopenharmony_ci * session message, 
specialization for CEPH_SESSION_REQUEST_OPEN 152162306a36Sopenharmony_ci * to include additional client metadata fields. 152262306a36Sopenharmony_ci */ 152362306a36Sopenharmony_cistatic struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq) 152462306a36Sopenharmony_ci{ 152562306a36Sopenharmony_ci struct ceph_msg *msg; 152662306a36Sopenharmony_ci struct ceph_mds_session_head *h; 152762306a36Sopenharmony_ci int i; 152862306a36Sopenharmony_ci int extra_bytes = 0; 152962306a36Sopenharmony_ci int metadata_key_count = 0; 153062306a36Sopenharmony_ci struct ceph_options *opt = mdsc->fsc->client->options; 153162306a36Sopenharmony_ci struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; 153262306a36Sopenharmony_ci size_t size, count; 153362306a36Sopenharmony_ci void *p, *end; 153462306a36Sopenharmony_ci int ret; 153562306a36Sopenharmony_ci 153662306a36Sopenharmony_ci const char* metadata[][2] = { 153762306a36Sopenharmony_ci {"hostname", mdsc->nodename}, 153862306a36Sopenharmony_ci {"kernel_version", init_utsname()->release}, 153962306a36Sopenharmony_ci {"entity_id", opt->name ? : ""}, 154062306a36Sopenharmony_ci {"root", fsopt->server_path ? 
: "/"}, 154162306a36Sopenharmony_ci {NULL, NULL} 154262306a36Sopenharmony_ci }; 154362306a36Sopenharmony_ci 154462306a36Sopenharmony_ci /* Calculate serialized length of metadata */ 154562306a36Sopenharmony_ci extra_bytes = 4; /* map length */ 154662306a36Sopenharmony_ci for (i = 0; metadata[i][0]; ++i) { 154762306a36Sopenharmony_ci extra_bytes += 8 + strlen(metadata[i][0]) + 154862306a36Sopenharmony_ci strlen(metadata[i][1]); 154962306a36Sopenharmony_ci metadata_key_count++; 155062306a36Sopenharmony_ci } 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci /* supported feature */ 155362306a36Sopenharmony_ci size = 0; 155462306a36Sopenharmony_ci count = ARRAY_SIZE(feature_bits); 155562306a36Sopenharmony_ci if (count > 0) 155662306a36Sopenharmony_ci size = FEATURE_BYTES(count); 155762306a36Sopenharmony_ci extra_bytes += 4 + size; 155862306a36Sopenharmony_ci 155962306a36Sopenharmony_ci /* metric spec */ 156062306a36Sopenharmony_ci size = 0; 156162306a36Sopenharmony_ci count = ARRAY_SIZE(metric_bits); 156262306a36Sopenharmony_ci if (count > 0) 156362306a36Sopenharmony_ci size = METRIC_BYTES(count); 156462306a36Sopenharmony_ci extra_bytes += 2 + 4 + 4 + size; 156562306a36Sopenharmony_ci 156662306a36Sopenharmony_ci /* Allocate the message */ 156762306a36Sopenharmony_ci msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, 156862306a36Sopenharmony_ci GFP_NOFS, false); 156962306a36Sopenharmony_ci if (!msg) { 157062306a36Sopenharmony_ci pr_err("ENOMEM creating session open msg\n"); 157162306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 157262306a36Sopenharmony_ci } 157362306a36Sopenharmony_ci p = msg->front.iov_base; 157462306a36Sopenharmony_ci end = p + msg->front.iov_len; 157562306a36Sopenharmony_ci 157662306a36Sopenharmony_ci h = p; 157762306a36Sopenharmony_ci h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN); 157862306a36Sopenharmony_ci h->seq = cpu_to_le64(seq); 157962306a36Sopenharmony_ci 158062306a36Sopenharmony_ci /* 158162306a36Sopenharmony_ci * 
Serialize client metadata into waiting buffer space, using 158262306a36Sopenharmony_ci * the format that userspace expects for map<string, string> 158362306a36Sopenharmony_ci * 158462306a36Sopenharmony_ci * ClientSession messages with metadata are v4 158562306a36Sopenharmony_ci */ 158662306a36Sopenharmony_ci msg->hdr.version = cpu_to_le16(4); 158762306a36Sopenharmony_ci msg->hdr.compat_version = cpu_to_le16(1); 158862306a36Sopenharmony_ci 158962306a36Sopenharmony_ci /* The write pointer, following the session_head structure */ 159062306a36Sopenharmony_ci p += sizeof(*h); 159162306a36Sopenharmony_ci 159262306a36Sopenharmony_ci /* Number of entries in the map */ 159362306a36Sopenharmony_ci ceph_encode_32(&p, metadata_key_count); 159462306a36Sopenharmony_ci 159562306a36Sopenharmony_ci /* Two length-prefixed strings for each entry in the map */ 159662306a36Sopenharmony_ci for (i = 0; metadata[i][0]; ++i) { 159762306a36Sopenharmony_ci size_t const key_len = strlen(metadata[i][0]); 159862306a36Sopenharmony_ci size_t const val_len = strlen(metadata[i][1]); 159962306a36Sopenharmony_ci 160062306a36Sopenharmony_ci ceph_encode_32(&p, key_len); 160162306a36Sopenharmony_ci memcpy(p, metadata[i][0], key_len); 160262306a36Sopenharmony_ci p += key_len; 160362306a36Sopenharmony_ci ceph_encode_32(&p, val_len); 160462306a36Sopenharmony_ci memcpy(p, metadata[i][1], val_len); 160562306a36Sopenharmony_ci p += val_len; 160662306a36Sopenharmony_ci } 160762306a36Sopenharmony_ci 160862306a36Sopenharmony_ci ret = encode_supported_features(&p, end); 160962306a36Sopenharmony_ci if (ret) { 161062306a36Sopenharmony_ci pr_err("encode_supported_features failed!\n"); 161162306a36Sopenharmony_ci ceph_msg_put(msg); 161262306a36Sopenharmony_ci return ERR_PTR(ret); 161362306a36Sopenharmony_ci } 161462306a36Sopenharmony_ci 161562306a36Sopenharmony_ci ret = encode_metric_spec(&p, end); 161662306a36Sopenharmony_ci if (ret) { 161762306a36Sopenharmony_ci pr_err("encode_metric_spec failed!\n"); 
161862306a36Sopenharmony_ci ceph_msg_put(msg); 161962306a36Sopenharmony_ci return ERR_PTR(ret); 162062306a36Sopenharmony_ci } 162162306a36Sopenharmony_ci 162262306a36Sopenharmony_ci msg->front.iov_len = p - msg->front.iov_base; 162362306a36Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci return msg; 162662306a36Sopenharmony_ci} 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_ci/* 162962306a36Sopenharmony_ci * send session open request. 163062306a36Sopenharmony_ci * 163162306a36Sopenharmony_ci * called under mdsc->mutex 163262306a36Sopenharmony_ci */ 163362306a36Sopenharmony_cistatic int __open_session(struct ceph_mds_client *mdsc, 163462306a36Sopenharmony_ci struct ceph_mds_session *session) 163562306a36Sopenharmony_ci{ 163662306a36Sopenharmony_ci struct ceph_msg *msg; 163762306a36Sopenharmony_ci int mstate; 163862306a36Sopenharmony_ci int mds = session->s_mds; 163962306a36Sopenharmony_ci 164062306a36Sopenharmony_ci if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) 164162306a36Sopenharmony_ci return -EIO; 164262306a36Sopenharmony_ci 164362306a36Sopenharmony_ci /* wait for mds to go active? 
*/ 164462306a36Sopenharmony_ci mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); 164562306a36Sopenharmony_ci dout("open_session to mds%d (%s)\n", mds, 164662306a36Sopenharmony_ci ceph_mds_state_name(mstate)); 164762306a36Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_OPENING; 164862306a36Sopenharmony_ci session->s_renew_requested = jiffies; 164962306a36Sopenharmony_ci 165062306a36Sopenharmony_ci /* send connect message */ 165162306a36Sopenharmony_ci msg = create_session_open_msg(mdsc, session->s_seq); 165262306a36Sopenharmony_ci if (IS_ERR(msg)) 165362306a36Sopenharmony_ci return PTR_ERR(msg); 165462306a36Sopenharmony_ci ceph_con_send(&session->s_con, msg); 165562306a36Sopenharmony_ci return 0; 165662306a36Sopenharmony_ci} 165762306a36Sopenharmony_ci 165862306a36Sopenharmony_ci/* 165962306a36Sopenharmony_ci * open sessions for any export targets for the given mds 166062306a36Sopenharmony_ci * 166162306a36Sopenharmony_ci * called under mdsc->mutex 166262306a36Sopenharmony_ci */ 166362306a36Sopenharmony_cistatic struct ceph_mds_session * 166462306a36Sopenharmony_ci__open_export_target_session(struct ceph_mds_client *mdsc, int target) 166562306a36Sopenharmony_ci{ 166662306a36Sopenharmony_ci struct ceph_mds_session *session; 166762306a36Sopenharmony_ci int ret; 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_ci session = __ceph_lookup_mds_session(mdsc, target); 167062306a36Sopenharmony_ci if (!session) { 167162306a36Sopenharmony_ci session = register_session(mdsc, target); 167262306a36Sopenharmony_ci if (IS_ERR(session)) 167362306a36Sopenharmony_ci return session; 167462306a36Sopenharmony_ci } 167562306a36Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_NEW || 167662306a36Sopenharmony_ci session->s_state == CEPH_MDS_SESSION_CLOSING) { 167762306a36Sopenharmony_ci ret = __open_session(mdsc, session); 167862306a36Sopenharmony_ci if (ret) 167962306a36Sopenharmony_ci return ERR_PTR(ret); 168062306a36Sopenharmony_ci } 168162306a36Sopenharmony_ci 
168262306a36Sopenharmony_ci return session; 168362306a36Sopenharmony_ci} 168462306a36Sopenharmony_ci 168562306a36Sopenharmony_cistruct ceph_mds_session * 168662306a36Sopenharmony_ciceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target) 168762306a36Sopenharmony_ci{ 168862306a36Sopenharmony_ci struct ceph_mds_session *session; 168962306a36Sopenharmony_ci 169062306a36Sopenharmony_ci dout("open_export_target_session to mds%d\n", target); 169162306a36Sopenharmony_ci 169262306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 169362306a36Sopenharmony_ci session = __open_export_target_session(mdsc, target); 169462306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 169562306a36Sopenharmony_ci 169662306a36Sopenharmony_ci return session; 169762306a36Sopenharmony_ci} 169862306a36Sopenharmony_ci 169962306a36Sopenharmony_cistatic void __open_export_target_sessions(struct ceph_mds_client *mdsc, 170062306a36Sopenharmony_ci struct ceph_mds_session *session) 170162306a36Sopenharmony_ci{ 170262306a36Sopenharmony_ci struct ceph_mds_info *mi; 170362306a36Sopenharmony_ci struct ceph_mds_session *ts; 170462306a36Sopenharmony_ci int i, mds = session->s_mds; 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci if (mds >= mdsc->mdsmap->possible_max_rank) 170762306a36Sopenharmony_ci return; 170862306a36Sopenharmony_ci 170962306a36Sopenharmony_ci mi = &mdsc->mdsmap->m_info[mds]; 171062306a36Sopenharmony_ci dout("open_export_target_sessions for mds%d (%d targets)\n", 171162306a36Sopenharmony_ci session->s_mds, mi->num_export_targets); 171262306a36Sopenharmony_ci 171362306a36Sopenharmony_ci for (i = 0; i < mi->num_export_targets; i++) { 171462306a36Sopenharmony_ci ts = __open_export_target_session(mdsc, mi->export_targets[i]); 171562306a36Sopenharmony_ci ceph_put_mds_session(ts); 171662306a36Sopenharmony_ci } 171762306a36Sopenharmony_ci} 171862306a36Sopenharmony_ci 171962306a36Sopenharmony_civoid ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, 
172062306a36Sopenharmony_ci struct ceph_mds_session *session) 172162306a36Sopenharmony_ci{ 172262306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 172362306a36Sopenharmony_ci __open_export_target_sessions(mdsc, session); 172462306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 172562306a36Sopenharmony_ci} 172662306a36Sopenharmony_ci 172762306a36Sopenharmony_ci/* 172862306a36Sopenharmony_ci * session caps 172962306a36Sopenharmony_ci */ 173062306a36Sopenharmony_ci 173162306a36Sopenharmony_cistatic void detach_cap_releases(struct ceph_mds_session *session, 173262306a36Sopenharmony_ci struct list_head *target) 173362306a36Sopenharmony_ci{ 173462306a36Sopenharmony_ci lockdep_assert_held(&session->s_cap_lock); 173562306a36Sopenharmony_ci 173662306a36Sopenharmony_ci list_splice_init(&session->s_cap_releases, target); 173762306a36Sopenharmony_ci session->s_num_cap_releases = 0; 173862306a36Sopenharmony_ci dout("dispose_cap_releases mds%d\n", session->s_mds); 173962306a36Sopenharmony_ci} 174062306a36Sopenharmony_ci 174162306a36Sopenharmony_cistatic void dispose_cap_releases(struct ceph_mds_client *mdsc, 174262306a36Sopenharmony_ci struct list_head *dispose) 174362306a36Sopenharmony_ci{ 174462306a36Sopenharmony_ci while (!list_empty(dispose)) { 174562306a36Sopenharmony_ci struct ceph_cap *cap; 174662306a36Sopenharmony_ci /* zero out the in-progress message */ 174762306a36Sopenharmony_ci cap = list_first_entry(dispose, struct ceph_cap, session_caps); 174862306a36Sopenharmony_ci list_del(&cap->session_caps); 174962306a36Sopenharmony_ci ceph_put_cap(mdsc, cap); 175062306a36Sopenharmony_ci } 175162306a36Sopenharmony_ci} 175262306a36Sopenharmony_ci 175362306a36Sopenharmony_cistatic void cleanup_session_requests(struct ceph_mds_client *mdsc, 175462306a36Sopenharmony_ci struct ceph_mds_session *session) 175562306a36Sopenharmony_ci{ 175662306a36Sopenharmony_ci struct ceph_mds_request *req; 175762306a36Sopenharmony_ci struct rb_node *p; 175862306a36Sopenharmony_ci 
175962306a36Sopenharmony_ci dout("cleanup_session_requests mds%d\n", session->s_mds); 176062306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 176162306a36Sopenharmony_ci while (!list_empty(&session->s_unsafe)) { 176262306a36Sopenharmony_ci req = list_first_entry(&session->s_unsafe, 176362306a36Sopenharmony_ci struct ceph_mds_request, r_unsafe_item); 176462306a36Sopenharmony_ci pr_warn_ratelimited(" dropping unsafe request %llu\n", 176562306a36Sopenharmony_ci req->r_tid); 176662306a36Sopenharmony_ci if (req->r_target_inode) 176762306a36Sopenharmony_ci mapping_set_error(req->r_target_inode->i_mapping, -EIO); 176862306a36Sopenharmony_ci if (req->r_unsafe_dir) 176962306a36Sopenharmony_ci mapping_set_error(req->r_unsafe_dir->i_mapping, -EIO); 177062306a36Sopenharmony_ci __unregister_request(mdsc, req); 177162306a36Sopenharmony_ci } 177262306a36Sopenharmony_ci /* zero r_attempts, so kick_requests() will re-send requests */ 177362306a36Sopenharmony_ci p = rb_first(&mdsc->request_tree); 177462306a36Sopenharmony_ci while (p) { 177562306a36Sopenharmony_ci req = rb_entry(p, struct ceph_mds_request, r_node); 177662306a36Sopenharmony_ci p = rb_next(p); 177762306a36Sopenharmony_ci if (req->r_session && 177862306a36Sopenharmony_ci req->r_session->s_mds == session->s_mds) 177962306a36Sopenharmony_ci req->r_attempts = 0; 178062306a36Sopenharmony_ci } 178162306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 178262306a36Sopenharmony_ci} 178362306a36Sopenharmony_ci 178462306a36Sopenharmony_ci/* 178562306a36Sopenharmony_ci * Helper to safely iterate over all caps associated with a session, with 178662306a36Sopenharmony_ci * special care taken to handle a racing __ceph_remove_cap(). 178762306a36Sopenharmony_ci * 178862306a36Sopenharmony_ci * Caller must hold session s_mutex. 
178962306a36Sopenharmony_ci */ 179062306a36Sopenharmony_ciint ceph_iterate_session_caps(struct ceph_mds_session *session, 179162306a36Sopenharmony_ci int (*cb)(struct inode *, int mds, void *), 179262306a36Sopenharmony_ci void *arg) 179362306a36Sopenharmony_ci{ 179462306a36Sopenharmony_ci struct list_head *p; 179562306a36Sopenharmony_ci struct ceph_cap *cap; 179662306a36Sopenharmony_ci struct inode *inode, *last_inode = NULL; 179762306a36Sopenharmony_ci struct ceph_cap *old_cap = NULL; 179862306a36Sopenharmony_ci int ret; 179962306a36Sopenharmony_ci 180062306a36Sopenharmony_ci dout("iterate_session_caps %p mds%d\n", session, session->s_mds); 180162306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 180262306a36Sopenharmony_ci p = session->s_caps.next; 180362306a36Sopenharmony_ci while (p != &session->s_caps) { 180462306a36Sopenharmony_ci int mds; 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci cap = list_entry(p, struct ceph_cap, session_caps); 180762306a36Sopenharmony_ci inode = igrab(&cap->ci->netfs.inode); 180862306a36Sopenharmony_ci if (!inode) { 180962306a36Sopenharmony_ci p = p->next; 181062306a36Sopenharmony_ci continue; 181162306a36Sopenharmony_ci } 181262306a36Sopenharmony_ci session->s_cap_iterator = cap; 181362306a36Sopenharmony_ci mds = cap->mds; 181462306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 181562306a36Sopenharmony_ci 181662306a36Sopenharmony_ci if (last_inode) { 181762306a36Sopenharmony_ci iput(last_inode); 181862306a36Sopenharmony_ci last_inode = NULL; 181962306a36Sopenharmony_ci } 182062306a36Sopenharmony_ci if (old_cap) { 182162306a36Sopenharmony_ci ceph_put_cap(session->s_mdsc, old_cap); 182262306a36Sopenharmony_ci old_cap = NULL; 182362306a36Sopenharmony_ci } 182462306a36Sopenharmony_ci 182562306a36Sopenharmony_ci ret = cb(inode, mds, arg); 182662306a36Sopenharmony_ci last_inode = inode; 182762306a36Sopenharmony_ci 182862306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 182962306a36Sopenharmony_ci p = p->next; 
183062306a36Sopenharmony_ci if (!cap->ci) { 183162306a36Sopenharmony_ci dout("iterate_session_caps finishing cap %p removal\n", 183262306a36Sopenharmony_ci cap); 183362306a36Sopenharmony_ci BUG_ON(cap->session != session); 183462306a36Sopenharmony_ci cap->session = NULL; 183562306a36Sopenharmony_ci list_del_init(&cap->session_caps); 183662306a36Sopenharmony_ci session->s_nr_caps--; 183762306a36Sopenharmony_ci atomic64_dec(&session->s_mdsc->metric.total_caps); 183862306a36Sopenharmony_ci if (cap->queue_release) 183962306a36Sopenharmony_ci __ceph_queue_cap_release(session, cap); 184062306a36Sopenharmony_ci else 184162306a36Sopenharmony_ci old_cap = cap; /* put_cap it w/o locks held */ 184262306a36Sopenharmony_ci } 184362306a36Sopenharmony_ci if (ret < 0) 184462306a36Sopenharmony_ci goto out; 184562306a36Sopenharmony_ci } 184662306a36Sopenharmony_ci ret = 0; 184762306a36Sopenharmony_ciout: 184862306a36Sopenharmony_ci session->s_cap_iterator = NULL; 184962306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 185062306a36Sopenharmony_ci 185162306a36Sopenharmony_ci iput(last_inode); 185262306a36Sopenharmony_ci if (old_cap) 185362306a36Sopenharmony_ci ceph_put_cap(session->s_mdsc, old_cap); 185462306a36Sopenharmony_ci 185562306a36Sopenharmony_ci return ret; 185662306a36Sopenharmony_ci} 185762306a36Sopenharmony_ci 185862306a36Sopenharmony_cistatic int remove_session_caps_cb(struct inode *inode, int mds, void *arg) 185962306a36Sopenharmony_ci{ 186062306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 186162306a36Sopenharmony_ci bool invalidate = false; 186262306a36Sopenharmony_ci struct ceph_cap *cap; 186362306a36Sopenharmony_ci int iputs = 0; 186462306a36Sopenharmony_ci 186562306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 186662306a36Sopenharmony_ci cap = __get_cap_for_mds(ci, mds); 186762306a36Sopenharmony_ci if (cap) { 186862306a36Sopenharmony_ci dout(" removing cap %p, ci is %p, inode is %p\n", 186962306a36Sopenharmony_ci cap, ci, 
&ci->netfs.inode); 187062306a36Sopenharmony_ci 187162306a36Sopenharmony_ci iputs = ceph_purge_inode_cap(inode, cap, &invalidate); 187262306a36Sopenharmony_ci } 187362306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 187462306a36Sopenharmony_ci 187562306a36Sopenharmony_ci if (cap) 187662306a36Sopenharmony_ci wake_up_all(&ci->i_cap_wq); 187762306a36Sopenharmony_ci if (invalidate) 187862306a36Sopenharmony_ci ceph_queue_invalidate(inode); 187962306a36Sopenharmony_ci while (iputs--) 188062306a36Sopenharmony_ci iput(inode); 188162306a36Sopenharmony_ci return 0; 188262306a36Sopenharmony_ci} 188362306a36Sopenharmony_ci 188462306a36Sopenharmony_ci/* 188562306a36Sopenharmony_ci * caller must hold session s_mutex 188662306a36Sopenharmony_ci */ 188762306a36Sopenharmony_cistatic void remove_session_caps(struct ceph_mds_session *session) 188862306a36Sopenharmony_ci{ 188962306a36Sopenharmony_ci struct ceph_fs_client *fsc = session->s_mdsc->fsc; 189062306a36Sopenharmony_ci struct super_block *sb = fsc->sb; 189162306a36Sopenharmony_ci LIST_HEAD(dispose); 189262306a36Sopenharmony_ci 189362306a36Sopenharmony_ci dout("remove_session_caps on %p\n", session); 189462306a36Sopenharmony_ci ceph_iterate_session_caps(session, remove_session_caps_cb, fsc); 189562306a36Sopenharmony_ci 189662306a36Sopenharmony_ci wake_up_all(&fsc->mdsc->cap_flushing_wq); 189762306a36Sopenharmony_ci 189862306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 189962306a36Sopenharmony_ci if (session->s_nr_caps > 0) { 190062306a36Sopenharmony_ci struct inode *inode; 190162306a36Sopenharmony_ci struct ceph_cap *cap, *prev = NULL; 190262306a36Sopenharmony_ci struct ceph_vino vino; 190362306a36Sopenharmony_ci /* 190462306a36Sopenharmony_ci * iterate_session_caps() skips inodes that are being 190562306a36Sopenharmony_ci * deleted, we need to wait until deletions are complete. 
190662306a36Sopenharmony_ci * __wait_on_freeing_inode() is designed for the job, 190762306a36Sopenharmony_ci * but it is not exported, so use lookup inode function 190862306a36Sopenharmony_ci * to access it. 190962306a36Sopenharmony_ci */ 191062306a36Sopenharmony_ci while (!list_empty(&session->s_caps)) { 191162306a36Sopenharmony_ci cap = list_entry(session->s_caps.next, 191262306a36Sopenharmony_ci struct ceph_cap, session_caps); 191362306a36Sopenharmony_ci if (cap == prev) 191462306a36Sopenharmony_ci break; 191562306a36Sopenharmony_ci prev = cap; 191662306a36Sopenharmony_ci vino = cap->ci->i_vino; 191762306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 191862306a36Sopenharmony_ci 191962306a36Sopenharmony_ci inode = ceph_find_inode(sb, vino); 192062306a36Sopenharmony_ci iput(inode); 192162306a36Sopenharmony_ci 192262306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 192362306a36Sopenharmony_ci } 192462306a36Sopenharmony_ci } 192562306a36Sopenharmony_ci 192662306a36Sopenharmony_ci // drop cap expires and unlock s_cap_lock 192762306a36Sopenharmony_ci detach_cap_releases(session, &dispose); 192862306a36Sopenharmony_ci 192962306a36Sopenharmony_ci BUG_ON(session->s_nr_caps > 0); 193062306a36Sopenharmony_ci BUG_ON(!list_empty(&session->s_cap_flushing)); 193162306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 193262306a36Sopenharmony_ci dispose_cap_releases(session->s_mdsc, &dispose); 193362306a36Sopenharmony_ci} 193462306a36Sopenharmony_ci 193562306a36Sopenharmony_cienum { 193662306a36Sopenharmony_ci RECONNECT, 193762306a36Sopenharmony_ci RENEWCAPS, 193862306a36Sopenharmony_ci FORCE_RO, 193962306a36Sopenharmony_ci}; 194062306a36Sopenharmony_ci 194162306a36Sopenharmony_ci/* 194262306a36Sopenharmony_ci * wake up any threads waiting on this session's caps. if the cap is 194362306a36Sopenharmony_ci * old (didn't get renewed on the client reconnect), remove it now. 194462306a36Sopenharmony_ci * 194562306a36Sopenharmony_ci * caller must hold s_mutex. 
194662306a36Sopenharmony_ci */ 194762306a36Sopenharmony_cistatic int wake_up_session_cb(struct inode *inode, int mds, void *arg) 194862306a36Sopenharmony_ci{ 194962306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 195062306a36Sopenharmony_ci unsigned long ev = (unsigned long)arg; 195162306a36Sopenharmony_ci 195262306a36Sopenharmony_ci if (ev == RECONNECT) { 195362306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 195462306a36Sopenharmony_ci ci->i_wanted_max_size = 0; 195562306a36Sopenharmony_ci ci->i_requested_max_size = 0; 195662306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 195762306a36Sopenharmony_ci } else if (ev == RENEWCAPS) { 195862306a36Sopenharmony_ci struct ceph_cap *cap; 195962306a36Sopenharmony_ci 196062306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 196162306a36Sopenharmony_ci cap = __get_cap_for_mds(ci, mds); 196262306a36Sopenharmony_ci /* mds did not re-issue stale cap */ 196362306a36Sopenharmony_ci if (cap && cap->cap_gen < atomic_read(&cap->session->s_cap_gen)) 196462306a36Sopenharmony_ci cap->issued = cap->implemented = CEPH_CAP_PIN; 196562306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 196662306a36Sopenharmony_ci } else if (ev == FORCE_RO) { 196762306a36Sopenharmony_ci } 196862306a36Sopenharmony_ci wake_up_all(&ci->i_cap_wq); 196962306a36Sopenharmony_ci return 0; 197062306a36Sopenharmony_ci} 197162306a36Sopenharmony_ci 197262306a36Sopenharmony_cistatic void wake_up_session_caps(struct ceph_mds_session *session, int ev) 197362306a36Sopenharmony_ci{ 197462306a36Sopenharmony_ci dout("wake_up_session_caps %p mds%d\n", session, session->s_mds); 197562306a36Sopenharmony_ci ceph_iterate_session_caps(session, wake_up_session_cb, 197662306a36Sopenharmony_ci (void *)(unsigned long)ev); 197762306a36Sopenharmony_ci} 197862306a36Sopenharmony_ci 197962306a36Sopenharmony_ci/* 198062306a36Sopenharmony_ci * Send periodic message to MDS renewing all currently held caps. 
The 198162306a36Sopenharmony_ci * ack will reset the expiration for all caps from this session. 198262306a36Sopenharmony_ci * 198362306a36Sopenharmony_ci * caller holds s_mutex 198462306a36Sopenharmony_ci */ 198562306a36Sopenharmony_cistatic int send_renew_caps(struct ceph_mds_client *mdsc, 198662306a36Sopenharmony_ci struct ceph_mds_session *session) 198762306a36Sopenharmony_ci{ 198862306a36Sopenharmony_ci struct ceph_msg *msg; 198962306a36Sopenharmony_ci int state; 199062306a36Sopenharmony_ci 199162306a36Sopenharmony_ci if (time_after_eq(jiffies, session->s_cap_ttl) && 199262306a36Sopenharmony_ci time_after_eq(session->s_cap_ttl, session->s_renew_requested)) 199362306a36Sopenharmony_ci pr_info("mds%d caps stale\n", session->s_mds); 199462306a36Sopenharmony_ci session->s_renew_requested = jiffies; 199562306a36Sopenharmony_ci 199662306a36Sopenharmony_ci /* do not try to renew caps until a recovering mds has reconnected 199762306a36Sopenharmony_ci * with its clients. */ 199862306a36Sopenharmony_ci state = ceph_mdsmap_get_state(mdsc->mdsmap, session->s_mds); 199962306a36Sopenharmony_ci if (state < CEPH_MDS_STATE_RECONNECT) { 200062306a36Sopenharmony_ci dout("send_renew_caps ignoring mds%d (%s)\n", 200162306a36Sopenharmony_ci session->s_mds, ceph_mds_state_name(state)); 200262306a36Sopenharmony_ci return 0; 200362306a36Sopenharmony_ci } 200462306a36Sopenharmony_ci 200562306a36Sopenharmony_ci dout("send_renew_caps to mds%d (%s)\n", session->s_mds, 200662306a36Sopenharmony_ci ceph_mds_state_name(state)); 200762306a36Sopenharmony_ci msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, 200862306a36Sopenharmony_ci ++session->s_renew_seq); 200962306a36Sopenharmony_ci if (!msg) 201062306a36Sopenharmony_ci return -ENOMEM; 201162306a36Sopenharmony_ci ceph_con_send(&session->s_con, msg); 201262306a36Sopenharmony_ci return 0; 201362306a36Sopenharmony_ci} 201462306a36Sopenharmony_ci 201562306a36Sopenharmony_cistatic int send_flushmsg_ack(struct ceph_mds_client *mdsc, 
201662306a36Sopenharmony_ci struct ceph_mds_session *session, u64 seq) 201762306a36Sopenharmony_ci{ 201862306a36Sopenharmony_ci struct ceph_msg *msg; 201962306a36Sopenharmony_ci 202062306a36Sopenharmony_ci dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n", 202162306a36Sopenharmony_ci session->s_mds, ceph_session_state_name(session->s_state), seq); 202262306a36Sopenharmony_ci msg = ceph_create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq); 202362306a36Sopenharmony_ci if (!msg) 202462306a36Sopenharmony_ci return -ENOMEM; 202562306a36Sopenharmony_ci ceph_con_send(&session->s_con, msg); 202662306a36Sopenharmony_ci return 0; 202762306a36Sopenharmony_ci} 202862306a36Sopenharmony_ci 202962306a36Sopenharmony_ci 203062306a36Sopenharmony_ci/* 203162306a36Sopenharmony_ci * Note new cap ttl, and any transition from stale -> not stale (fresh?). 203262306a36Sopenharmony_ci * 203362306a36Sopenharmony_ci * Called under session->s_mutex 203462306a36Sopenharmony_ci */ 203562306a36Sopenharmony_cistatic void renewed_caps(struct ceph_mds_client *mdsc, 203662306a36Sopenharmony_ci struct ceph_mds_session *session, int is_renew) 203762306a36Sopenharmony_ci{ 203862306a36Sopenharmony_ci int was_stale; 203962306a36Sopenharmony_ci int wake = 0; 204062306a36Sopenharmony_ci 204162306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 204262306a36Sopenharmony_ci was_stale = is_renew && time_after_eq(jiffies, session->s_cap_ttl); 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci session->s_cap_ttl = session->s_renew_requested + 204562306a36Sopenharmony_ci mdsc->mdsmap->m_session_timeout*HZ; 204662306a36Sopenharmony_ci 204762306a36Sopenharmony_ci if (was_stale) { 204862306a36Sopenharmony_ci if (time_before(jiffies, session->s_cap_ttl)) { 204962306a36Sopenharmony_ci pr_info("mds%d caps renewed\n", session->s_mds); 205062306a36Sopenharmony_ci wake = 1; 205162306a36Sopenharmony_ci } else { 205262306a36Sopenharmony_ci pr_info("mds%d caps still stale\n", session->s_mds); 
205362306a36Sopenharmony_ci } 205462306a36Sopenharmony_ci } 205562306a36Sopenharmony_ci dout("renewed_caps mds%d ttl now %lu, was %s, now %s\n", 205662306a36Sopenharmony_ci session->s_mds, session->s_cap_ttl, was_stale ? "stale" : "fresh", 205762306a36Sopenharmony_ci time_before(jiffies, session->s_cap_ttl) ? "stale" : "fresh"); 205862306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 205962306a36Sopenharmony_ci 206062306a36Sopenharmony_ci if (wake) 206162306a36Sopenharmony_ci wake_up_session_caps(session, RENEWCAPS); 206262306a36Sopenharmony_ci} 206362306a36Sopenharmony_ci 206462306a36Sopenharmony_ci/* 206562306a36Sopenharmony_ci * send a session close request 206662306a36Sopenharmony_ci */ 206762306a36Sopenharmony_cistatic int request_close_session(struct ceph_mds_session *session) 206862306a36Sopenharmony_ci{ 206962306a36Sopenharmony_ci struct ceph_msg *msg; 207062306a36Sopenharmony_ci 207162306a36Sopenharmony_ci dout("request_close_session mds%d state %s seq %lld\n", 207262306a36Sopenharmony_ci session->s_mds, ceph_session_state_name(session->s_state), 207362306a36Sopenharmony_ci session->s_seq); 207462306a36Sopenharmony_ci msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_CLOSE, 207562306a36Sopenharmony_ci session->s_seq); 207662306a36Sopenharmony_ci if (!msg) 207762306a36Sopenharmony_ci return -ENOMEM; 207862306a36Sopenharmony_ci ceph_con_send(&session->s_con, msg); 207962306a36Sopenharmony_ci return 1; 208062306a36Sopenharmony_ci} 208162306a36Sopenharmony_ci 208262306a36Sopenharmony_ci/* 208362306a36Sopenharmony_ci * Called with s_mutex held. 
208462306a36Sopenharmony_ci */ 208562306a36Sopenharmony_cistatic int __close_session(struct ceph_mds_client *mdsc, 208662306a36Sopenharmony_ci struct ceph_mds_session *session) 208762306a36Sopenharmony_ci{ 208862306a36Sopenharmony_ci if (session->s_state >= CEPH_MDS_SESSION_CLOSING) 208962306a36Sopenharmony_ci return 0; 209062306a36Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_CLOSING; 209162306a36Sopenharmony_ci return request_close_session(session); 209262306a36Sopenharmony_ci} 209362306a36Sopenharmony_ci 209462306a36Sopenharmony_cistatic bool drop_negative_children(struct dentry *dentry) 209562306a36Sopenharmony_ci{ 209662306a36Sopenharmony_ci struct dentry *child; 209762306a36Sopenharmony_ci bool all_negative = true; 209862306a36Sopenharmony_ci 209962306a36Sopenharmony_ci if (!d_is_dir(dentry)) 210062306a36Sopenharmony_ci goto out; 210162306a36Sopenharmony_ci 210262306a36Sopenharmony_ci spin_lock(&dentry->d_lock); 210362306a36Sopenharmony_ci list_for_each_entry(child, &dentry->d_subdirs, d_child) { 210462306a36Sopenharmony_ci if (d_really_is_positive(child)) { 210562306a36Sopenharmony_ci all_negative = false; 210662306a36Sopenharmony_ci break; 210762306a36Sopenharmony_ci } 210862306a36Sopenharmony_ci } 210962306a36Sopenharmony_ci spin_unlock(&dentry->d_lock); 211062306a36Sopenharmony_ci 211162306a36Sopenharmony_ci if (all_negative) 211262306a36Sopenharmony_ci shrink_dcache_parent(dentry); 211362306a36Sopenharmony_ciout: 211462306a36Sopenharmony_ci return all_negative; 211562306a36Sopenharmony_ci} 211662306a36Sopenharmony_ci 211762306a36Sopenharmony_ci/* 211862306a36Sopenharmony_ci * Trim old(er) caps. 211962306a36Sopenharmony_ci * 212062306a36Sopenharmony_ci * Because we can't cache an inode without one or more caps, we do 212162306a36Sopenharmony_ci * this indirectly: if a cap is unused, we prune its aliases, at which 212262306a36Sopenharmony_ci * point the inode will hopefully get dropped to. 
212362306a36Sopenharmony_ci * 212462306a36Sopenharmony_ci * Yes, this is a bit sloppy. Our only real goal here is to respond to 212562306a36Sopenharmony_ci * memory pressure from the MDS, though, so it needn't be perfect. 212662306a36Sopenharmony_ci */ 212762306a36Sopenharmony_cistatic int trim_caps_cb(struct inode *inode, int mds, void *arg) 212862306a36Sopenharmony_ci{ 212962306a36Sopenharmony_ci int *remaining = arg; 213062306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 213162306a36Sopenharmony_ci int used, wanted, oissued, mine; 213262306a36Sopenharmony_ci struct ceph_cap *cap; 213362306a36Sopenharmony_ci 213462306a36Sopenharmony_ci if (*remaining <= 0) 213562306a36Sopenharmony_ci return -1; 213662306a36Sopenharmony_ci 213762306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 213862306a36Sopenharmony_ci cap = __get_cap_for_mds(ci, mds); 213962306a36Sopenharmony_ci if (!cap) { 214062306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 214162306a36Sopenharmony_ci return 0; 214262306a36Sopenharmony_ci } 214362306a36Sopenharmony_ci mine = cap->issued | cap->implemented; 214462306a36Sopenharmony_ci used = __ceph_caps_used(ci); 214562306a36Sopenharmony_ci wanted = __ceph_caps_file_wanted(ci); 214662306a36Sopenharmony_ci oissued = __ceph_caps_issued_other(ci, cap); 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_ci dout("trim_caps_cb %p cap %p mine %s oissued %s used %s wanted %s\n", 214962306a36Sopenharmony_ci inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued), 215062306a36Sopenharmony_ci ceph_cap_string(used), ceph_cap_string(wanted)); 215162306a36Sopenharmony_ci if (cap == ci->i_auth_cap) { 215262306a36Sopenharmony_ci if (ci->i_dirty_caps || ci->i_flushing_caps || 215362306a36Sopenharmony_ci !list_empty(&ci->i_cap_snaps)) 215462306a36Sopenharmony_ci goto out; 215562306a36Sopenharmony_ci if ((used | wanted) & CEPH_CAP_ANY_WR) 215662306a36Sopenharmony_ci goto out; 215762306a36Sopenharmony_ci /* Note: it's possible that 
i_filelock_ref becomes non-zero 215862306a36Sopenharmony_ci * after dropping auth caps. It doesn't hurt because reply 215962306a36Sopenharmony_ci * of lock mds request will re-add auth caps. */ 216062306a36Sopenharmony_ci if (atomic_read(&ci->i_filelock_ref) > 0) 216162306a36Sopenharmony_ci goto out; 216262306a36Sopenharmony_ci } 216362306a36Sopenharmony_ci /* The inode has cached pages, but it's no longer used. 216462306a36Sopenharmony_ci * we can safely drop it */ 216562306a36Sopenharmony_ci if (S_ISREG(inode->i_mode) && 216662306a36Sopenharmony_ci wanted == 0 && used == CEPH_CAP_FILE_CACHE && 216762306a36Sopenharmony_ci !(oissued & CEPH_CAP_FILE_CACHE)) { 216862306a36Sopenharmony_ci used = 0; 216962306a36Sopenharmony_ci oissued = 0; 217062306a36Sopenharmony_ci } 217162306a36Sopenharmony_ci if ((used | wanted) & ~oissued & mine) 217262306a36Sopenharmony_ci goto out; /* we need these caps */ 217362306a36Sopenharmony_ci 217462306a36Sopenharmony_ci if (oissued) { 217562306a36Sopenharmony_ci /* we aren't the only cap.. 
just remove us */ 217662306a36Sopenharmony_ci ceph_remove_cap(cap, true); 217762306a36Sopenharmony_ci (*remaining)--; 217862306a36Sopenharmony_ci } else { 217962306a36Sopenharmony_ci struct dentry *dentry; 218062306a36Sopenharmony_ci /* try dropping referring dentries */ 218162306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 218262306a36Sopenharmony_ci dentry = d_find_any_alias(inode); 218362306a36Sopenharmony_ci if (dentry && drop_negative_children(dentry)) { 218462306a36Sopenharmony_ci int count; 218562306a36Sopenharmony_ci dput(dentry); 218662306a36Sopenharmony_ci d_prune_aliases(inode); 218762306a36Sopenharmony_ci count = atomic_read(&inode->i_count); 218862306a36Sopenharmony_ci if (count == 1) 218962306a36Sopenharmony_ci (*remaining)--; 219062306a36Sopenharmony_ci dout("trim_caps_cb %p cap %p pruned, count now %d\n", 219162306a36Sopenharmony_ci inode, cap, count); 219262306a36Sopenharmony_ci } else { 219362306a36Sopenharmony_ci dput(dentry); 219462306a36Sopenharmony_ci } 219562306a36Sopenharmony_ci return 0; 219662306a36Sopenharmony_ci } 219762306a36Sopenharmony_ci 219862306a36Sopenharmony_ciout: 219962306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 220062306a36Sopenharmony_ci return 0; 220162306a36Sopenharmony_ci} 220262306a36Sopenharmony_ci 220362306a36Sopenharmony_ci/* 220462306a36Sopenharmony_ci * Trim session cap count down to some max number. 
220562306a36Sopenharmony_ci */ 220662306a36Sopenharmony_ciint ceph_trim_caps(struct ceph_mds_client *mdsc, 220762306a36Sopenharmony_ci struct ceph_mds_session *session, 220862306a36Sopenharmony_ci int max_caps) 220962306a36Sopenharmony_ci{ 221062306a36Sopenharmony_ci int trim_caps = session->s_nr_caps - max_caps; 221162306a36Sopenharmony_ci 221262306a36Sopenharmony_ci dout("trim_caps mds%d start: %d / %d, trim %d\n", 221362306a36Sopenharmony_ci session->s_mds, session->s_nr_caps, max_caps, trim_caps); 221462306a36Sopenharmony_ci if (trim_caps > 0) { 221562306a36Sopenharmony_ci int remaining = trim_caps; 221662306a36Sopenharmony_ci 221762306a36Sopenharmony_ci ceph_iterate_session_caps(session, trim_caps_cb, &remaining); 221862306a36Sopenharmony_ci dout("trim_caps mds%d done: %d / %d, trimmed %d\n", 221962306a36Sopenharmony_ci session->s_mds, session->s_nr_caps, max_caps, 222062306a36Sopenharmony_ci trim_caps - remaining); 222162306a36Sopenharmony_ci } 222262306a36Sopenharmony_ci 222362306a36Sopenharmony_ci ceph_flush_cap_releases(mdsc, session); 222462306a36Sopenharmony_ci return 0; 222562306a36Sopenharmony_ci} 222662306a36Sopenharmony_ci 222762306a36Sopenharmony_cistatic int check_caps_flush(struct ceph_mds_client *mdsc, 222862306a36Sopenharmony_ci u64 want_flush_tid) 222962306a36Sopenharmony_ci{ 223062306a36Sopenharmony_ci int ret = 1; 223162306a36Sopenharmony_ci 223262306a36Sopenharmony_ci spin_lock(&mdsc->cap_dirty_lock); 223362306a36Sopenharmony_ci if (!list_empty(&mdsc->cap_flush_list)) { 223462306a36Sopenharmony_ci struct ceph_cap_flush *cf = 223562306a36Sopenharmony_ci list_first_entry(&mdsc->cap_flush_list, 223662306a36Sopenharmony_ci struct ceph_cap_flush, g_list); 223762306a36Sopenharmony_ci if (cf->tid <= want_flush_tid) { 223862306a36Sopenharmony_ci dout("check_caps_flush still flushing tid " 223962306a36Sopenharmony_ci "%llu <= %llu\n", cf->tid, want_flush_tid); 224062306a36Sopenharmony_ci ret = 0; 224162306a36Sopenharmony_ci } 
224262306a36Sopenharmony_ci } 224362306a36Sopenharmony_ci spin_unlock(&mdsc->cap_dirty_lock); 224462306a36Sopenharmony_ci return ret; 224562306a36Sopenharmony_ci} 224662306a36Sopenharmony_ci 224762306a36Sopenharmony_ci/* 224862306a36Sopenharmony_ci * flush all dirty inode data to disk. 224962306a36Sopenharmony_ci * 225062306a36Sopenharmony_ci * returns true if we've flushed through want_flush_tid 225162306a36Sopenharmony_ci */ 225262306a36Sopenharmony_cistatic void wait_caps_flush(struct ceph_mds_client *mdsc, 225362306a36Sopenharmony_ci u64 want_flush_tid) 225462306a36Sopenharmony_ci{ 225562306a36Sopenharmony_ci dout("check_caps_flush want %llu\n", want_flush_tid); 225662306a36Sopenharmony_ci 225762306a36Sopenharmony_ci wait_event(mdsc->cap_flushing_wq, 225862306a36Sopenharmony_ci check_caps_flush(mdsc, want_flush_tid)); 225962306a36Sopenharmony_ci 226062306a36Sopenharmony_ci dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid); 226162306a36Sopenharmony_ci} 226262306a36Sopenharmony_ci 226362306a36Sopenharmony_ci/* 226462306a36Sopenharmony_ci * called under s_mutex 226562306a36Sopenharmony_ci */ 226662306a36Sopenharmony_cistatic void ceph_send_cap_releases(struct ceph_mds_client *mdsc, 226762306a36Sopenharmony_ci struct ceph_mds_session *session) 226862306a36Sopenharmony_ci{ 226962306a36Sopenharmony_ci struct ceph_msg *msg = NULL; 227062306a36Sopenharmony_ci struct ceph_mds_cap_release *head; 227162306a36Sopenharmony_ci struct ceph_mds_cap_item *item; 227262306a36Sopenharmony_ci struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc; 227362306a36Sopenharmony_ci struct ceph_cap *cap; 227462306a36Sopenharmony_ci LIST_HEAD(tmp_list); 227562306a36Sopenharmony_ci int num_cap_releases; 227662306a36Sopenharmony_ci __le32 barrier, *cap_barrier; 227762306a36Sopenharmony_ci 227862306a36Sopenharmony_ci down_read(&osdc->lock); 227962306a36Sopenharmony_ci barrier = cpu_to_le32(osdc->epoch_barrier); 228062306a36Sopenharmony_ci up_read(&osdc->lock); 
228162306a36Sopenharmony_ci 228262306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 228362306a36Sopenharmony_ciagain: 228462306a36Sopenharmony_ci list_splice_init(&session->s_cap_releases, &tmp_list); 228562306a36Sopenharmony_ci num_cap_releases = session->s_num_cap_releases; 228662306a36Sopenharmony_ci session->s_num_cap_releases = 0; 228762306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 228862306a36Sopenharmony_ci 228962306a36Sopenharmony_ci while (!list_empty(&tmp_list)) { 229062306a36Sopenharmony_ci if (!msg) { 229162306a36Sopenharmony_ci msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, 229262306a36Sopenharmony_ci PAGE_SIZE, GFP_NOFS, false); 229362306a36Sopenharmony_ci if (!msg) 229462306a36Sopenharmony_ci goto out_err; 229562306a36Sopenharmony_ci head = msg->front.iov_base; 229662306a36Sopenharmony_ci head->num = cpu_to_le32(0); 229762306a36Sopenharmony_ci msg->front.iov_len = sizeof(*head); 229862306a36Sopenharmony_ci 229962306a36Sopenharmony_ci msg->hdr.version = cpu_to_le16(2); 230062306a36Sopenharmony_ci msg->hdr.compat_version = cpu_to_le16(1); 230162306a36Sopenharmony_ci } 230262306a36Sopenharmony_ci 230362306a36Sopenharmony_ci cap = list_first_entry(&tmp_list, struct ceph_cap, 230462306a36Sopenharmony_ci session_caps); 230562306a36Sopenharmony_ci list_del(&cap->session_caps); 230662306a36Sopenharmony_ci num_cap_releases--; 230762306a36Sopenharmony_ci 230862306a36Sopenharmony_ci head = msg->front.iov_base; 230962306a36Sopenharmony_ci put_unaligned_le32(get_unaligned_le32(&head->num) + 1, 231062306a36Sopenharmony_ci &head->num); 231162306a36Sopenharmony_ci item = msg->front.iov_base + msg->front.iov_len; 231262306a36Sopenharmony_ci item->ino = cpu_to_le64(cap->cap_ino); 231362306a36Sopenharmony_ci item->cap_id = cpu_to_le64(cap->cap_id); 231462306a36Sopenharmony_ci item->migrate_seq = cpu_to_le32(cap->mseq); 231562306a36Sopenharmony_ci item->seq = cpu_to_le32(cap->issue_seq); 231662306a36Sopenharmony_ci msg->front.iov_len += sizeof(*item); 
231762306a36Sopenharmony_ci 231862306a36Sopenharmony_ci ceph_put_cap(mdsc, cap); 231962306a36Sopenharmony_ci 232062306a36Sopenharmony_ci if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { 232162306a36Sopenharmony_ci // Append cap_barrier field 232262306a36Sopenharmony_ci cap_barrier = msg->front.iov_base + msg->front.iov_len; 232362306a36Sopenharmony_ci *cap_barrier = barrier; 232462306a36Sopenharmony_ci msg->front.iov_len += sizeof(*cap_barrier); 232562306a36Sopenharmony_ci 232662306a36Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 232762306a36Sopenharmony_ci dout("send_cap_releases mds%d %p\n", session->s_mds, msg); 232862306a36Sopenharmony_ci ceph_con_send(&session->s_con, msg); 232962306a36Sopenharmony_ci msg = NULL; 233062306a36Sopenharmony_ci } 233162306a36Sopenharmony_ci } 233262306a36Sopenharmony_ci 233362306a36Sopenharmony_ci BUG_ON(num_cap_releases != 0); 233462306a36Sopenharmony_ci 233562306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 233662306a36Sopenharmony_ci if (!list_empty(&session->s_cap_releases)) 233762306a36Sopenharmony_ci goto again; 233862306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 233962306a36Sopenharmony_ci 234062306a36Sopenharmony_ci if (msg) { 234162306a36Sopenharmony_ci // Append cap_barrier field 234262306a36Sopenharmony_ci cap_barrier = msg->front.iov_base + msg->front.iov_len; 234362306a36Sopenharmony_ci *cap_barrier = barrier; 234462306a36Sopenharmony_ci msg->front.iov_len += sizeof(*cap_barrier); 234562306a36Sopenharmony_ci 234662306a36Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 234762306a36Sopenharmony_ci dout("send_cap_releases mds%d %p\n", session->s_mds, msg); 234862306a36Sopenharmony_ci ceph_con_send(&session->s_con, msg); 234962306a36Sopenharmony_ci } 235062306a36Sopenharmony_ci return; 235162306a36Sopenharmony_ciout_err: 235262306a36Sopenharmony_ci pr_err("send_cap_releases mds%d, failed to allocate message\n", 235362306a36Sopenharmony_ci session->s_mds); 
235462306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 235562306a36Sopenharmony_ci list_splice(&tmp_list, &session->s_cap_releases); 235662306a36Sopenharmony_ci session->s_num_cap_releases += num_cap_releases; 235762306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 235862306a36Sopenharmony_ci} 235962306a36Sopenharmony_ci 236062306a36Sopenharmony_cistatic void ceph_cap_release_work(struct work_struct *work) 236162306a36Sopenharmony_ci{ 236262306a36Sopenharmony_ci struct ceph_mds_session *session = 236362306a36Sopenharmony_ci container_of(work, struct ceph_mds_session, s_cap_release_work); 236462306a36Sopenharmony_ci 236562306a36Sopenharmony_ci mutex_lock(&session->s_mutex); 236662306a36Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_OPEN || 236762306a36Sopenharmony_ci session->s_state == CEPH_MDS_SESSION_HUNG) 236862306a36Sopenharmony_ci ceph_send_cap_releases(session->s_mdsc, session); 236962306a36Sopenharmony_ci mutex_unlock(&session->s_mutex); 237062306a36Sopenharmony_ci ceph_put_mds_session(session); 237162306a36Sopenharmony_ci} 237262306a36Sopenharmony_ci 237362306a36Sopenharmony_civoid ceph_flush_cap_releases(struct ceph_mds_client *mdsc, 237462306a36Sopenharmony_ci struct ceph_mds_session *session) 237562306a36Sopenharmony_ci{ 237662306a36Sopenharmony_ci if (mdsc->stopping) 237762306a36Sopenharmony_ci return; 237862306a36Sopenharmony_ci 237962306a36Sopenharmony_ci ceph_get_mds_session(session); 238062306a36Sopenharmony_ci if (queue_work(mdsc->fsc->cap_wq, 238162306a36Sopenharmony_ci &session->s_cap_release_work)) { 238262306a36Sopenharmony_ci dout("cap release work queued\n"); 238362306a36Sopenharmony_ci } else { 238462306a36Sopenharmony_ci ceph_put_mds_session(session); 238562306a36Sopenharmony_ci dout("failed to queue cap release work\n"); 238662306a36Sopenharmony_ci } 238762306a36Sopenharmony_ci} 238862306a36Sopenharmony_ci 238962306a36Sopenharmony_ci/* 239062306a36Sopenharmony_ci * caller holds session->s_cap_lock 
239162306a36Sopenharmony_ci */ 239262306a36Sopenharmony_civoid __ceph_queue_cap_release(struct ceph_mds_session *session, 239362306a36Sopenharmony_ci struct ceph_cap *cap) 239462306a36Sopenharmony_ci{ 239562306a36Sopenharmony_ci list_add_tail(&cap->session_caps, &session->s_cap_releases); 239662306a36Sopenharmony_ci session->s_num_cap_releases++; 239762306a36Sopenharmony_ci 239862306a36Sopenharmony_ci if (!(session->s_num_cap_releases % CEPH_CAPS_PER_RELEASE)) 239962306a36Sopenharmony_ci ceph_flush_cap_releases(session->s_mdsc, session); 240062306a36Sopenharmony_ci} 240162306a36Sopenharmony_ci 240262306a36Sopenharmony_cistatic void ceph_cap_reclaim_work(struct work_struct *work) 240362306a36Sopenharmony_ci{ 240462306a36Sopenharmony_ci struct ceph_mds_client *mdsc = 240562306a36Sopenharmony_ci container_of(work, struct ceph_mds_client, cap_reclaim_work); 240662306a36Sopenharmony_ci int ret = ceph_trim_dentries(mdsc); 240762306a36Sopenharmony_ci if (ret == -EAGAIN) 240862306a36Sopenharmony_ci ceph_queue_cap_reclaim_work(mdsc); 240962306a36Sopenharmony_ci} 241062306a36Sopenharmony_ci 241162306a36Sopenharmony_civoid ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc) 241262306a36Sopenharmony_ci{ 241362306a36Sopenharmony_ci if (mdsc->stopping) 241462306a36Sopenharmony_ci return; 241562306a36Sopenharmony_ci 241662306a36Sopenharmony_ci if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_reclaim_work)) { 241762306a36Sopenharmony_ci dout("caps reclaim work queued\n"); 241862306a36Sopenharmony_ci } else { 241962306a36Sopenharmony_ci dout("failed to queue caps release work\n"); 242062306a36Sopenharmony_ci } 242162306a36Sopenharmony_ci} 242262306a36Sopenharmony_ci 242362306a36Sopenharmony_civoid ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr) 242462306a36Sopenharmony_ci{ 242562306a36Sopenharmony_ci int val; 242662306a36Sopenharmony_ci if (!nr) 242762306a36Sopenharmony_ci return; 242862306a36Sopenharmony_ci val = atomic_add_return(nr, &mdsc->cap_reclaim_pending); 
242962306a36Sopenharmony_ci if ((val % CEPH_CAPS_PER_RELEASE) < nr) { 243062306a36Sopenharmony_ci atomic_set(&mdsc->cap_reclaim_pending, 0); 243162306a36Sopenharmony_ci ceph_queue_cap_reclaim_work(mdsc); 243262306a36Sopenharmony_ci } 243362306a36Sopenharmony_ci} 243462306a36Sopenharmony_ci 243562306a36Sopenharmony_ci/* 243662306a36Sopenharmony_ci * requests 243762306a36Sopenharmony_ci */ 243862306a36Sopenharmony_ci 243962306a36Sopenharmony_ciint ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, 244062306a36Sopenharmony_ci struct inode *dir) 244162306a36Sopenharmony_ci{ 244262306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(dir); 244362306a36Sopenharmony_ci struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; 244462306a36Sopenharmony_ci struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; 244562306a36Sopenharmony_ci size_t size = sizeof(struct ceph_mds_reply_dir_entry); 244662306a36Sopenharmony_ci unsigned int num_entries; 244762306a36Sopenharmony_ci int order; 244862306a36Sopenharmony_ci 244962306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 245062306a36Sopenharmony_ci num_entries = ci->i_files + ci->i_subdirs; 245162306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 245262306a36Sopenharmony_ci num_entries = max(num_entries, 1U); 245362306a36Sopenharmony_ci num_entries = min(num_entries, opt->max_readdir); 245462306a36Sopenharmony_ci 245562306a36Sopenharmony_ci order = get_order(size * num_entries); 245662306a36Sopenharmony_ci while (order >= 0) { 245762306a36Sopenharmony_ci rinfo->dir_entries = (void*)__get_free_pages(GFP_KERNEL | 245862306a36Sopenharmony_ci __GFP_NOWARN | 245962306a36Sopenharmony_ci __GFP_ZERO, 246062306a36Sopenharmony_ci order); 246162306a36Sopenharmony_ci if (rinfo->dir_entries) 246262306a36Sopenharmony_ci break; 246362306a36Sopenharmony_ci order--; 246462306a36Sopenharmony_ci } 246562306a36Sopenharmony_ci if (!rinfo->dir_entries) 246662306a36Sopenharmony_ci return -ENOMEM; 
246762306a36Sopenharmony_ci 246862306a36Sopenharmony_ci num_entries = (PAGE_SIZE << order) / size; 246962306a36Sopenharmony_ci num_entries = min(num_entries, opt->max_readdir); 247062306a36Sopenharmony_ci 247162306a36Sopenharmony_ci rinfo->dir_buf_size = PAGE_SIZE << order; 247262306a36Sopenharmony_ci req->r_num_caps = num_entries + 1; 247362306a36Sopenharmony_ci req->r_args.readdir.max_entries = cpu_to_le32(num_entries); 247462306a36Sopenharmony_ci req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes); 247562306a36Sopenharmony_ci return 0; 247662306a36Sopenharmony_ci} 247762306a36Sopenharmony_ci 247862306a36Sopenharmony_ci/* 247962306a36Sopenharmony_ci * Create an mds request. 248062306a36Sopenharmony_ci */ 248162306a36Sopenharmony_cistruct ceph_mds_request * 248262306a36Sopenharmony_ciceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) 248362306a36Sopenharmony_ci{ 248462306a36Sopenharmony_ci struct ceph_mds_request *req; 248562306a36Sopenharmony_ci 248662306a36Sopenharmony_ci req = kmem_cache_zalloc(ceph_mds_request_cachep, GFP_NOFS); 248762306a36Sopenharmony_ci if (!req) 248862306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 248962306a36Sopenharmony_ci 249062306a36Sopenharmony_ci mutex_init(&req->r_fill_mutex); 249162306a36Sopenharmony_ci req->r_mdsc = mdsc; 249262306a36Sopenharmony_ci req->r_started = jiffies; 249362306a36Sopenharmony_ci req->r_start_latency = ktime_get(); 249462306a36Sopenharmony_ci req->r_resend_mds = -1; 249562306a36Sopenharmony_ci INIT_LIST_HEAD(&req->r_unsafe_dir_item); 249662306a36Sopenharmony_ci INIT_LIST_HEAD(&req->r_unsafe_target_item); 249762306a36Sopenharmony_ci req->r_fmode = -1; 249862306a36Sopenharmony_ci req->r_feature_needed = -1; 249962306a36Sopenharmony_ci kref_init(&req->r_kref); 250062306a36Sopenharmony_ci RB_CLEAR_NODE(&req->r_node); 250162306a36Sopenharmony_ci INIT_LIST_HEAD(&req->r_wait); 250262306a36Sopenharmony_ci init_completion(&req->r_completion); 250362306a36Sopenharmony_ci 
init_completion(&req->r_safe_completion); 250462306a36Sopenharmony_ci INIT_LIST_HEAD(&req->r_unsafe_item); 250562306a36Sopenharmony_ci 250662306a36Sopenharmony_ci ktime_get_coarse_real_ts64(&req->r_stamp); 250762306a36Sopenharmony_ci 250862306a36Sopenharmony_ci req->r_op = op; 250962306a36Sopenharmony_ci req->r_direct_mode = mode; 251062306a36Sopenharmony_ci return req; 251162306a36Sopenharmony_ci} 251262306a36Sopenharmony_ci 251362306a36Sopenharmony_ci/* 251462306a36Sopenharmony_ci * return oldest (lowest) request, tid in request tree, 0 if none. 251562306a36Sopenharmony_ci * 251662306a36Sopenharmony_ci * called under mdsc->mutex. 251762306a36Sopenharmony_ci */ 251862306a36Sopenharmony_cistatic struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc) 251962306a36Sopenharmony_ci{ 252062306a36Sopenharmony_ci if (RB_EMPTY_ROOT(&mdsc->request_tree)) 252162306a36Sopenharmony_ci return NULL; 252262306a36Sopenharmony_ci return rb_entry(rb_first(&mdsc->request_tree), 252362306a36Sopenharmony_ci struct ceph_mds_request, r_node); 252462306a36Sopenharmony_ci} 252562306a36Sopenharmony_ci 252662306a36Sopenharmony_cistatic inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc) 252762306a36Sopenharmony_ci{ 252862306a36Sopenharmony_ci return mdsc->oldest_tid; 252962306a36Sopenharmony_ci} 253062306a36Sopenharmony_ci 253162306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_FS_ENCRYPTION) 253262306a36Sopenharmony_cistatic u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) 253362306a36Sopenharmony_ci{ 253462306a36Sopenharmony_ci struct inode *dir = req->r_parent; 253562306a36Sopenharmony_ci struct dentry *dentry = req->r_dentry; 253662306a36Sopenharmony_ci u8 *cryptbuf = NULL; 253762306a36Sopenharmony_ci u32 len = 0; 253862306a36Sopenharmony_ci int ret = 0; 253962306a36Sopenharmony_ci 254062306a36Sopenharmony_ci /* only encode if we have parent and dentry */ 254162306a36Sopenharmony_ci if (!dir || !dentry) 254262306a36Sopenharmony_ci goto success; 
254362306a36Sopenharmony_ci 254462306a36Sopenharmony_ci /* No-op unless this is encrypted */ 254562306a36Sopenharmony_ci if (!IS_ENCRYPTED(dir)) 254662306a36Sopenharmony_ci goto success; 254762306a36Sopenharmony_ci 254862306a36Sopenharmony_ci ret = ceph_fscrypt_prepare_readdir(dir); 254962306a36Sopenharmony_ci if (ret < 0) 255062306a36Sopenharmony_ci return ERR_PTR(ret); 255162306a36Sopenharmony_ci 255262306a36Sopenharmony_ci /* No key? Just ignore it. */ 255362306a36Sopenharmony_ci if (!fscrypt_has_encryption_key(dir)) 255462306a36Sopenharmony_ci goto success; 255562306a36Sopenharmony_ci 255662306a36Sopenharmony_ci if (!fscrypt_fname_encrypted_size(dir, dentry->d_name.len, NAME_MAX, 255762306a36Sopenharmony_ci &len)) { 255862306a36Sopenharmony_ci WARN_ON_ONCE(1); 255962306a36Sopenharmony_ci return ERR_PTR(-ENAMETOOLONG); 256062306a36Sopenharmony_ci } 256162306a36Sopenharmony_ci 256262306a36Sopenharmony_ci /* No need to append altname if name is short enough */ 256362306a36Sopenharmony_ci if (len <= CEPH_NOHASH_NAME_MAX) { 256462306a36Sopenharmony_ci len = 0; 256562306a36Sopenharmony_ci goto success; 256662306a36Sopenharmony_ci } 256762306a36Sopenharmony_ci 256862306a36Sopenharmony_ci cryptbuf = kmalloc(len, GFP_KERNEL); 256962306a36Sopenharmony_ci if (!cryptbuf) 257062306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 257162306a36Sopenharmony_ci 257262306a36Sopenharmony_ci ret = fscrypt_fname_encrypt(dir, &dentry->d_name, cryptbuf, len); 257362306a36Sopenharmony_ci if (ret) { 257462306a36Sopenharmony_ci kfree(cryptbuf); 257562306a36Sopenharmony_ci return ERR_PTR(ret); 257662306a36Sopenharmony_ci } 257762306a36Sopenharmony_cisuccess: 257862306a36Sopenharmony_ci *plen = len; 257962306a36Sopenharmony_ci return cryptbuf; 258062306a36Sopenharmony_ci} 258162306a36Sopenharmony_ci#else 258262306a36Sopenharmony_cistatic u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen) 258362306a36Sopenharmony_ci{ 258462306a36Sopenharmony_ci *plen = 0; 
258562306a36Sopenharmony_ci return NULL; 258662306a36Sopenharmony_ci} 258762306a36Sopenharmony_ci#endif 258862306a36Sopenharmony_ci 258962306a36Sopenharmony_ci/** 259062306a36Sopenharmony_ci * ceph_mdsc_build_path - build a path string to a given dentry 259162306a36Sopenharmony_ci * @dentry: dentry to which path should be built 259262306a36Sopenharmony_ci * @plen: returned length of string 259362306a36Sopenharmony_ci * @pbase: returned base inode number 259462306a36Sopenharmony_ci * @for_wire: is this path going to be sent to the MDS? 259562306a36Sopenharmony_ci * 259662306a36Sopenharmony_ci * Build a string that represents the path to the dentry. This is mostly called 259762306a36Sopenharmony_ci * for two different purposes: 259862306a36Sopenharmony_ci * 259962306a36Sopenharmony_ci * 1) we need to build a path string to send to the MDS (for_wire == true) 260062306a36Sopenharmony_ci * 2) we need a path string for local presentation (e.g. debugfs) 260162306a36Sopenharmony_ci * (for_wire == false) 260262306a36Sopenharmony_ci * 260362306a36Sopenharmony_ci * The path is built in reverse, starting with the dentry. Walk back up toward 260462306a36Sopenharmony_ci * the root, building the path until the first non-snapped inode is reached 260562306a36Sopenharmony_ci * (for_wire) or the root inode is reached (!for_wire). 260662306a36Sopenharmony_ci * 260762306a36Sopenharmony_ci * Encode hidden .snap dirs as a double /, i.e. 
260862306a36Sopenharmony_ci * foo/.snap/bar -> foo//bar 260962306a36Sopenharmony_ci */ 261062306a36Sopenharmony_cichar *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, 261162306a36Sopenharmony_ci int for_wire) 261262306a36Sopenharmony_ci{ 261362306a36Sopenharmony_ci struct dentry *cur; 261462306a36Sopenharmony_ci struct inode *inode; 261562306a36Sopenharmony_ci char *path; 261662306a36Sopenharmony_ci int pos; 261762306a36Sopenharmony_ci unsigned seq; 261862306a36Sopenharmony_ci u64 base; 261962306a36Sopenharmony_ci 262062306a36Sopenharmony_ci if (!dentry) 262162306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 262262306a36Sopenharmony_ci 262362306a36Sopenharmony_ci path = __getname(); 262462306a36Sopenharmony_ci if (!path) 262562306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 262662306a36Sopenharmony_ciretry: 262762306a36Sopenharmony_ci pos = PATH_MAX - 1; 262862306a36Sopenharmony_ci path[pos] = '\0'; 262962306a36Sopenharmony_ci 263062306a36Sopenharmony_ci seq = read_seqbegin(&rename_lock); 263162306a36Sopenharmony_ci cur = dget(dentry); 263262306a36Sopenharmony_ci for (;;) { 263362306a36Sopenharmony_ci struct dentry *parent; 263462306a36Sopenharmony_ci 263562306a36Sopenharmony_ci spin_lock(&cur->d_lock); 263662306a36Sopenharmony_ci inode = d_inode(cur); 263762306a36Sopenharmony_ci if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { 263862306a36Sopenharmony_ci dout("build_path path+%d: %p SNAPDIR\n", 263962306a36Sopenharmony_ci pos, cur); 264062306a36Sopenharmony_ci spin_unlock(&cur->d_lock); 264162306a36Sopenharmony_ci parent = dget_parent(cur); 264262306a36Sopenharmony_ci } else if (for_wire && inode && dentry != cur && 264362306a36Sopenharmony_ci ceph_snap(inode) == CEPH_NOSNAP) { 264462306a36Sopenharmony_ci spin_unlock(&cur->d_lock); 264562306a36Sopenharmony_ci pos++; /* get rid of any prepended '/' */ 264662306a36Sopenharmony_ci break; 264762306a36Sopenharmony_ci } else if (!for_wire || !IS_ENCRYPTED(d_inode(cur->d_parent))) { 
264862306a36Sopenharmony_ci pos -= cur->d_name.len; 264962306a36Sopenharmony_ci if (pos < 0) { 265062306a36Sopenharmony_ci spin_unlock(&cur->d_lock); 265162306a36Sopenharmony_ci break; 265262306a36Sopenharmony_ci } 265362306a36Sopenharmony_ci memcpy(path + pos, cur->d_name.name, cur->d_name.len); 265462306a36Sopenharmony_ci spin_unlock(&cur->d_lock); 265562306a36Sopenharmony_ci parent = dget_parent(cur); 265662306a36Sopenharmony_ci } else { 265762306a36Sopenharmony_ci int len, ret; 265862306a36Sopenharmony_ci char buf[NAME_MAX]; 265962306a36Sopenharmony_ci 266062306a36Sopenharmony_ci /* 266162306a36Sopenharmony_ci * Proactively copy name into buf, in case we need to 266262306a36Sopenharmony_ci * present it as-is. 266362306a36Sopenharmony_ci */ 266462306a36Sopenharmony_ci memcpy(buf, cur->d_name.name, cur->d_name.len); 266562306a36Sopenharmony_ci len = cur->d_name.len; 266662306a36Sopenharmony_ci spin_unlock(&cur->d_lock); 266762306a36Sopenharmony_ci parent = dget_parent(cur); 266862306a36Sopenharmony_ci 266962306a36Sopenharmony_ci ret = ceph_fscrypt_prepare_readdir(d_inode(parent)); 267062306a36Sopenharmony_ci if (ret < 0) { 267162306a36Sopenharmony_ci dput(parent); 267262306a36Sopenharmony_ci dput(cur); 267362306a36Sopenharmony_ci return ERR_PTR(ret); 267462306a36Sopenharmony_ci } 267562306a36Sopenharmony_ci 267662306a36Sopenharmony_ci if (fscrypt_has_encryption_key(d_inode(parent))) { 267762306a36Sopenharmony_ci len = ceph_encode_encrypted_fname(d_inode(parent), 267862306a36Sopenharmony_ci cur, buf); 267962306a36Sopenharmony_ci if (len < 0) { 268062306a36Sopenharmony_ci dput(parent); 268162306a36Sopenharmony_ci dput(cur); 268262306a36Sopenharmony_ci return ERR_PTR(len); 268362306a36Sopenharmony_ci } 268462306a36Sopenharmony_ci } 268562306a36Sopenharmony_ci pos -= len; 268662306a36Sopenharmony_ci if (pos < 0) { 268762306a36Sopenharmony_ci dput(parent); 268862306a36Sopenharmony_ci break; 268962306a36Sopenharmony_ci } 269062306a36Sopenharmony_ci memcpy(path + pos, 
buf, len); 269162306a36Sopenharmony_ci } 269262306a36Sopenharmony_ci dput(cur); 269362306a36Sopenharmony_ci cur = parent; 269462306a36Sopenharmony_ci 269562306a36Sopenharmony_ci /* Are we at the root? */ 269662306a36Sopenharmony_ci if (IS_ROOT(cur)) 269762306a36Sopenharmony_ci break; 269862306a36Sopenharmony_ci 269962306a36Sopenharmony_ci /* Are we out of buffer? */ 270062306a36Sopenharmony_ci if (--pos < 0) 270162306a36Sopenharmony_ci break; 270262306a36Sopenharmony_ci 270362306a36Sopenharmony_ci path[pos] = '/'; 270462306a36Sopenharmony_ci } 270562306a36Sopenharmony_ci inode = d_inode(cur); 270662306a36Sopenharmony_ci base = inode ? ceph_ino(inode) : 0; 270762306a36Sopenharmony_ci dput(cur); 270862306a36Sopenharmony_ci 270962306a36Sopenharmony_ci if (read_seqretry(&rename_lock, seq)) 271062306a36Sopenharmony_ci goto retry; 271162306a36Sopenharmony_ci 271262306a36Sopenharmony_ci if (pos < 0) { 271362306a36Sopenharmony_ci /* 271462306a36Sopenharmony_ci * A rename didn't occur, but somehow we didn't end up where 271562306a36Sopenharmony_ci * we thought we would. Throw a warning and try again. 
271662306a36Sopenharmony_ci */ 271762306a36Sopenharmony_ci pr_warn("build_path did not end path lookup where expected (pos = %d)\n", 271862306a36Sopenharmony_ci pos); 271962306a36Sopenharmony_ci goto retry; 272062306a36Sopenharmony_ci } 272162306a36Sopenharmony_ci 272262306a36Sopenharmony_ci *pbase = base; 272362306a36Sopenharmony_ci *plen = PATH_MAX - 1 - pos; 272462306a36Sopenharmony_ci dout("build_path on %p %d built %llx '%.*s'\n", 272562306a36Sopenharmony_ci dentry, d_count(dentry), base, *plen, path + pos); 272662306a36Sopenharmony_ci return path + pos; 272762306a36Sopenharmony_ci} 272862306a36Sopenharmony_ci 272962306a36Sopenharmony_cistatic int build_dentry_path(struct dentry *dentry, struct inode *dir, 273062306a36Sopenharmony_ci const char **ppath, int *ppathlen, u64 *pino, 273162306a36Sopenharmony_ci bool *pfreepath, bool parent_locked) 273262306a36Sopenharmony_ci{ 273362306a36Sopenharmony_ci char *path; 273462306a36Sopenharmony_ci 273562306a36Sopenharmony_ci rcu_read_lock(); 273662306a36Sopenharmony_ci if (!dir) 273762306a36Sopenharmony_ci dir = d_inode_rcu(dentry->d_parent); 273862306a36Sopenharmony_ci if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP && 273962306a36Sopenharmony_ci !IS_ENCRYPTED(dir)) { 274062306a36Sopenharmony_ci *pino = ceph_ino(dir); 274162306a36Sopenharmony_ci rcu_read_unlock(); 274262306a36Sopenharmony_ci *ppath = dentry->d_name.name; 274362306a36Sopenharmony_ci *ppathlen = dentry->d_name.len; 274462306a36Sopenharmony_ci return 0; 274562306a36Sopenharmony_ci } 274662306a36Sopenharmony_ci rcu_read_unlock(); 274762306a36Sopenharmony_ci path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); 274862306a36Sopenharmony_ci if (IS_ERR(path)) 274962306a36Sopenharmony_ci return PTR_ERR(path); 275062306a36Sopenharmony_ci *ppath = path; 275162306a36Sopenharmony_ci *pfreepath = true; 275262306a36Sopenharmony_ci return 0; 275362306a36Sopenharmony_ci} 275462306a36Sopenharmony_ci 275562306a36Sopenharmony_cistatic int 
build_inode_path(struct inode *inode, 275662306a36Sopenharmony_ci const char **ppath, int *ppathlen, u64 *pino, 275762306a36Sopenharmony_ci bool *pfreepath) 275862306a36Sopenharmony_ci{ 275962306a36Sopenharmony_ci struct dentry *dentry; 276062306a36Sopenharmony_ci char *path; 276162306a36Sopenharmony_ci 276262306a36Sopenharmony_ci if (ceph_snap(inode) == CEPH_NOSNAP) { 276362306a36Sopenharmony_ci *pino = ceph_ino(inode); 276462306a36Sopenharmony_ci *ppathlen = 0; 276562306a36Sopenharmony_ci return 0; 276662306a36Sopenharmony_ci } 276762306a36Sopenharmony_ci dentry = d_find_alias(inode); 276862306a36Sopenharmony_ci path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); 276962306a36Sopenharmony_ci dput(dentry); 277062306a36Sopenharmony_ci if (IS_ERR(path)) 277162306a36Sopenharmony_ci return PTR_ERR(path); 277262306a36Sopenharmony_ci *ppath = path; 277362306a36Sopenharmony_ci *pfreepath = true; 277462306a36Sopenharmony_ci return 0; 277562306a36Sopenharmony_ci} 277662306a36Sopenharmony_ci 277762306a36Sopenharmony_ci/* 277862306a36Sopenharmony_ci * request arguments may be specified via an inode *, a dentry *, or 277962306a36Sopenharmony_ci * an explicit ino+path. 
278062306a36Sopenharmony_ci */ 278162306a36Sopenharmony_cistatic int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, 278262306a36Sopenharmony_ci struct inode *rdiri, const char *rpath, 278362306a36Sopenharmony_ci u64 rino, const char **ppath, int *pathlen, 278462306a36Sopenharmony_ci u64 *ino, bool *freepath, bool parent_locked) 278562306a36Sopenharmony_ci{ 278662306a36Sopenharmony_ci int r = 0; 278762306a36Sopenharmony_ci 278862306a36Sopenharmony_ci if (rinode) { 278962306a36Sopenharmony_ci r = build_inode_path(rinode, ppath, pathlen, ino, freepath); 279062306a36Sopenharmony_ci dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode), 279162306a36Sopenharmony_ci ceph_snap(rinode)); 279262306a36Sopenharmony_ci } else if (rdentry) { 279362306a36Sopenharmony_ci r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, 279462306a36Sopenharmony_ci freepath, parent_locked); 279562306a36Sopenharmony_ci dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, 279662306a36Sopenharmony_ci *ppath); 279762306a36Sopenharmony_ci } else if (rpath || rino) { 279862306a36Sopenharmony_ci *ino = rino; 279962306a36Sopenharmony_ci *ppath = rpath; 280062306a36Sopenharmony_ci *pathlen = rpath ? 
strlen(rpath) : 0; 280162306a36Sopenharmony_ci dout(" path %.*s\n", *pathlen, rpath); 280262306a36Sopenharmony_ci } 280362306a36Sopenharmony_ci 280462306a36Sopenharmony_ci return r; 280562306a36Sopenharmony_ci} 280662306a36Sopenharmony_ci 280762306a36Sopenharmony_cistatic void encode_mclientrequest_tail(void **p, 280862306a36Sopenharmony_ci const struct ceph_mds_request *req) 280962306a36Sopenharmony_ci{ 281062306a36Sopenharmony_ci struct ceph_timespec ts; 281162306a36Sopenharmony_ci int i; 281262306a36Sopenharmony_ci 281362306a36Sopenharmony_ci ceph_encode_timespec64(&ts, &req->r_stamp); 281462306a36Sopenharmony_ci ceph_encode_copy(p, &ts, sizeof(ts)); 281562306a36Sopenharmony_ci 281662306a36Sopenharmony_ci /* v4: gid_list */ 281762306a36Sopenharmony_ci ceph_encode_32(p, req->r_cred->group_info->ngroups); 281862306a36Sopenharmony_ci for (i = 0; i < req->r_cred->group_info->ngroups; i++) 281962306a36Sopenharmony_ci ceph_encode_64(p, from_kgid(&init_user_ns, 282062306a36Sopenharmony_ci req->r_cred->group_info->gid[i])); 282162306a36Sopenharmony_ci 282262306a36Sopenharmony_ci /* v5: altname */ 282362306a36Sopenharmony_ci ceph_encode_32(p, req->r_altname_len); 282462306a36Sopenharmony_ci ceph_encode_copy(p, req->r_altname, req->r_altname_len); 282562306a36Sopenharmony_ci 282662306a36Sopenharmony_ci /* v6: fscrypt_auth and fscrypt_file */ 282762306a36Sopenharmony_ci if (req->r_fscrypt_auth) { 282862306a36Sopenharmony_ci u32 authlen = ceph_fscrypt_auth_len(req->r_fscrypt_auth); 282962306a36Sopenharmony_ci 283062306a36Sopenharmony_ci ceph_encode_32(p, authlen); 283162306a36Sopenharmony_ci ceph_encode_copy(p, req->r_fscrypt_auth, authlen); 283262306a36Sopenharmony_ci } else { 283362306a36Sopenharmony_ci ceph_encode_32(p, 0); 283462306a36Sopenharmony_ci } 283562306a36Sopenharmony_ci if (test_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags)) { 283662306a36Sopenharmony_ci ceph_encode_32(p, sizeof(__le64)); 283762306a36Sopenharmony_ci ceph_encode_64(p, req->r_fscrypt_file); 
283862306a36Sopenharmony_ci } else { 283962306a36Sopenharmony_ci ceph_encode_32(p, 0); 284062306a36Sopenharmony_ci } 284162306a36Sopenharmony_ci} 284262306a36Sopenharmony_ci 284362306a36Sopenharmony_cistatic struct ceph_mds_request_head_legacy * 284462306a36Sopenharmony_cifind_legacy_request_head(void *p, u64 features) 284562306a36Sopenharmony_ci{ 284662306a36Sopenharmony_ci bool legacy = !(features & CEPH_FEATURE_FS_BTIME); 284762306a36Sopenharmony_ci struct ceph_mds_request_head_old *ohead; 284862306a36Sopenharmony_ci 284962306a36Sopenharmony_ci if (legacy) 285062306a36Sopenharmony_ci return (struct ceph_mds_request_head_legacy *)p; 285162306a36Sopenharmony_ci ohead = (struct ceph_mds_request_head_old *)p; 285262306a36Sopenharmony_ci return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid; 285362306a36Sopenharmony_ci} 285462306a36Sopenharmony_ci 285562306a36Sopenharmony_ci/* 285662306a36Sopenharmony_ci * called under mdsc->mutex 285762306a36Sopenharmony_ci */ 285862306a36Sopenharmony_cistatic struct ceph_msg *create_request_message(struct ceph_mds_session *session, 285962306a36Sopenharmony_ci struct ceph_mds_request *req, 286062306a36Sopenharmony_ci bool drop_cap_releases) 286162306a36Sopenharmony_ci{ 286262306a36Sopenharmony_ci int mds = session->s_mds; 286362306a36Sopenharmony_ci struct ceph_mds_client *mdsc = session->s_mdsc; 286462306a36Sopenharmony_ci struct ceph_msg *msg; 286562306a36Sopenharmony_ci struct ceph_mds_request_head_legacy *lhead; 286662306a36Sopenharmony_ci const char *path1 = NULL; 286762306a36Sopenharmony_ci const char *path2 = NULL; 286862306a36Sopenharmony_ci u64 ino1 = 0, ino2 = 0; 286962306a36Sopenharmony_ci int pathlen1 = 0, pathlen2 = 0; 287062306a36Sopenharmony_ci bool freepath1 = false, freepath2 = false; 287162306a36Sopenharmony_ci struct dentry *old_dentry = NULL; 287262306a36Sopenharmony_ci int len; 287362306a36Sopenharmony_ci u16 releases; 287462306a36Sopenharmony_ci void *p, *end; 287562306a36Sopenharmony_ci int 
ret; 287662306a36Sopenharmony_ci bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME); 287762306a36Sopenharmony_ci bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, 287862306a36Sopenharmony_ci &session->s_features); 287962306a36Sopenharmony_ci 288062306a36Sopenharmony_ci ret = set_request_path_attr(req->r_inode, req->r_dentry, 288162306a36Sopenharmony_ci req->r_parent, req->r_path1, req->r_ino1.ino, 288262306a36Sopenharmony_ci &path1, &pathlen1, &ino1, &freepath1, 288362306a36Sopenharmony_ci test_bit(CEPH_MDS_R_PARENT_LOCKED, 288462306a36Sopenharmony_ci &req->r_req_flags)); 288562306a36Sopenharmony_ci if (ret < 0) { 288662306a36Sopenharmony_ci msg = ERR_PTR(ret); 288762306a36Sopenharmony_ci goto out; 288862306a36Sopenharmony_ci } 288962306a36Sopenharmony_ci 289062306a36Sopenharmony_ci /* If r_old_dentry is set, then assume that its parent is locked */ 289162306a36Sopenharmony_ci if (req->r_old_dentry && 289262306a36Sopenharmony_ci !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED)) 289362306a36Sopenharmony_ci old_dentry = req->r_old_dentry; 289462306a36Sopenharmony_ci ret = set_request_path_attr(NULL, old_dentry, 289562306a36Sopenharmony_ci req->r_old_dentry_dir, 289662306a36Sopenharmony_ci req->r_path2, req->r_ino2.ino, 289762306a36Sopenharmony_ci &path2, &pathlen2, &ino2, &freepath2, true); 289862306a36Sopenharmony_ci if (ret < 0) { 289962306a36Sopenharmony_ci msg = ERR_PTR(ret); 290062306a36Sopenharmony_ci goto out_free1; 290162306a36Sopenharmony_ci } 290262306a36Sopenharmony_ci 290362306a36Sopenharmony_ci req->r_altname = get_fscrypt_altname(req, &req->r_altname_len); 290462306a36Sopenharmony_ci if (IS_ERR(req->r_altname)) { 290562306a36Sopenharmony_ci msg = ERR_CAST(req->r_altname); 290662306a36Sopenharmony_ci req->r_altname = NULL; 290762306a36Sopenharmony_ci goto out_free2; 290862306a36Sopenharmony_ci } 290962306a36Sopenharmony_ci 291062306a36Sopenharmony_ci /* 291162306a36Sopenharmony_ci * For old cephs without supporting the 
32bit retry/fwd feature 291262306a36Sopenharmony_ci * it will copy the raw memories directly when decoding the 291362306a36Sopenharmony_ci * requests. While new cephs will decode the head depending the 291462306a36Sopenharmony_ci * version member, so we need to make sure it will be compatible 291562306a36Sopenharmony_ci * with them both. 291662306a36Sopenharmony_ci */ 291762306a36Sopenharmony_ci if (legacy) 291862306a36Sopenharmony_ci len = sizeof(struct ceph_mds_request_head_legacy); 291962306a36Sopenharmony_ci else if (old_version) 292062306a36Sopenharmony_ci len = sizeof(struct ceph_mds_request_head_old); 292162306a36Sopenharmony_ci else 292262306a36Sopenharmony_ci len = sizeof(struct ceph_mds_request_head); 292362306a36Sopenharmony_ci 292462306a36Sopenharmony_ci /* filepaths */ 292562306a36Sopenharmony_ci len += 2 * (1 + sizeof(u32) + sizeof(u64)); 292662306a36Sopenharmony_ci len += pathlen1 + pathlen2; 292762306a36Sopenharmony_ci 292862306a36Sopenharmony_ci /* cap releases */ 292962306a36Sopenharmony_ci len += sizeof(struct ceph_mds_request_release) * 293062306a36Sopenharmony_ci (!!req->r_inode_drop + !!req->r_dentry_drop + 293162306a36Sopenharmony_ci !!req->r_old_inode_drop + !!req->r_old_dentry_drop); 293262306a36Sopenharmony_ci 293362306a36Sopenharmony_ci if (req->r_dentry_drop) 293462306a36Sopenharmony_ci len += pathlen1; 293562306a36Sopenharmony_ci if (req->r_old_dentry_drop) 293662306a36Sopenharmony_ci len += pathlen2; 293762306a36Sopenharmony_ci 293862306a36Sopenharmony_ci /* MClientRequest tail */ 293962306a36Sopenharmony_ci 294062306a36Sopenharmony_ci /* req->r_stamp */ 294162306a36Sopenharmony_ci len += sizeof(struct ceph_timespec); 294262306a36Sopenharmony_ci 294362306a36Sopenharmony_ci /* gid list */ 294462306a36Sopenharmony_ci len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups); 294562306a36Sopenharmony_ci 294662306a36Sopenharmony_ci /* alternate name */ 294762306a36Sopenharmony_ci len += sizeof(u32) + req->r_altname_len; 
294862306a36Sopenharmony_ci 294962306a36Sopenharmony_ci /* fscrypt_auth */ 295062306a36Sopenharmony_ci len += sizeof(u32); // fscrypt_auth 295162306a36Sopenharmony_ci if (req->r_fscrypt_auth) 295262306a36Sopenharmony_ci len += ceph_fscrypt_auth_len(req->r_fscrypt_auth); 295362306a36Sopenharmony_ci 295462306a36Sopenharmony_ci /* fscrypt_file */ 295562306a36Sopenharmony_ci len += sizeof(u32); 295662306a36Sopenharmony_ci if (test_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags)) 295762306a36Sopenharmony_ci len += sizeof(__le64); 295862306a36Sopenharmony_ci 295962306a36Sopenharmony_ci msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false); 296062306a36Sopenharmony_ci if (!msg) { 296162306a36Sopenharmony_ci msg = ERR_PTR(-ENOMEM); 296262306a36Sopenharmony_ci goto out_free2; 296362306a36Sopenharmony_ci } 296462306a36Sopenharmony_ci 296562306a36Sopenharmony_ci msg->hdr.tid = cpu_to_le64(req->r_tid); 296662306a36Sopenharmony_ci 296762306a36Sopenharmony_ci lhead = find_legacy_request_head(msg->front.iov_base, 296862306a36Sopenharmony_ci session->s_con.peer_features); 296962306a36Sopenharmony_ci 297062306a36Sopenharmony_ci /* 297162306a36Sopenharmony_ci * The ceph_mds_request_head_legacy didn't contain a version field, and 297262306a36Sopenharmony_ci * one was added when we moved the message version from 3->4. 
297362306a36Sopenharmony_ci */ 297462306a36Sopenharmony_ci if (legacy) { 297562306a36Sopenharmony_ci msg->hdr.version = cpu_to_le16(3); 297662306a36Sopenharmony_ci p = msg->front.iov_base + sizeof(*lhead); 297762306a36Sopenharmony_ci } else if (old_version) { 297862306a36Sopenharmony_ci struct ceph_mds_request_head_old *ohead = msg->front.iov_base; 297962306a36Sopenharmony_ci 298062306a36Sopenharmony_ci msg->hdr.version = cpu_to_le16(4); 298162306a36Sopenharmony_ci ohead->version = cpu_to_le16(1); 298262306a36Sopenharmony_ci p = msg->front.iov_base + sizeof(*ohead); 298362306a36Sopenharmony_ci } else { 298462306a36Sopenharmony_ci struct ceph_mds_request_head *nhead = msg->front.iov_base; 298562306a36Sopenharmony_ci 298662306a36Sopenharmony_ci msg->hdr.version = cpu_to_le16(6); 298762306a36Sopenharmony_ci nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION); 298862306a36Sopenharmony_ci p = msg->front.iov_base + sizeof(*nhead); 298962306a36Sopenharmony_ci } 299062306a36Sopenharmony_ci 299162306a36Sopenharmony_ci end = msg->front.iov_base + msg->front.iov_len; 299262306a36Sopenharmony_ci 299362306a36Sopenharmony_ci lhead->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); 299462306a36Sopenharmony_ci lhead->op = cpu_to_le32(req->r_op); 299562306a36Sopenharmony_ci lhead->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, 299662306a36Sopenharmony_ci req->r_cred->fsuid)); 299762306a36Sopenharmony_ci lhead->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, 299862306a36Sopenharmony_ci req->r_cred->fsgid)); 299962306a36Sopenharmony_ci lhead->ino = cpu_to_le64(req->r_deleg_ino); 300062306a36Sopenharmony_ci lhead->args = req->r_args; 300162306a36Sopenharmony_ci 300262306a36Sopenharmony_ci ceph_encode_filepath(&p, end, ino1, path1); 300362306a36Sopenharmony_ci ceph_encode_filepath(&p, end, ino2, path2); 300462306a36Sopenharmony_ci 300562306a36Sopenharmony_ci /* make note of release offset, in case we need to replay */ 300662306a36Sopenharmony_ci 
req->r_request_release_offset = p - msg->front.iov_base; 300762306a36Sopenharmony_ci 300862306a36Sopenharmony_ci /* cap releases */ 300962306a36Sopenharmony_ci releases = 0; 301062306a36Sopenharmony_ci if (req->r_inode_drop) 301162306a36Sopenharmony_ci releases += ceph_encode_inode_release(&p, 301262306a36Sopenharmony_ci req->r_inode ? req->r_inode : d_inode(req->r_dentry), 301362306a36Sopenharmony_ci mds, req->r_inode_drop, req->r_inode_unless, 301462306a36Sopenharmony_ci req->r_op == CEPH_MDS_OP_READDIR); 301562306a36Sopenharmony_ci if (req->r_dentry_drop) { 301662306a36Sopenharmony_ci ret = ceph_encode_dentry_release(&p, req->r_dentry, 301762306a36Sopenharmony_ci req->r_parent, mds, req->r_dentry_drop, 301862306a36Sopenharmony_ci req->r_dentry_unless); 301962306a36Sopenharmony_ci if (ret < 0) 302062306a36Sopenharmony_ci goto out_err; 302162306a36Sopenharmony_ci releases += ret; 302262306a36Sopenharmony_ci } 302362306a36Sopenharmony_ci if (req->r_old_dentry_drop) { 302462306a36Sopenharmony_ci ret = ceph_encode_dentry_release(&p, req->r_old_dentry, 302562306a36Sopenharmony_ci req->r_old_dentry_dir, mds, 302662306a36Sopenharmony_ci req->r_old_dentry_drop, 302762306a36Sopenharmony_ci req->r_old_dentry_unless); 302862306a36Sopenharmony_ci if (ret < 0) 302962306a36Sopenharmony_ci goto out_err; 303062306a36Sopenharmony_ci releases += ret; 303162306a36Sopenharmony_ci } 303262306a36Sopenharmony_ci if (req->r_old_inode_drop) 303362306a36Sopenharmony_ci releases += ceph_encode_inode_release(&p, 303462306a36Sopenharmony_ci d_inode(req->r_old_dentry), 303562306a36Sopenharmony_ci mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); 303662306a36Sopenharmony_ci 303762306a36Sopenharmony_ci if (drop_cap_releases) { 303862306a36Sopenharmony_ci releases = 0; 303962306a36Sopenharmony_ci p = msg->front.iov_base + req->r_request_release_offset; 304062306a36Sopenharmony_ci } 304162306a36Sopenharmony_ci 304262306a36Sopenharmony_ci lhead->num_releases = cpu_to_le16(releases); 
304362306a36Sopenharmony_ci 304462306a36Sopenharmony_ci encode_mclientrequest_tail(&p, req); 304562306a36Sopenharmony_ci 304662306a36Sopenharmony_ci if (WARN_ON_ONCE(p > end)) { 304762306a36Sopenharmony_ci ceph_msg_put(msg); 304862306a36Sopenharmony_ci msg = ERR_PTR(-ERANGE); 304962306a36Sopenharmony_ci goto out_free2; 305062306a36Sopenharmony_ci } 305162306a36Sopenharmony_ci 305262306a36Sopenharmony_ci msg->front.iov_len = p - msg->front.iov_base; 305362306a36Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 305462306a36Sopenharmony_ci 305562306a36Sopenharmony_ci if (req->r_pagelist) { 305662306a36Sopenharmony_ci struct ceph_pagelist *pagelist = req->r_pagelist; 305762306a36Sopenharmony_ci ceph_msg_data_add_pagelist(msg, pagelist); 305862306a36Sopenharmony_ci msg->hdr.data_len = cpu_to_le32(pagelist->length); 305962306a36Sopenharmony_ci } else { 306062306a36Sopenharmony_ci msg->hdr.data_len = 0; 306162306a36Sopenharmony_ci } 306262306a36Sopenharmony_ci 306362306a36Sopenharmony_ci msg->hdr.data_off = cpu_to_le16(0); 306462306a36Sopenharmony_ci 306562306a36Sopenharmony_ciout_free2: 306662306a36Sopenharmony_ci if (freepath2) 306762306a36Sopenharmony_ci ceph_mdsc_free_path((char *)path2, pathlen2); 306862306a36Sopenharmony_ciout_free1: 306962306a36Sopenharmony_ci if (freepath1) 307062306a36Sopenharmony_ci ceph_mdsc_free_path((char *)path1, pathlen1); 307162306a36Sopenharmony_ciout: 307262306a36Sopenharmony_ci return msg; 307362306a36Sopenharmony_ciout_err: 307462306a36Sopenharmony_ci ceph_msg_put(msg); 307562306a36Sopenharmony_ci msg = ERR_PTR(ret); 307662306a36Sopenharmony_ci goto out_free2; 307762306a36Sopenharmony_ci} 307862306a36Sopenharmony_ci 307962306a36Sopenharmony_ci/* 308062306a36Sopenharmony_ci * called under mdsc->mutex if error, under no mutex if 308162306a36Sopenharmony_ci * success. 
308262306a36Sopenharmony_ci */ 308362306a36Sopenharmony_cistatic void complete_request(struct ceph_mds_client *mdsc, 308462306a36Sopenharmony_ci struct ceph_mds_request *req) 308562306a36Sopenharmony_ci{ 308662306a36Sopenharmony_ci req->r_end_latency = ktime_get(); 308762306a36Sopenharmony_ci 308862306a36Sopenharmony_ci if (req->r_callback) 308962306a36Sopenharmony_ci req->r_callback(mdsc, req); 309062306a36Sopenharmony_ci complete_all(&req->r_completion); 309162306a36Sopenharmony_ci} 309262306a36Sopenharmony_ci 309362306a36Sopenharmony_ci/* 309462306a36Sopenharmony_ci * called under mdsc->mutex 309562306a36Sopenharmony_ci */ 309662306a36Sopenharmony_cistatic int __prepare_send_request(struct ceph_mds_session *session, 309762306a36Sopenharmony_ci struct ceph_mds_request *req, 309862306a36Sopenharmony_ci bool drop_cap_releases) 309962306a36Sopenharmony_ci{ 310062306a36Sopenharmony_ci int mds = session->s_mds; 310162306a36Sopenharmony_ci struct ceph_mds_client *mdsc = session->s_mdsc; 310262306a36Sopenharmony_ci struct ceph_mds_request_head_legacy *lhead; 310362306a36Sopenharmony_ci struct ceph_mds_request_head *nhead; 310462306a36Sopenharmony_ci struct ceph_msg *msg; 310562306a36Sopenharmony_ci int flags = 0, old_max_retry; 310662306a36Sopenharmony_ci bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD, 310762306a36Sopenharmony_ci &session->s_features); 310862306a36Sopenharmony_ci 310962306a36Sopenharmony_ci /* 311062306a36Sopenharmony_ci * Avoid inifinite retrying after overflow. The client will 311162306a36Sopenharmony_ci * increase the retry count and if the MDS is old version, 311262306a36Sopenharmony_ci * so we limit to retry at most 256 times. 
311362306a36Sopenharmony_ci */ 311462306a36Sopenharmony_ci if (req->r_attempts) { 311562306a36Sopenharmony_ci old_max_retry = sizeof_field(struct ceph_mds_request_head_old, 311662306a36Sopenharmony_ci num_retry); 311762306a36Sopenharmony_ci old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE); 311862306a36Sopenharmony_ci if ((old_version && req->r_attempts >= old_max_retry) || 311962306a36Sopenharmony_ci ((uint32_t)req->r_attempts >= U32_MAX)) { 312062306a36Sopenharmony_ci pr_warn_ratelimited("%s request tid %llu seq overflow\n", 312162306a36Sopenharmony_ci __func__, req->r_tid); 312262306a36Sopenharmony_ci return -EMULTIHOP; 312362306a36Sopenharmony_ci } 312462306a36Sopenharmony_ci } 312562306a36Sopenharmony_ci 312662306a36Sopenharmony_ci req->r_attempts++; 312762306a36Sopenharmony_ci if (req->r_inode) { 312862306a36Sopenharmony_ci struct ceph_cap *cap = 312962306a36Sopenharmony_ci ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds); 313062306a36Sopenharmony_ci 313162306a36Sopenharmony_ci if (cap) 313262306a36Sopenharmony_ci req->r_sent_on_mseq = cap->mseq; 313362306a36Sopenharmony_ci else 313462306a36Sopenharmony_ci req->r_sent_on_mseq = -1; 313562306a36Sopenharmony_ci } 313662306a36Sopenharmony_ci dout("%s %p tid %lld %s (attempt %d)\n", __func__, req, 313762306a36Sopenharmony_ci req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); 313862306a36Sopenharmony_ci 313962306a36Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { 314062306a36Sopenharmony_ci void *p; 314162306a36Sopenharmony_ci 314262306a36Sopenharmony_ci /* 314362306a36Sopenharmony_ci * Replay. Do not regenerate message (and rebuild 314462306a36Sopenharmony_ci * paths, etc.); just use the original message. 314562306a36Sopenharmony_ci * Rebuilding paths will break for renames because 314662306a36Sopenharmony_ci * d_move mangles the src name. 
314762306a36Sopenharmony_ci */ 314862306a36Sopenharmony_ci msg = req->r_request; 314962306a36Sopenharmony_ci lhead = find_legacy_request_head(msg->front.iov_base, 315062306a36Sopenharmony_ci session->s_con.peer_features); 315162306a36Sopenharmony_ci 315262306a36Sopenharmony_ci flags = le32_to_cpu(lhead->flags); 315362306a36Sopenharmony_ci flags |= CEPH_MDS_FLAG_REPLAY; 315462306a36Sopenharmony_ci lhead->flags = cpu_to_le32(flags); 315562306a36Sopenharmony_ci 315662306a36Sopenharmony_ci if (req->r_target_inode) 315762306a36Sopenharmony_ci lhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode)); 315862306a36Sopenharmony_ci 315962306a36Sopenharmony_ci lhead->num_retry = req->r_attempts - 1; 316062306a36Sopenharmony_ci if (!old_version) { 316162306a36Sopenharmony_ci nhead = (struct ceph_mds_request_head*)msg->front.iov_base; 316262306a36Sopenharmony_ci nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1); 316362306a36Sopenharmony_ci } 316462306a36Sopenharmony_ci 316562306a36Sopenharmony_ci /* remove cap/dentry releases from message */ 316662306a36Sopenharmony_ci lhead->num_releases = 0; 316762306a36Sopenharmony_ci 316862306a36Sopenharmony_ci p = msg->front.iov_base + req->r_request_release_offset; 316962306a36Sopenharmony_ci encode_mclientrequest_tail(&p, req); 317062306a36Sopenharmony_ci 317162306a36Sopenharmony_ci msg->front.iov_len = p - msg->front.iov_base; 317262306a36Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 317362306a36Sopenharmony_ci return 0; 317462306a36Sopenharmony_ci } 317562306a36Sopenharmony_ci 317662306a36Sopenharmony_ci if (req->r_request) { 317762306a36Sopenharmony_ci ceph_msg_put(req->r_request); 317862306a36Sopenharmony_ci req->r_request = NULL; 317962306a36Sopenharmony_ci } 318062306a36Sopenharmony_ci msg = create_request_message(session, req, drop_cap_releases); 318162306a36Sopenharmony_ci if (IS_ERR(msg)) { 318262306a36Sopenharmony_ci req->r_err = PTR_ERR(msg); 318362306a36Sopenharmony_ci return PTR_ERR(msg); 
	}
	req->r_request = msg;

	lhead = find_legacy_request_head(msg->front.iov_base,
					 session->s_con.peer_features);
	lhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
	if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
		flags |= CEPH_MDS_FLAG_REPLAY;
	if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags))
		flags |= CEPH_MDS_FLAG_ASYNC;
	if (req->r_parent)
		flags |= CEPH_MDS_FLAG_WANT_DENTRY;
	lhead->flags = cpu_to_le32(flags);
	lhead->num_fwd = req->r_num_fwd;
	lhead->num_retry = req->r_attempts - 1;
	if (!old_version) {
		nhead = (struct ceph_mds_request_head*)msg->front.iov_base;
		nhead->ext_num_fwd = cpu_to_le32(req->r_num_fwd);
		nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1);
	}

	dout(" r_parent = %p\n", req->r_parent);
	return 0;
}

/*
 * Prepare the message for @req and queue it on @session's connection.
 *
 * __prepare_send_request() is run first; only when it returns 0 is
 * ceph_msg_get() called on req->r_request and the message passed to
 * ceph_con_send() (presumably the extra get lets the request keep its
 * own reference to the message -- confirm against the messenger API).
 *
 * Returns 0 on success, otherwise the error from
 * __prepare_send_request(); nothing is queued in that case.
 *
 * called under mdsc->mutex
 */
static int __send_request(struct ceph_mds_session *session,
			  struct ceph_mds_request *req,
			  bool drop_cap_releases)
{
	int err;

	err = __prepare_send_request(session, req, drop_cap_releases);
	if (!err) {
		ceph_msg_get(req->r_request);
		ceph_con_send(&session->s_con, req->r_request);
	}

	return err;
}

/*
 * send request, or put it on the appropriate wait list.
 *
 * Outcomes, in the order they are checked:
 *  - @req already carries r_err or a result: nothing is sent (and the
 *    request is unregistered if it was also aborted).
 *  - mount is fenced, the request timed out, a forced umount is in
 *    progress, or the mdsmap is unusable while mounting: the request is
 *    failed through the "finish" label.
 *  - no usable MDS, or the session is not OPEN/HUNG: the request is put on
 *    mdsc->waiting_for_map or session->s_waiting for a later retry.  Async
 *    requests are failed with -EJUKEBOX instead of being queued.
 *  - otherwise the request is sent with __send_request().
 *
 * Any non-zero err that reaches "finish" is stored in req->r_err; the
 * request is then completed and unregistered.
 *
 * ceph_mdsc_submit_request() calls this with mdsc->mutex held.
 */
static void __do_request(struct ceph_mds_client *mdsc,
			struct ceph_mds_request *req)
{
	struct ceph_mds_session *session = NULL;
	int mds = -1;
	int err = 0;
	bool random;

	/* already failed or answered: do not (re)send */
	if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
		if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
			__unregister_request(mdsc, req);
		return;
	}

	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_FENCE_IO) {
		dout("do_request metadata corrupted\n");
		err = -EIO;
		goto finish;
	}
	/* r_timeout == 0 means the request has no deadline */
	if (req->r_timeout &&
	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
		dout("do_request timed out\n");
		err = -ETIMEDOUT;
		goto finish;
	}
	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
		dout("do_request forced umount\n");
		err = -EIO;
		goto finish;
	}
	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
		if (mdsc->mdsmap_err) {
			err = mdsc->mdsmap_err;
			dout("do_request mdsmap err %d\n", err);
			goto finish;
		}
		if (mdsc->mdsmap->m_epoch == 0) {
			dout("do_request no mdsmap, waiting for map\n");
			list_add(&req->r_wait, &mdsc->waiting_for_map);
			return;
		}
		/* fail fast unless the mount asked to wait for the cluster */
		if (!(mdsc->fsc->mount_options->flags &
		      CEPH_MOUNT_OPT_MOUNTWAIT) &&
		    !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
			err = -EHOSTUNREACH;
			goto finish;
		}
	}

	/* release the session from any previous attempt before re-choosing */
	put_request_session(req);

	mds = __choose_mds(mdsc, req, &random);
	if (mds < 0 ||
	    ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
		if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) {
			err = -EJUKEBOX;
			goto finish;
		}
		dout("do_request no mds or not active, waiting for map\n");
		list_add(&req->r_wait, &mdsc->waiting_for_map);
		return;
	}

	/* get, open session */
	session = __ceph_lookup_mds_session(mdsc, mds);
	if (!session) {
		session = register_session(mdsc, mds);
		if (IS_ERR(session)) {
			err = PTR_ERR(session);
			goto finish;
		}
	}
	/*
	 * The request gets its own session reference here; the local
	 * "session" reference is dropped at out_session.
	 */
	req->r_session = ceph_get_mds_session(session);

	dout("do_request mds%d session %p state %s\n", mds, session,
	     ceph_session_state_name(session->s_state));

	/*
	 * Older ceph MDSs crash when they see unknown ops, so refuse to
	 * send a request whose required feature bit the session lacks.
	 */
	if (req->r_feature_needed > 0 &&
	    !test_bit(req->r_feature_needed, &session->s_features)) {
		err = -EOPNOTSUPP;
		goto out_session;
	}

	if (session->s_state != CEPH_MDS_SESSION_OPEN &&
	    session->s_state != CEPH_MDS_SESSION_HUNG) {
		/*
		 * We cannot queue async requests since the caps and delegated
		 * inodes are bound to the session. Just return -EJUKEBOX and
		 * let the caller retry a sync request in that case.
		 */
		if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) {
			err = -EJUKEBOX;
			goto out_session;
		}

		/*
		 * If the session has been REJECTED, then return a hard error,
		 * unless it's a CLEANRECOVER mount, in which case we'll queue
		 * it to the mdsc queue.
		 */
		if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
			if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER))
				list_add(&req->r_wait, &mdsc->waiting_for_map);
			else
				err = -EACCES;
			goto out_session;
		}

		if (session->s_state == CEPH_MDS_SESSION_NEW ||
		    session->s_state == CEPH_MDS_SESSION_CLOSING) {
			err = __open_session(mdsc, session);
			if (err)
				goto out_session;
			/* retry the same mds later */
			if (random)
				req->r_resend_mds = mds;
		}
		/* park the request on the session until it is usable */
		list_add(&req->r_wait, &session->s_waiting);
		goto out_session;
	}

	/* send request */
	req->r_resend_mds = -1;   /* forget any previous mds hint */

	if (req->r_request_started == 0)   /* note request start time */
		req->r_request_started = jiffies;

	/*
	 * For an async create we choose the auth MDS of the frag in the
	 * parent directory to send the request to, and usually this works
	 * fine.  But if the directory was migrated to another MDS before
	 * that MDS could handle the request, the request will be forwarded.
	 *
	 * And then the auth cap will be changed.
	 */
	if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags) && req->r_num_fwd) {
		struct ceph_dentry_info *di = ceph_dentry(req->r_dentry);
		struct ceph_inode_info *ci;
		struct ceph_cap *cap;

		/*
		 * The request may be handled very fast, before the new inode
		 * has been linked to the dentry. We need to wait for
		 * ceph_finish_async_create() -- which in theory shouldn't be
		 * stuck for long or fail -- to finish when forwarding the
		 * request.
		 */
		if (!d_inode(req->r_dentry)) {
			err = wait_on_bit(&di->flags, CEPH_DENTRY_ASYNC_CREATE_BIT,
					  TASK_KILLABLE);
			if (err) {
				/* interrupted wait: mark the request aborted */
				mutex_lock(&req->r_fill_mutex);
				set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
				mutex_unlock(&req->r_fill_mutex);
				goto out_session;
			}
		}

		ci = ceph_inode(d_inode(req->r_dentry));

		spin_lock(&ci->i_ceph_lock);
		cap = ci->i_auth_cap;
		/* NOTE(review): cap is dereferenced unchecked; assumes i_auth_cap is set here */
		if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE && mds != cap->mds) {
			dout("do_request session changed for auth cap %d -> %d\n",
			     cap->session->s_mds, session->s_mds);

			/* Remove the auth cap from old session */
			spin_lock(&cap->session->s_cap_lock);
			cap->session->s_nr_caps--;
			list_del_init(&cap->session_caps);
			spin_unlock(&cap->session->s_cap_lock);

			/* Add the auth cap to the new session */
			cap->mds = mds;
			cap->session = session;
			spin_lock(&session->s_cap_lock);
			session->s_nr_caps++;
			list_add_tail(&cap->session_caps, &session->s_caps);
			spin_unlock(&session->s_cap_lock);

			change_auth_cap_ses(ci, session);
		}
		spin_unlock(&ci->i_ceph_lock);
	}

	err = __send_request(session, req, false);

out_session:
	/* drop the local reference; req->r_session holds its own */
	ceph_put_mds_session(session);
finish:
	if (err) {
		dout("__do_request early error %d\n", err);
		req->r_err = err;
		complete_request(mdsc, req);
		__unregister_request(mdsc, req);
	}
	return;
}

/*
 * Re-run __do_request() for every request queued on @head.
 *
 * The list is first spliced onto a private list, so entries that
 * __do_request() adds to @head again are not processed in this pass.
 *
 * called under mdsc->mutex
 */
static void __wake_requests(struct ceph_mds_client *mdsc,
			    struct list_head *head)
{
	struct ceph_mds_request *req;
	LIST_HEAD(tmp_list);

	list_splice_init(head, &tmp_list);
344262306a36Sopenharmony_ci 344362306a36Sopenharmony_ci while (!list_empty(&tmp_list)) { 344462306a36Sopenharmony_ci req = list_entry(tmp_list.next, 344562306a36Sopenharmony_ci struct ceph_mds_request, r_wait); 344662306a36Sopenharmony_ci list_del_init(&req->r_wait); 344762306a36Sopenharmony_ci dout(" wake request %p tid %llu\n", req, req->r_tid); 344862306a36Sopenharmony_ci __do_request(mdsc, req); 344962306a36Sopenharmony_ci } 345062306a36Sopenharmony_ci} 345162306a36Sopenharmony_ci 345262306a36Sopenharmony_ci/* 345362306a36Sopenharmony_ci * Wake up threads with requests pending for @mds, so that they can 345462306a36Sopenharmony_ci * resubmit their requests to a possibly different mds. 345562306a36Sopenharmony_ci */ 345662306a36Sopenharmony_cistatic void kick_requests(struct ceph_mds_client *mdsc, int mds) 345762306a36Sopenharmony_ci{ 345862306a36Sopenharmony_ci struct ceph_mds_request *req; 345962306a36Sopenharmony_ci struct rb_node *p = rb_first(&mdsc->request_tree); 346062306a36Sopenharmony_ci 346162306a36Sopenharmony_ci dout("kick_requests mds%d\n", mds); 346262306a36Sopenharmony_ci while (p) { 346362306a36Sopenharmony_ci req = rb_entry(p, struct ceph_mds_request, r_node); 346462306a36Sopenharmony_ci p = rb_next(p); 346562306a36Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) 346662306a36Sopenharmony_ci continue; 346762306a36Sopenharmony_ci if (req->r_attempts > 0) 346862306a36Sopenharmony_ci continue; /* only new requests */ 346962306a36Sopenharmony_ci if (req->r_session && 347062306a36Sopenharmony_ci req->r_session->s_mds == mds) { 347162306a36Sopenharmony_ci dout(" kicking tid %llu\n", req->r_tid); 347262306a36Sopenharmony_ci list_del_init(&req->r_wait); 347362306a36Sopenharmony_ci __do_request(mdsc, req); 347462306a36Sopenharmony_ci } 347562306a36Sopenharmony_ci } 347662306a36Sopenharmony_ci} 347762306a36Sopenharmony_ci 347862306a36Sopenharmony_ciint ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, 
347962306a36Sopenharmony_ci struct ceph_mds_request *req) 348062306a36Sopenharmony_ci{ 348162306a36Sopenharmony_ci int err = 0; 348262306a36Sopenharmony_ci 348362306a36Sopenharmony_ci /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ 348462306a36Sopenharmony_ci if (req->r_inode) 348562306a36Sopenharmony_ci ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 348662306a36Sopenharmony_ci if (req->r_parent) { 348762306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(req->r_parent); 348862306a36Sopenharmony_ci int fmode = (req->r_op & CEPH_MDS_OP_WRITE) ? 348962306a36Sopenharmony_ci CEPH_FILE_MODE_WR : CEPH_FILE_MODE_RD; 349062306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 349162306a36Sopenharmony_ci ceph_take_cap_refs(ci, CEPH_CAP_PIN, false); 349262306a36Sopenharmony_ci __ceph_touch_fmode(ci, mdsc, fmode); 349362306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 349462306a36Sopenharmony_ci } 349562306a36Sopenharmony_ci if (req->r_old_dentry_dir) 349662306a36Sopenharmony_ci ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), 349762306a36Sopenharmony_ci CEPH_CAP_PIN); 349862306a36Sopenharmony_ci 349962306a36Sopenharmony_ci if (req->r_inode) { 350062306a36Sopenharmony_ci err = ceph_wait_on_async_create(req->r_inode); 350162306a36Sopenharmony_ci if (err) { 350262306a36Sopenharmony_ci dout("%s: wait for async create returned: %d\n", 350362306a36Sopenharmony_ci __func__, err); 350462306a36Sopenharmony_ci return err; 350562306a36Sopenharmony_ci } 350662306a36Sopenharmony_ci } 350762306a36Sopenharmony_ci 350862306a36Sopenharmony_ci if (!err && req->r_old_inode) { 350962306a36Sopenharmony_ci err = ceph_wait_on_async_create(req->r_old_inode); 351062306a36Sopenharmony_ci if (err) { 351162306a36Sopenharmony_ci dout("%s: wait for async create returned: %d\n", 351262306a36Sopenharmony_ci __func__, err); 351362306a36Sopenharmony_ci return err; 351462306a36Sopenharmony_ci } 351562306a36Sopenharmony_ci } 351662306a36Sopenharmony_ci 
351762306a36Sopenharmony_ci dout("submit_request on %p for inode %p\n", req, dir); 351862306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 351962306a36Sopenharmony_ci __register_request(mdsc, req, dir); 352062306a36Sopenharmony_ci __do_request(mdsc, req); 352162306a36Sopenharmony_ci err = req->r_err; 352262306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 352362306a36Sopenharmony_ci return err; 352462306a36Sopenharmony_ci} 352562306a36Sopenharmony_ci 352662306a36Sopenharmony_ciint ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, 352762306a36Sopenharmony_ci struct ceph_mds_request *req, 352862306a36Sopenharmony_ci ceph_mds_request_wait_callback_t wait_func) 352962306a36Sopenharmony_ci{ 353062306a36Sopenharmony_ci int err; 353162306a36Sopenharmony_ci 353262306a36Sopenharmony_ci /* wait */ 353362306a36Sopenharmony_ci dout("do_request waiting\n"); 353462306a36Sopenharmony_ci if (wait_func) { 353562306a36Sopenharmony_ci err = wait_func(mdsc, req); 353662306a36Sopenharmony_ci } else { 353762306a36Sopenharmony_ci long timeleft = wait_for_completion_killable_timeout( 353862306a36Sopenharmony_ci &req->r_completion, 353962306a36Sopenharmony_ci ceph_timeout_jiffies(req->r_timeout)); 354062306a36Sopenharmony_ci if (timeleft > 0) 354162306a36Sopenharmony_ci err = 0; 354262306a36Sopenharmony_ci else if (!timeleft) 354362306a36Sopenharmony_ci err = -ETIMEDOUT; /* timed out */ 354462306a36Sopenharmony_ci else 354562306a36Sopenharmony_ci err = timeleft; /* killed */ 354662306a36Sopenharmony_ci } 354762306a36Sopenharmony_ci dout("do_request waited, got %d\n", err); 354862306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 354962306a36Sopenharmony_ci 355062306a36Sopenharmony_ci /* only abort if we didn't race with a real reply */ 355162306a36Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { 355262306a36Sopenharmony_ci err = le32_to_cpu(req->r_reply_info.head->result); 355362306a36Sopenharmony_ci } else if (err < 0) { 355462306a36Sopenharmony_ci dout("aborted 
request %lld with %d\n", req->r_tid, err); 355562306a36Sopenharmony_ci 355662306a36Sopenharmony_ci /* 355762306a36Sopenharmony_ci * ensure we aren't running concurrently with 355862306a36Sopenharmony_ci * ceph_fill_trace or ceph_readdir_prepopulate, which 355962306a36Sopenharmony_ci * rely on locks (dir mutex) held by our caller. 356062306a36Sopenharmony_ci */ 356162306a36Sopenharmony_ci mutex_lock(&req->r_fill_mutex); 356262306a36Sopenharmony_ci req->r_err = err; 356362306a36Sopenharmony_ci set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); 356462306a36Sopenharmony_ci mutex_unlock(&req->r_fill_mutex); 356562306a36Sopenharmony_ci 356662306a36Sopenharmony_ci if (req->r_parent && 356762306a36Sopenharmony_ci (req->r_op & CEPH_MDS_OP_WRITE)) 356862306a36Sopenharmony_ci ceph_invalidate_dir_request(req); 356962306a36Sopenharmony_ci } else { 357062306a36Sopenharmony_ci err = req->r_err; 357162306a36Sopenharmony_ci } 357262306a36Sopenharmony_ci 357362306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 357462306a36Sopenharmony_ci return err; 357562306a36Sopenharmony_ci} 357662306a36Sopenharmony_ci 357762306a36Sopenharmony_ci/* 357862306a36Sopenharmony_ci * Synchrously perform an mds request. Take care of all of the 357962306a36Sopenharmony_ci * session setup, forwarding, retry details. 
358062306a36Sopenharmony_ci */ 358162306a36Sopenharmony_ciint ceph_mdsc_do_request(struct ceph_mds_client *mdsc, 358262306a36Sopenharmony_ci struct inode *dir, 358362306a36Sopenharmony_ci struct ceph_mds_request *req) 358462306a36Sopenharmony_ci{ 358562306a36Sopenharmony_ci int err; 358662306a36Sopenharmony_ci 358762306a36Sopenharmony_ci dout("do_request on %p\n", req); 358862306a36Sopenharmony_ci 358962306a36Sopenharmony_ci /* issue */ 359062306a36Sopenharmony_ci err = ceph_mdsc_submit_request(mdsc, dir, req); 359162306a36Sopenharmony_ci if (!err) 359262306a36Sopenharmony_ci err = ceph_mdsc_wait_request(mdsc, req, NULL); 359362306a36Sopenharmony_ci dout("do_request %p done, result %d\n", req, err); 359462306a36Sopenharmony_ci return err; 359562306a36Sopenharmony_ci} 359662306a36Sopenharmony_ci 359762306a36Sopenharmony_ci/* 359862306a36Sopenharmony_ci * Invalidate dir's completeness, dentry lease state on an aborted MDS 359962306a36Sopenharmony_ci * namespace request. 360062306a36Sopenharmony_ci */ 360162306a36Sopenharmony_civoid ceph_invalidate_dir_request(struct ceph_mds_request *req) 360262306a36Sopenharmony_ci{ 360362306a36Sopenharmony_ci struct inode *dir = req->r_parent; 360462306a36Sopenharmony_ci struct inode *old_dir = req->r_old_dentry_dir; 360562306a36Sopenharmony_ci 360662306a36Sopenharmony_ci dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir); 360762306a36Sopenharmony_ci 360862306a36Sopenharmony_ci ceph_dir_clear_complete(dir); 360962306a36Sopenharmony_ci if (old_dir) 361062306a36Sopenharmony_ci ceph_dir_clear_complete(old_dir); 361162306a36Sopenharmony_ci if (req->r_dentry) 361262306a36Sopenharmony_ci ceph_invalidate_dentry_lease(req->r_dentry); 361362306a36Sopenharmony_ci if (req->r_old_dentry) 361462306a36Sopenharmony_ci ceph_invalidate_dentry_lease(req->r_old_dentry); 361562306a36Sopenharmony_ci} 361662306a36Sopenharmony_ci 361762306a36Sopenharmony_ci/* 361862306a36Sopenharmony_ci * Handle mds reply. 
 *
 * We take the session mutex and parse and process the reply immediately.
 * This preserves the logical ordering of replies, capabilities, etc., sent
 * by the MDS as they are applied to our local cache.
 *
 * Locking as visible in this function: mdsc->mutex covers the request
 * lookup, the duplicate checks and the parsing; it is dropped before the
 * target inode is looked up.  session->s_mutex is then held until after
 * the out_err section, with mdsc->snap_rwsem and req->r_fill_mutex nested
 * inside it while the trace is filled in.
 *
 * A reply that is too short, has an unknown tid, arrives on the wrong
 * session or is a duplicate is dropped without completing the request.
 */
static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
{
	struct ceph_mds_client *mdsc = session->s_mdsc;
	struct ceph_mds_request *req;
	struct ceph_mds_reply_head *head = msg->front.iov_base;
	struct ceph_mds_reply_info_parsed *rinfo;  /* parsed reply info */
	struct ceph_snap_realm *realm;
	u64 tid;
	int err, result;
	int mds = session->s_mds;
	bool close_sessions = false;

	/* head is only safe to read once the front is known to hold it */
	if (msg->front.iov_len < sizeof(*head)) {
		pr_err("mdsc_handle_reply got corrupt (short) reply\n");
		ceph_msg_dump(msg);
		return;
	}

	/* get request, session */
	tid = le64_to_cpu(msg->hdr.tid);
	mutex_lock(&mdsc->mutex);
	/*
	 * presumably returns the request with a reference held; it is
	 * released by ceph_mdsc_put_request() at "out" -- confirm
	 */
	req = lookup_get_request(mdsc, tid);
	if (!req) {
		dout("handle_reply on unknown tid %llu\n", tid);
		mutex_unlock(&mdsc->mutex);
		return;
	}
	dout("handle_reply %p\n", req);

	/* correct session? */
	if (req->r_session != session) {
		pr_err("mdsc_handle_reply got %llu on session mds%d"
		       " not mds%d\n", tid, session->s_mds,
		       req->r_session ? req->r_session->s_mds : -1);
		mutex_unlock(&mdsc->mutex);
		goto out;
	}

	/* dup? */
	if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) ||
	    (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) {
		pr_warn("got a dup %s reply on %llu from mds%d\n",
			   head->safe ? "safe" : "unsafe", tid, mds);
		mutex_unlock(&mdsc->mutex);
		goto out;
	}
	/* a safe duplicate was caught above, so this reply must be unsafe */
	if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) {
		pr_warn("got unsafe after safe on %llu from mds%d\n",
			   tid, mds);
		mutex_unlock(&mdsc->mutex);
		goto out;
	}

	result = le32_to_cpu(head->result);

	if (head->safe) {
		set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags);
		__unregister_request(mdsc, req);

		/* last request during umount? */
		if (mdsc->stopping && !__get_oldest_req(mdsc))
			complete_all(&mdsc->safe_umount_waiters);

		if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
			/*
			 * We already handled the unsafe response, now do the
			 * cleanup.  No need to examine the response; the MDS
			 * doesn't include any result info in the safe
			 * response.  And even if it did, there is nothing
			 * useful we could do with a revised return value.
			 */
			dout("got safe reply %llu, mds%d\n", tid, mds);

			mutex_unlock(&mdsc->mutex);
			goto out;
		}
	} else {
		/* first (unsafe) reply: track the request on the session */
		set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags);
		list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
	}

	dout("handle_reply tid %lld result %d\n", tid, result);
	/*
	 * With REPLY_ENCODING the parser is given an all-ones feature mask
	 * instead of the connection's peer features.
	 */
	if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features))
		err = parse_reply_info(session, msg, req, (u64)-1);
	else
		err = parse_reply_info(session, msg, req,
				       session->s_con.peer_features);
	mutex_unlock(&mdsc->mutex);

	/* Must find target inode outside of mutexes to avoid deadlocks */
	rinfo = &req->r_reply_info;
	if ((err >= 0) && rinfo->head->is_target) {
		/* take over r_new_inode, leaving NULL behind in the request */
		struct inode *in = xchg(&req->r_new_inode, NULL);
		struct ceph_vino tvino = {
			.ino  = le64_to_cpu(rinfo->targeti.in->ino),
			.snap = le64_to_cpu(rinfo->targeti.in->snapid)
		};

		/*
		 * If we ended up opening an existing inode, discard
		 * r_new_inode
		 */
		if (req->r_op == CEPH_MDS_OP_CREATE &&
		    !req->r_reply_info.has_create_ino) {
			/* This should never happen on an async create */
			WARN_ON_ONCE(req->r_deleg_ino);
			iput(in);
			in = NULL;
		}

		in = ceph_get_inode(mdsc->fsc->sb, tvino, in);
		if (IS_ERR(in)) {
			err = PTR_ERR(in);
			/* out_err unlocks s_mutex, so take it before jumping */
			mutex_lock(&session->s_mutex);
			goto out_err;
		}
		req->r_target_inode = in;
	}

	mutex_lock(&session->s_mutex);
	/* a parse failure is only reported now, with s_mutex held */
	if (err < 0) {
		pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
		ceph_msg_dump(msg);
		goto out_err;
	}

	/* snap trace */
	realm = NULL;
	if (rinfo->snapblob_len) {
		/* updating the snap trace needs snap_rwsem held for write */
		down_write(&mdsc->snap_rwsem);
		err = ceph_update_snap_trace(mdsc, rinfo->snapblob,
				rinfo->snapblob + rinfo->snapblob_len,
				le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP,
				&realm);
		if (err) {
			up_write(&mdsc->snap_rwsem);
			/* acted on after "out", once s_mutex has been released */
			close_sessions = true;
			if (err == -EIO)
				ceph_msg_dump(msg);
			goto out_err;
		}
		/* from here both branches hold snap_rwsem for read */
		downgrade_write(&mdsc->snap_rwsem);
	} else {
		down_read(&mdsc->snap_rwsem);
	}

	/* insert trace into our cache */
	mutex_lock(&req->r_fill_mutex);
	/* journal_info points at req only while the trace is being filled */
	current->journal_info = req;
	err = ceph_fill_trace(mdsc->fsc->sb, req);
	if (err == 0) {
		/* directory listings are prepopulated only on MDS success */
		if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
				    req->r_op == CEPH_MDS_OP_LSSNAP))
			err = ceph_readdir_prepopulate(req, req->r_session);
	}
	current->journal_info = NULL;
	mutex_unlock(&req->r_fill_mutex);

	up_read(&mdsc->snap_rwsem);
	if (realm)
		ceph_put_snap_realm(mdsc, realm);

	if (err == 0) {
		/* link an unsafe request to its target inode's unsafe list */
		if (req->r_target_inode &&
		    test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
			struct ceph_inode_info *ci =
				ceph_inode(req->r_target_inode);
			spin_lock(&ci->i_unsafe_lock);
			list_add_tail(&req->r_unsafe_target_item,
				      &ci->i_unsafe_iops);
			spin_unlock(&ci->i_unsafe_lock);
		}

		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
	}
out_err:
	/* every path reaching this label holds session->s_mutex */
	mutex_lock(&mdsc->mutex);
	if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
		if (err) {
			req->r_err = err;
		} else {
			req->r_reply = ceph_msg_get(msg);
			set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags);
		}
	} else {
		/* an aborted request keeps the r_err set by whoever aborted it */
		dout("reply arrived after request %lld was aborted\n", tid);
	}
	mutex_unlock(&mdsc->mutex);

	mutex_unlock(&session->s_mutex);

	/* kick calling process */
	complete_request(mdsc, req);

	ceph_update_metadata_metrics(&mdsc->metric, req->r_start_latency,
				     req->r_end_latency, err);
out:
	ceph_mdsc_put_request(req);

	/* Defer closing the sessions after s_mutex lock being released */
	if (close_sessions)
		ceph_mdsc_close_sessions(mdsc);
	return;
}



/*
 * handle mds notification that our request has been forwarded.
383362306a36Sopenharmony_ci */ 383462306a36Sopenharmony_cistatic void handle_forward(struct ceph_mds_client *mdsc, 383562306a36Sopenharmony_ci struct ceph_mds_session *session, 383662306a36Sopenharmony_ci struct ceph_msg *msg) 383762306a36Sopenharmony_ci{ 383862306a36Sopenharmony_ci struct ceph_mds_request *req; 383962306a36Sopenharmony_ci u64 tid = le64_to_cpu(msg->hdr.tid); 384062306a36Sopenharmony_ci u32 next_mds; 384162306a36Sopenharmony_ci u32 fwd_seq; 384262306a36Sopenharmony_ci int err = -EINVAL; 384362306a36Sopenharmony_ci void *p = msg->front.iov_base; 384462306a36Sopenharmony_ci void *end = p + msg->front.iov_len; 384562306a36Sopenharmony_ci bool aborted = false; 384662306a36Sopenharmony_ci 384762306a36Sopenharmony_ci ceph_decode_need(&p, end, 2*sizeof(u32), bad); 384862306a36Sopenharmony_ci next_mds = ceph_decode_32(&p); 384962306a36Sopenharmony_ci fwd_seq = ceph_decode_32(&p); 385062306a36Sopenharmony_ci 385162306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 385262306a36Sopenharmony_ci req = lookup_get_request(mdsc, tid); 385362306a36Sopenharmony_ci if (!req) { 385462306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 385562306a36Sopenharmony_ci dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); 385662306a36Sopenharmony_ci return; /* dup reply? */ 385762306a36Sopenharmony_ci } 385862306a36Sopenharmony_ci 385962306a36Sopenharmony_ci if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { 386062306a36Sopenharmony_ci dout("forward tid %llu aborted, unregistering\n", tid); 386162306a36Sopenharmony_ci __unregister_request(mdsc, req); 386262306a36Sopenharmony_ci } else if (fwd_seq <= req->r_num_fwd || (uint32_t)fwd_seq >= U32_MAX) { 386362306a36Sopenharmony_ci /* 386462306a36Sopenharmony_ci * Avoid inifinite retrying after overflow. 
386562306a36Sopenharmony_ci * 386662306a36Sopenharmony_ci * The MDS will increase the fwd count and in client side 386762306a36Sopenharmony_ci * if the num_fwd is less than the one saved in request 386862306a36Sopenharmony_ci * that means the MDS is an old version and overflowed of 386962306a36Sopenharmony_ci * 8 bits. 387062306a36Sopenharmony_ci */ 387162306a36Sopenharmony_ci mutex_lock(&req->r_fill_mutex); 387262306a36Sopenharmony_ci req->r_err = -EMULTIHOP; 387362306a36Sopenharmony_ci set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); 387462306a36Sopenharmony_ci mutex_unlock(&req->r_fill_mutex); 387562306a36Sopenharmony_ci aborted = true; 387662306a36Sopenharmony_ci pr_warn_ratelimited("forward tid %llu seq overflow\n", tid); 387762306a36Sopenharmony_ci } else { 387862306a36Sopenharmony_ci /* resend. forward race not possible; mds would drop */ 387962306a36Sopenharmony_ci dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); 388062306a36Sopenharmony_ci BUG_ON(req->r_err); 388162306a36Sopenharmony_ci BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)); 388262306a36Sopenharmony_ci req->r_attempts = 0; 388362306a36Sopenharmony_ci req->r_num_fwd = fwd_seq; 388462306a36Sopenharmony_ci req->r_resend_mds = next_mds; 388562306a36Sopenharmony_ci put_request_session(req); 388662306a36Sopenharmony_ci __do_request(mdsc, req); 388762306a36Sopenharmony_ci } 388862306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 388962306a36Sopenharmony_ci 389062306a36Sopenharmony_ci /* kick calling process */ 389162306a36Sopenharmony_ci if (aborted) 389262306a36Sopenharmony_ci complete_request(mdsc, req); 389362306a36Sopenharmony_ci ceph_mdsc_put_request(req); 389462306a36Sopenharmony_ci return; 389562306a36Sopenharmony_ci 389662306a36Sopenharmony_cibad: 389762306a36Sopenharmony_ci pr_err("mdsc_handle_forward decode error err=%d\n", err); 389862306a36Sopenharmony_ci ceph_msg_dump(msg); 389962306a36Sopenharmony_ci} 390062306a36Sopenharmony_ci 390162306a36Sopenharmony_cistatic 
int __decode_session_metadata(void **p, void *end, 390262306a36Sopenharmony_ci bool *blocklisted) 390362306a36Sopenharmony_ci{ 390462306a36Sopenharmony_ci /* map<string,string> */ 390562306a36Sopenharmony_ci u32 n; 390662306a36Sopenharmony_ci bool err_str; 390762306a36Sopenharmony_ci ceph_decode_32_safe(p, end, n, bad); 390862306a36Sopenharmony_ci while (n-- > 0) { 390962306a36Sopenharmony_ci u32 len; 391062306a36Sopenharmony_ci ceph_decode_32_safe(p, end, len, bad); 391162306a36Sopenharmony_ci ceph_decode_need(p, end, len, bad); 391262306a36Sopenharmony_ci err_str = !strncmp(*p, "error_string", len); 391362306a36Sopenharmony_ci *p += len; 391462306a36Sopenharmony_ci ceph_decode_32_safe(p, end, len, bad); 391562306a36Sopenharmony_ci ceph_decode_need(p, end, len, bad); 391662306a36Sopenharmony_ci /* 391762306a36Sopenharmony_ci * Match "blocklisted (blacklisted)" from newer MDSes, 391862306a36Sopenharmony_ci * or "blacklisted" from older MDSes. 391962306a36Sopenharmony_ci */ 392062306a36Sopenharmony_ci if (err_str && strnstr(*p, "blacklisted", len)) 392162306a36Sopenharmony_ci *blocklisted = true; 392262306a36Sopenharmony_ci *p += len; 392362306a36Sopenharmony_ci } 392462306a36Sopenharmony_ci return 0; 392562306a36Sopenharmony_cibad: 392662306a36Sopenharmony_ci return -1; 392762306a36Sopenharmony_ci} 392862306a36Sopenharmony_ci 392962306a36Sopenharmony_ci/* 393062306a36Sopenharmony_ci * handle a mds session control message 393162306a36Sopenharmony_ci */ 393262306a36Sopenharmony_cistatic void handle_session(struct ceph_mds_session *session, 393362306a36Sopenharmony_ci struct ceph_msg *msg) 393462306a36Sopenharmony_ci{ 393562306a36Sopenharmony_ci struct ceph_mds_client *mdsc = session->s_mdsc; 393662306a36Sopenharmony_ci int mds = session->s_mds; 393762306a36Sopenharmony_ci int msg_version = le16_to_cpu(msg->hdr.version); 393862306a36Sopenharmony_ci void *p = msg->front.iov_base; 393962306a36Sopenharmony_ci void *end = p + msg->front.iov_len; 
394062306a36Sopenharmony_ci struct ceph_mds_session_head *h; 394162306a36Sopenharmony_ci u32 op; 394262306a36Sopenharmony_ci u64 seq, features = 0; 394362306a36Sopenharmony_ci int wake = 0; 394462306a36Sopenharmony_ci bool blocklisted = false; 394562306a36Sopenharmony_ci 394662306a36Sopenharmony_ci /* decode */ 394762306a36Sopenharmony_ci ceph_decode_need(&p, end, sizeof(*h), bad); 394862306a36Sopenharmony_ci h = p; 394962306a36Sopenharmony_ci p += sizeof(*h); 395062306a36Sopenharmony_ci 395162306a36Sopenharmony_ci op = le32_to_cpu(h->op); 395262306a36Sopenharmony_ci seq = le64_to_cpu(h->seq); 395362306a36Sopenharmony_ci 395462306a36Sopenharmony_ci if (msg_version >= 3) { 395562306a36Sopenharmony_ci u32 len; 395662306a36Sopenharmony_ci /* version >= 2 and < 5, decode metadata, skip otherwise 395762306a36Sopenharmony_ci * as it's handled via flags. 395862306a36Sopenharmony_ci */ 395962306a36Sopenharmony_ci if (msg_version >= 5) 396062306a36Sopenharmony_ci ceph_decode_skip_map(&p, end, string, string, bad); 396162306a36Sopenharmony_ci else if (__decode_session_metadata(&p, end, &blocklisted) < 0) 396262306a36Sopenharmony_ci goto bad; 396362306a36Sopenharmony_ci 396462306a36Sopenharmony_ci /* version >= 3, feature bits */ 396562306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, len, bad); 396662306a36Sopenharmony_ci if (len) { 396762306a36Sopenharmony_ci ceph_decode_64_safe(&p, end, features, bad); 396862306a36Sopenharmony_ci p += len - sizeof(features); 396962306a36Sopenharmony_ci } 397062306a36Sopenharmony_ci } 397162306a36Sopenharmony_ci 397262306a36Sopenharmony_ci if (msg_version >= 5) { 397362306a36Sopenharmony_ci u32 flags, len; 397462306a36Sopenharmony_ci 397562306a36Sopenharmony_ci /* version >= 4 */ 397662306a36Sopenharmony_ci ceph_decode_skip_16(&p, end, bad); /* struct_v, struct_cv */ 397762306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, len, bad); /* len */ 397862306a36Sopenharmony_ci ceph_decode_skip_n(&p, end, len, bad); /* metric_spec */ 
397962306a36Sopenharmony_ci 398062306a36Sopenharmony_ci /* version >= 5, flags */ 398162306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, flags, bad); 398262306a36Sopenharmony_ci if (flags & CEPH_SESSION_BLOCKLISTED) { 398362306a36Sopenharmony_ci pr_warn("mds%d session blocklisted\n", session->s_mds); 398462306a36Sopenharmony_ci blocklisted = true; 398562306a36Sopenharmony_ci } 398662306a36Sopenharmony_ci } 398762306a36Sopenharmony_ci 398862306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 398962306a36Sopenharmony_ci if (op == CEPH_SESSION_CLOSE) { 399062306a36Sopenharmony_ci ceph_get_mds_session(session); 399162306a36Sopenharmony_ci __unregister_session(mdsc, session); 399262306a36Sopenharmony_ci } 399362306a36Sopenharmony_ci /* FIXME: this ttl calculation is generous */ 399462306a36Sopenharmony_ci session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose; 399562306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 399662306a36Sopenharmony_ci 399762306a36Sopenharmony_ci mutex_lock(&session->s_mutex); 399862306a36Sopenharmony_ci 399962306a36Sopenharmony_ci dout("handle_session mds%d %s %p state %s seq %llu\n", 400062306a36Sopenharmony_ci mds, ceph_session_op_name(op), session, 400162306a36Sopenharmony_ci ceph_session_state_name(session->s_state), seq); 400262306a36Sopenharmony_ci 400362306a36Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_HUNG) { 400462306a36Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_OPEN; 400562306a36Sopenharmony_ci pr_info("mds%d came back\n", session->s_mds); 400662306a36Sopenharmony_ci } 400762306a36Sopenharmony_ci 400862306a36Sopenharmony_ci switch (op) { 400962306a36Sopenharmony_ci case CEPH_SESSION_OPEN: 401062306a36Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) 401162306a36Sopenharmony_ci pr_info("mds%d reconnect success\n", session->s_mds); 401262306a36Sopenharmony_ci 401362306a36Sopenharmony_ci session->s_features = features; 401462306a36Sopenharmony_ci if (session->s_state == 
CEPH_MDS_SESSION_OPEN) { 401562306a36Sopenharmony_ci pr_notice("mds%d is already opened\n", session->s_mds); 401662306a36Sopenharmony_ci } else { 401762306a36Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_OPEN; 401862306a36Sopenharmony_ci renewed_caps(mdsc, session, 0); 401962306a36Sopenharmony_ci if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, 402062306a36Sopenharmony_ci &session->s_features)) 402162306a36Sopenharmony_ci metric_schedule_delayed(&mdsc->metric); 402262306a36Sopenharmony_ci } 402362306a36Sopenharmony_ci 402462306a36Sopenharmony_ci /* 402562306a36Sopenharmony_ci * The connection maybe broken and the session in client 402662306a36Sopenharmony_ci * side has been reinitialized, need to update the seq 402762306a36Sopenharmony_ci * anyway. 402862306a36Sopenharmony_ci */ 402962306a36Sopenharmony_ci if (!session->s_seq && seq) 403062306a36Sopenharmony_ci session->s_seq = seq; 403162306a36Sopenharmony_ci 403262306a36Sopenharmony_ci wake = 1; 403362306a36Sopenharmony_ci if (mdsc->stopping) 403462306a36Sopenharmony_ci __close_session(mdsc, session); 403562306a36Sopenharmony_ci break; 403662306a36Sopenharmony_ci 403762306a36Sopenharmony_ci case CEPH_SESSION_RENEWCAPS: 403862306a36Sopenharmony_ci if (session->s_renew_seq == seq) 403962306a36Sopenharmony_ci renewed_caps(mdsc, session, 1); 404062306a36Sopenharmony_ci break; 404162306a36Sopenharmony_ci 404262306a36Sopenharmony_ci case CEPH_SESSION_CLOSE: 404362306a36Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) 404462306a36Sopenharmony_ci pr_info("mds%d reconnect denied\n", session->s_mds); 404562306a36Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_CLOSED; 404662306a36Sopenharmony_ci cleanup_session_requests(mdsc, session); 404762306a36Sopenharmony_ci remove_session_caps(session); 404862306a36Sopenharmony_ci wake = 2; /* for good measure */ 404962306a36Sopenharmony_ci wake_up_all(&mdsc->session_close_wq); 405062306a36Sopenharmony_ci break; 405162306a36Sopenharmony_ci 
405262306a36Sopenharmony_ci case CEPH_SESSION_STALE: 405362306a36Sopenharmony_ci pr_info("mds%d caps went stale, renewing\n", 405462306a36Sopenharmony_ci session->s_mds); 405562306a36Sopenharmony_ci atomic_inc(&session->s_cap_gen); 405662306a36Sopenharmony_ci session->s_cap_ttl = jiffies - 1; 405762306a36Sopenharmony_ci send_renew_caps(mdsc, session); 405862306a36Sopenharmony_ci break; 405962306a36Sopenharmony_ci 406062306a36Sopenharmony_ci case CEPH_SESSION_RECALL_STATE: 406162306a36Sopenharmony_ci ceph_trim_caps(mdsc, session, le32_to_cpu(h->max_caps)); 406262306a36Sopenharmony_ci break; 406362306a36Sopenharmony_ci 406462306a36Sopenharmony_ci case CEPH_SESSION_FLUSHMSG: 406562306a36Sopenharmony_ci /* flush cap releases */ 406662306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 406762306a36Sopenharmony_ci if (session->s_num_cap_releases) 406862306a36Sopenharmony_ci ceph_flush_cap_releases(mdsc, session); 406962306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 407062306a36Sopenharmony_ci 407162306a36Sopenharmony_ci send_flushmsg_ack(mdsc, session, seq); 407262306a36Sopenharmony_ci break; 407362306a36Sopenharmony_ci 407462306a36Sopenharmony_ci case CEPH_SESSION_FORCE_RO: 407562306a36Sopenharmony_ci dout("force_session_readonly %p\n", session); 407662306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 407762306a36Sopenharmony_ci session->s_readonly = true; 407862306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 407962306a36Sopenharmony_ci wake_up_session_caps(session, FORCE_RO); 408062306a36Sopenharmony_ci break; 408162306a36Sopenharmony_ci 408262306a36Sopenharmony_ci case CEPH_SESSION_REJECT: 408362306a36Sopenharmony_ci WARN_ON(session->s_state != CEPH_MDS_SESSION_OPENING); 408462306a36Sopenharmony_ci pr_info("mds%d rejected session\n", session->s_mds); 408562306a36Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_REJECTED; 408662306a36Sopenharmony_ci cleanup_session_requests(mdsc, session); 408762306a36Sopenharmony_ci 
remove_session_caps(session); 408862306a36Sopenharmony_ci if (blocklisted) 408962306a36Sopenharmony_ci mdsc->fsc->blocklisted = true; 409062306a36Sopenharmony_ci wake = 2; /* for good measure */ 409162306a36Sopenharmony_ci break; 409262306a36Sopenharmony_ci 409362306a36Sopenharmony_ci default: 409462306a36Sopenharmony_ci pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds); 409562306a36Sopenharmony_ci WARN_ON(1); 409662306a36Sopenharmony_ci } 409762306a36Sopenharmony_ci 409862306a36Sopenharmony_ci mutex_unlock(&session->s_mutex); 409962306a36Sopenharmony_ci if (wake) { 410062306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 410162306a36Sopenharmony_ci __wake_requests(mdsc, &session->s_waiting); 410262306a36Sopenharmony_ci if (wake == 2) 410362306a36Sopenharmony_ci kick_requests(mdsc, mds); 410462306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 410562306a36Sopenharmony_ci } 410662306a36Sopenharmony_ci if (op == CEPH_SESSION_CLOSE) 410762306a36Sopenharmony_ci ceph_put_mds_session(session); 410862306a36Sopenharmony_ci return; 410962306a36Sopenharmony_ci 411062306a36Sopenharmony_cibad: 411162306a36Sopenharmony_ci pr_err("mdsc_handle_session corrupt message mds%d len %d\n", mds, 411262306a36Sopenharmony_ci (int)msg->front.iov_len); 411362306a36Sopenharmony_ci ceph_msg_dump(msg); 411462306a36Sopenharmony_ci return; 411562306a36Sopenharmony_ci} 411662306a36Sopenharmony_ci 411762306a36Sopenharmony_civoid ceph_mdsc_release_dir_caps(struct ceph_mds_request *req) 411862306a36Sopenharmony_ci{ 411962306a36Sopenharmony_ci int dcaps; 412062306a36Sopenharmony_ci 412162306a36Sopenharmony_ci dcaps = xchg(&req->r_dir_caps, 0); 412262306a36Sopenharmony_ci if (dcaps) { 412362306a36Sopenharmony_ci dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); 412462306a36Sopenharmony_ci ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps); 412562306a36Sopenharmony_ci } 412662306a36Sopenharmony_ci} 412762306a36Sopenharmony_ci 412862306a36Sopenharmony_civoid 
ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req) 412962306a36Sopenharmony_ci{ 413062306a36Sopenharmony_ci int dcaps; 413162306a36Sopenharmony_ci 413262306a36Sopenharmony_ci dcaps = xchg(&req->r_dir_caps, 0); 413362306a36Sopenharmony_ci if (dcaps) { 413462306a36Sopenharmony_ci dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); 413562306a36Sopenharmony_ci ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent), 413662306a36Sopenharmony_ci dcaps); 413762306a36Sopenharmony_ci } 413862306a36Sopenharmony_ci} 413962306a36Sopenharmony_ci 414062306a36Sopenharmony_ci/* 414162306a36Sopenharmony_ci * called under session->mutex. 414262306a36Sopenharmony_ci */ 414362306a36Sopenharmony_cistatic void replay_unsafe_requests(struct ceph_mds_client *mdsc, 414462306a36Sopenharmony_ci struct ceph_mds_session *session) 414562306a36Sopenharmony_ci{ 414662306a36Sopenharmony_ci struct ceph_mds_request *req, *nreq; 414762306a36Sopenharmony_ci struct rb_node *p; 414862306a36Sopenharmony_ci 414962306a36Sopenharmony_ci dout("replay_unsafe_requests mds%d\n", session->s_mds); 415062306a36Sopenharmony_ci 415162306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 415262306a36Sopenharmony_ci list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) 415362306a36Sopenharmony_ci __send_request(session, req, true); 415462306a36Sopenharmony_ci 415562306a36Sopenharmony_ci /* 415662306a36Sopenharmony_ci * also re-send old requests when MDS enters reconnect stage. So that MDS 415762306a36Sopenharmony_ci * can process completed request in clientreplay stage. 
415862306a36Sopenharmony_ci */ 415962306a36Sopenharmony_ci p = rb_first(&mdsc->request_tree); 416062306a36Sopenharmony_ci while (p) { 416162306a36Sopenharmony_ci req = rb_entry(p, struct ceph_mds_request, r_node); 416262306a36Sopenharmony_ci p = rb_next(p); 416362306a36Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) 416462306a36Sopenharmony_ci continue; 416562306a36Sopenharmony_ci if (req->r_attempts == 0) 416662306a36Sopenharmony_ci continue; /* only old requests */ 416762306a36Sopenharmony_ci if (!req->r_session) 416862306a36Sopenharmony_ci continue; 416962306a36Sopenharmony_ci if (req->r_session->s_mds != session->s_mds) 417062306a36Sopenharmony_ci continue; 417162306a36Sopenharmony_ci 417262306a36Sopenharmony_ci ceph_mdsc_release_dir_caps_no_check(req); 417362306a36Sopenharmony_ci 417462306a36Sopenharmony_ci __send_request(session, req, true); 417562306a36Sopenharmony_ci } 417662306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 417762306a36Sopenharmony_ci} 417862306a36Sopenharmony_ci 417962306a36Sopenharmony_cistatic int send_reconnect_partial(struct ceph_reconnect_state *recon_state) 418062306a36Sopenharmony_ci{ 418162306a36Sopenharmony_ci struct ceph_msg *reply; 418262306a36Sopenharmony_ci struct ceph_pagelist *_pagelist; 418362306a36Sopenharmony_ci struct page *page; 418462306a36Sopenharmony_ci __le32 *addr; 418562306a36Sopenharmony_ci int err = -ENOMEM; 418662306a36Sopenharmony_ci 418762306a36Sopenharmony_ci if (!recon_state->allow_multi) 418862306a36Sopenharmony_ci return -ENOSPC; 418962306a36Sopenharmony_ci 419062306a36Sopenharmony_ci /* can't handle message that contains both caps and realm */ 419162306a36Sopenharmony_ci BUG_ON(!recon_state->nr_caps == !recon_state->nr_realms); 419262306a36Sopenharmony_ci 419362306a36Sopenharmony_ci /* pre-allocate new pagelist */ 419462306a36Sopenharmony_ci _pagelist = ceph_pagelist_alloc(GFP_NOFS); 419562306a36Sopenharmony_ci if (!_pagelist) 419662306a36Sopenharmony_ci return -ENOMEM; 
419762306a36Sopenharmony_ci 419862306a36Sopenharmony_ci reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false); 419962306a36Sopenharmony_ci if (!reply) 420062306a36Sopenharmony_ci goto fail_msg; 420162306a36Sopenharmony_ci 420262306a36Sopenharmony_ci /* placeholder for nr_caps */ 420362306a36Sopenharmony_ci err = ceph_pagelist_encode_32(_pagelist, 0); 420462306a36Sopenharmony_ci if (err < 0) 420562306a36Sopenharmony_ci goto fail; 420662306a36Sopenharmony_ci 420762306a36Sopenharmony_ci if (recon_state->nr_caps) { 420862306a36Sopenharmony_ci /* currently encoding caps */ 420962306a36Sopenharmony_ci err = ceph_pagelist_encode_32(recon_state->pagelist, 0); 421062306a36Sopenharmony_ci if (err) 421162306a36Sopenharmony_ci goto fail; 421262306a36Sopenharmony_ci } else { 421362306a36Sopenharmony_ci /* placeholder for nr_realms (currently encoding relams) */ 421462306a36Sopenharmony_ci err = ceph_pagelist_encode_32(_pagelist, 0); 421562306a36Sopenharmony_ci if (err < 0) 421662306a36Sopenharmony_ci goto fail; 421762306a36Sopenharmony_ci } 421862306a36Sopenharmony_ci 421962306a36Sopenharmony_ci err = ceph_pagelist_encode_8(recon_state->pagelist, 1); 422062306a36Sopenharmony_ci if (err) 422162306a36Sopenharmony_ci goto fail; 422262306a36Sopenharmony_ci 422362306a36Sopenharmony_ci page = list_first_entry(&recon_state->pagelist->head, struct page, lru); 422462306a36Sopenharmony_ci addr = kmap_atomic(page); 422562306a36Sopenharmony_ci if (recon_state->nr_caps) { 422662306a36Sopenharmony_ci /* currently encoding caps */ 422762306a36Sopenharmony_ci *addr = cpu_to_le32(recon_state->nr_caps); 422862306a36Sopenharmony_ci } else { 422962306a36Sopenharmony_ci /* currently encoding relams */ 423062306a36Sopenharmony_ci *(addr + 1) = cpu_to_le32(recon_state->nr_realms); 423162306a36Sopenharmony_ci } 423262306a36Sopenharmony_ci kunmap_atomic(addr); 423362306a36Sopenharmony_ci 423462306a36Sopenharmony_ci reply->hdr.version = cpu_to_le16(5); 423562306a36Sopenharmony_ci 
reply->hdr.compat_version = cpu_to_le16(4); 423662306a36Sopenharmony_ci 423762306a36Sopenharmony_ci reply->hdr.data_len = cpu_to_le32(recon_state->pagelist->length); 423862306a36Sopenharmony_ci ceph_msg_data_add_pagelist(reply, recon_state->pagelist); 423962306a36Sopenharmony_ci 424062306a36Sopenharmony_ci ceph_con_send(&recon_state->session->s_con, reply); 424162306a36Sopenharmony_ci ceph_pagelist_release(recon_state->pagelist); 424262306a36Sopenharmony_ci 424362306a36Sopenharmony_ci recon_state->pagelist = _pagelist; 424462306a36Sopenharmony_ci recon_state->nr_caps = 0; 424562306a36Sopenharmony_ci recon_state->nr_realms = 0; 424662306a36Sopenharmony_ci recon_state->msg_version = 5; 424762306a36Sopenharmony_ci return 0; 424862306a36Sopenharmony_cifail: 424962306a36Sopenharmony_ci ceph_msg_put(reply); 425062306a36Sopenharmony_cifail_msg: 425162306a36Sopenharmony_ci ceph_pagelist_release(_pagelist); 425262306a36Sopenharmony_ci return err; 425362306a36Sopenharmony_ci} 425462306a36Sopenharmony_ci 425562306a36Sopenharmony_cistatic struct dentry* d_find_primary(struct inode *inode) 425662306a36Sopenharmony_ci{ 425762306a36Sopenharmony_ci struct dentry *alias, *dn = NULL; 425862306a36Sopenharmony_ci 425962306a36Sopenharmony_ci if (hlist_empty(&inode->i_dentry)) 426062306a36Sopenharmony_ci return NULL; 426162306a36Sopenharmony_ci 426262306a36Sopenharmony_ci spin_lock(&inode->i_lock); 426362306a36Sopenharmony_ci if (hlist_empty(&inode->i_dentry)) 426462306a36Sopenharmony_ci goto out_unlock; 426562306a36Sopenharmony_ci 426662306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) { 426762306a36Sopenharmony_ci alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); 426862306a36Sopenharmony_ci if (!IS_ROOT(alias)) 426962306a36Sopenharmony_ci dn = dget(alias); 427062306a36Sopenharmony_ci goto out_unlock; 427162306a36Sopenharmony_ci } 427262306a36Sopenharmony_ci 427362306a36Sopenharmony_ci hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { 
427462306a36Sopenharmony_ci spin_lock(&alias->d_lock); 427562306a36Sopenharmony_ci if (!d_unhashed(alias) && 427662306a36Sopenharmony_ci (ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) { 427762306a36Sopenharmony_ci dn = dget_dlock(alias); 427862306a36Sopenharmony_ci } 427962306a36Sopenharmony_ci spin_unlock(&alias->d_lock); 428062306a36Sopenharmony_ci if (dn) 428162306a36Sopenharmony_ci break; 428262306a36Sopenharmony_ci } 428362306a36Sopenharmony_ciout_unlock: 428462306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 428562306a36Sopenharmony_ci return dn; 428662306a36Sopenharmony_ci} 428762306a36Sopenharmony_ci 428862306a36Sopenharmony_ci/* 428962306a36Sopenharmony_ci * Encode information about a cap for a reconnect with the MDS. 429062306a36Sopenharmony_ci */ 429162306a36Sopenharmony_cistatic int reconnect_caps_cb(struct inode *inode, int mds, void *arg) 429262306a36Sopenharmony_ci{ 429362306a36Sopenharmony_ci union { 429462306a36Sopenharmony_ci struct ceph_mds_cap_reconnect v2; 429562306a36Sopenharmony_ci struct ceph_mds_cap_reconnect_v1 v1; 429662306a36Sopenharmony_ci } rec; 429762306a36Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 429862306a36Sopenharmony_ci struct ceph_reconnect_state *recon_state = arg; 429962306a36Sopenharmony_ci struct ceph_pagelist *pagelist = recon_state->pagelist; 430062306a36Sopenharmony_ci struct dentry *dentry; 430162306a36Sopenharmony_ci struct ceph_cap *cap; 430262306a36Sopenharmony_ci char *path; 430362306a36Sopenharmony_ci int pathlen = 0, err; 430462306a36Sopenharmony_ci u64 pathbase; 430562306a36Sopenharmony_ci u64 snap_follows; 430662306a36Sopenharmony_ci 430762306a36Sopenharmony_ci dentry = d_find_primary(inode); 430862306a36Sopenharmony_ci if (dentry) { 430962306a36Sopenharmony_ci /* set pathbase to parent dir when msg_version >= 2 */ 431062306a36Sopenharmony_ci path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 431162306a36Sopenharmony_ci recon_state->msg_version >= 2); 
431262306a36Sopenharmony_ci dput(dentry); 431362306a36Sopenharmony_ci if (IS_ERR(path)) { 431462306a36Sopenharmony_ci err = PTR_ERR(path); 431562306a36Sopenharmony_ci goto out_err; 431662306a36Sopenharmony_ci } 431762306a36Sopenharmony_ci } else { 431862306a36Sopenharmony_ci path = NULL; 431962306a36Sopenharmony_ci pathbase = 0; 432062306a36Sopenharmony_ci } 432162306a36Sopenharmony_ci 432262306a36Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 432362306a36Sopenharmony_ci cap = __get_cap_for_mds(ci, mds); 432462306a36Sopenharmony_ci if (!cap) { 432562306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 432662306a36Sopenharmony_ci err = 0; 432762306a36Sopenharmony_ci goto out_err; 432862306a36Sopenharmony_ci } 432962306a36Sopenharmony_ci dout(" adding %p ino %llx.%llx cap %p %lld %s\n", 433062306a36Sopenharmony_ci inode, ceph_vinop(inode), cap, cap->cap_id, 433162306a36Sopenharmony_ci ceph_cap_string(cap->issued)); 433262306a36Sopenharmony_ci 433362306a36Sopenharmony_ci cap->seq = 0; /* reset cap seq */ 433462306a36Sopenharmony_ci cap->issue_seq = 0; /* and issue_seq */ 433562306a36Sopenharmony_ci cap->mseq = 0; /* and migrate_seq */ 433662306a36Sopenharmony_ci cap->cap_gen = atomic_read(&cap->session->s_cap_gen); 433762306a36Sopenharmony_ci 433862306a36Sopenharmony_ci /* These are lost when the session goes away */ 433962306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) { 434062306a36Sopenharmony_ci if (cap->issued & CEPH_CAP_DIR_CREATE) { 434162306a36Sopenharmony_ci ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns)); 434262306a36Sopenharmony_ci memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout)); 434362306a36Sopenharmony_ci } 434462306a36Sopenharmony_ci cap->issued &= ~CEPH_CAP_ANY_DIR_OPS; 434562306a36Sopenharmony_ci } 434662306a36Sopenharmony_ci 434762306a36Sopenharmony_ci if (recon_state->msg_version >= 2) { 434862306a36Sopenharmony_ci rec.v2.cap_id = cpu_to_le64(cap->cap_id); 434962306a36Sopenharmony_ci rec.v2.wanted = 
cpu_to_le32(__ceph_caps_wanted(ci)); 435062306a36Sopenharmony_ci rec.v2.issued = cpu_to_le32(cap->issued); 435162306a36Sopenharmony_ci rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); 435262306a36Sopenharmony_ci rec.v2.pathbase = cpu_to_le64(pathbase); 435362306a36Sopenharmony_ci rec.v2.flock_len = (__force __le32) 435462306a36Sopenharmony_ci ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); 435562306a36Sopenharmony_ci } else { 435662306a36Sopenharmony_ci rec.v1.cap_id = cpu_to_le64(cap->cap_id); 435762306a36Sopenharmony_ci rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); 435862306a36Sopenharmony_ci rec.v1.issued = cpu_to_le32(cap->issued); 435962306a36Sopenharmony_ci rec.v1.size = cpu_to_le64(i_size_read(inode)); 436062306a36Sopenharmony_ci ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime); 436162306a36Sopenharmony_ci ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime); 436262306a36Sopenharmony_ci rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); 436362306a36Sopenharmony_ci rec.v1.pathbase = cpu_to_le64(pathbase); 436462306a36Sopenharmony_ci } 436562306a36Sopenharmony_ci 436662306a36Sopenharmony_ci if (list_empty(&ci->i_cap_snaps)) { 436762306a36Sopenharmony_ci snap_follows = ci->i_head_snapc ? 
ci->i_head_snapc->seq : 0; 436862306a36Sopenharmony_ci } else { 436962306a36Sopenharmony_ci struct ceph_cap_snap *capsnap = 437062306a36Sopenharmony_ci list_first_entry(&ci->i_cap_snaps, 437162306a36Sopenharmony_ci struct ceph_cap_snap, ci_item); 437262306a36Sopenharmony_ci snap_follows = capsnap->follows; 437362306a36Sopenharmony_ci } 437462306a36Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 437562306a36Sopenharmony_ci 437662306a36Sopenharmony_ci if (recon_state->msg_version >= 2) { 437762306a36Sopenharmony_ci int num_fcntl_locks, num_flock_locks; 437862306a36Sopenharmony_ci struct ceph_filelock *flocks = NULL; 437962306a36Sopenharmony_ci size_t struct_len, total_len = sizeof(u64); 438062306a36Sopenharmony_ci u8 struct_v = 0; 438162306a36Sopenharmony_ci 438262306a36Sopenharmony_ciencode_again: 438362306a36Sopenharmony_ci if (rec.v2.flock_len) { 438462306a36Sopenharmony_ci ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); 438562306a36Sopenharmony_ci } else { 438662306a36Sopenharmony_ci num_fcntl_locks = 0; 438762306a36Sopenharmony_ci num_flock_locks = 0; 438862306a36Sopenharmony_ci } 438962306a36Sopenharmony_ci if (num_fcntl_locks + num_flock_locks > 0) { 439062306a36Sopenharmony_ci flocks = kmalloc_array(num_fcntl_locks + num_flock_locks, 439162306a36Sopenharmony_ci sizeof(struct ceph_filelock), 439262306a36Sopenharmony_ci GFP_NOFS); 439362306a36Sopenharmony_ci if (!flocks) { 439462306a36Sopenharmony_ci err = -ENOMEM; 439562306a36Sopenharmony_ci goto out_err; 439662306a36Sopenharmony_ci } 439762306a36Sopenharmony_ci err = ceph_encode_locks_to_buffer(inode, flocks, 439862306a36Sopenharmony_ci num_fcntl_locks, 439962306a36Sopenharmony_ci num_flock_locks); 440062306a36Sopenharmony_ci if (err) { 440162306a36Sopenharmony_ci kfree(flocks); 440262306a36Sopenharmony_ci flocks = NULL; 440362306a36Sopenharmony_ci if (err == -ENOSPC) 440462306a36Sopenharmony_ci goto encode_again; 440562306a36Sopenharmony_ci goto out_err; 440662306a36Sopenharmony_ci } 
440762306a36Sopenharmony_ci } else { 440862306a36Sopenharmony_ci kfree(flocks); 440962306a36Sopenharmony_ci flocks = NULL; 441062306a36Sopenharmony_ci } 441162306a36Sopenharmony_ci 441262306a36Sopenharmony_ci if (recon_state->msg_version >= 3) { 441362306a36Sopenharmony_ci /* version, compat_version and struct_len */ 441462306a36Sopenharmony_ci total_len += 2 * sizeof(u8) + sizeof(u32); 441562306a36Sopenharmony_ci struct_v = 2; 441662306a36Sopenharmony_ci } 441762306a36Sopenharmony_ci /* 441862306a36Sopenharmony_ci * number of encoded locks is stable, so copy to pagelist 441962306a36Sopenharmony_ci */ 442062306a36Sopenharmony_ci struct_len = 2 * sizeof(u32) + 442162306a36Sopenharmony_ci (num_fcntl_locks + num_flock_locks) * 442262306a36Sopenharmony_ci sizeof(struct ceph_filelock); 442362306a36Sopenharmony_ci rec.v2.flock_len = cpu_to_le32(struct_len); 442462306a36Sopenharmony_ci 442562306a36Sopenharmony_ci struct_len += sizeof(u32) + pathlen + sizeof(rec.v2); 442662306a36Sopenharmony_ci 442762306a36Sopenharmony_ci if (struct_v >= 2) 442862306a36Sopenharmony_ci struct_len += sizeof(u64); /* snap_follows */ 442962306a36Sopenharmony_ci 443062306a36Sopenharmony_ci total_len += struct_len; 443162306a36Sopenharmony_ci 443262306a36Sopenharmony_ci if (pagelist->length + total_len > RECONNECT_MAX_SIZE) { 443362306a36Sopenharmony_ci err = send_reconnect_partial(recon_state); 443462306a36Sopenharmony_ci if (err) 443562306a36Sopenharmony_ci goto out_freeflocks; 443662306a36Sopenharmony_ci pagelist = recon_state->pagelist; 443762306a36Sopenharmony_ci } 443862306a36Sopenharmony_ci 443962306a36Sopenharmony_ci err = ceph_pagelist_reserve(pagelist, total_len); 444062306a36Sopenharmony_ci if (err) 444162306a36Sopenharmony_ci goto out_freeflocks; 444262306a36Sopenharmony_ci 444362306a36Sopenharmony_ci ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); 444462306a36Sopenharmony_ci if (recon_state->msg_version >= 3) { 444562306a36Sopenharmony_ci ceph_pagelist_encode_8(pagelist, 
struct_v); 444662306a36Sopenharmony_ci ceph_pagelist_encode_8(pagelist, 1); 444762306a36Sopenharmony_ci ceph_pagelist_encode_32(pagelist, struct_len); 444862306a36Sopenharmony_ci } 444962306a36Sopenharmony_ci ceph_pagelist_encode_string(pagelist, path, pathlen); 445062306a36Sopenharmony_ci ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2)); 445162306a36Sopenharmony_ci ceph_locks_to_pagelist(flocks, pagelist, 445262306a36Sopenharmony_ci num_fcntl_locks, num_flock_locks); 445362306a36Sopenharmony_ci if (struct_v >= 2) 445462306a36Sopenharmony_ci ceph_pagelist_encode_64(pagelist, snap_follows); 445562306a36Sopenharmony_ciout_freeflocks: 445662306a36Sopenharmony_ci kfree(flocks); 445762306a36Sopenharmony_ci } else { 445862306a36Sopenharmony_ci err = ceph_pagelist_reserve(pagelist, 445962306a36Sopenharmony_ci sizeof(u64) + sizeof(u32) + 446062306a36Sopenharmony_ci pathlen + sizeof(rec.v1)); 446162306a36Sopenharmony_ci if (err) 446262306a36Sopenharmony_ci goto out_err; 446362306a36Sopenharmony_ci 446462306a36Sopenharmony_ci ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); 446562306a36Sopenharmony_ci ceph_pagelist_encode_string(pagelist, path, pathlen); 446662306a36Sopenharmony_ci ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); 446762306a36Sopenharmony_ci } 446862306a36Sopenharmony_ci 446962306a36Sopenharmony_ciout_err: 447062306a36Sopenharmony_ci ceph_mdsc_free_path(path, pathlen); 447162306a36Sopenharmony_ci if (!err) 447262306a36Sopenharmony_ci recon_state->nr_caps++; 447362306a36Sopenharmony_ci return err; 447462306a36Sopenharmony_ci} 447562306a36Sopenharmony_ci 447662306a36Sopenharmony_cistatic int encode_snap_realms(struct ceph_mds_client *mdsc, 447762306a36Sopenharmony_ci struct ceph_reconnect_state *recon_state) 447862306a36Sopenharmony_ci{ 447962306a36Sopenharmony_ci struct rb_node *p; 448062306a36Sopenharmony_ci struct ceph_pagelist *pagelist = recon_state->pagelist; 448162306a36Sopenharmony_ci int err = 0; 448262306a36Sopenharmony_ci 
448362306a36Sopenharmony_ci if (recon_state->msg_version >= 4) { 448462306a36Sopenharmony_ci err = ceph_pagelist_encode_32(pagelist, mdsc->num_snap_realms); 448562306a36Sopenharmony_ci if (err < 0) 448662306a36Sopenharmony_ci goto fail; 448762306a36Sopenharmony_ci } 448862306a36Sopenharmony_ci 448962306a36Sopenharmony_ci /* 449062306a36Sopenharmony_ci * snaprealms. we provide mds with the ino, seq (version), and 449162306a36Sopenharmony_ci * parent for all of our realms. If the mds has any newer info, 449262306a36Sopenharmony_ci * it will tell us. 449362306a36Sopenharmony_ci */ 449462306a36Sopenharmony_ci for (p = rb_first(&mdsc->snap_realms); p; p = rb_next(p)) { 449562306a36Sopenharmony_ci struct ceph_snap_realm *realm = 449662306a36Sopenharmony_ci rb_entry(p, struct ceph_snap_realm, node); 449762306a36Sopenharmony_ci struct ceph_mds_snaprealm_reconnect sr_rec; 449862306a36Sopenharmony_ci 449962306a36Sopenharmony_ci if (recon_state->msg_version >= 4) { 450062306a36Sopenharmony_ci size_t need = sizeof(u8) * 2 + sizeof(u32) + 450162306a36Sopenharmony_ci sizeof(sr_rec); 450262306a36Sopenharmony_ci 450362306a36Sopenharmony_ci if (pagelist->length + need > RECONNECT_MAX_SIZE) { 450462306a36Sopenharmony_ci err = send_reconnect_partial(recon_state); 450562306a36Sopenharmony_ci if (err) 450662306a36Sopenharmony_ci goto fail; 450762306a36Sopenharmony_ci pagelist = recon_state->pagelist; 450862306a36Sopenharmony_ci } 450962306a36Sopenharmony_ci 451062306a36Sopenharmony_ci err = ceph_pagelist_reserve(pagelist, need); 451162306a36Sopenharmony_ci if (err) 451262306a36Sopenharmony_ci goto fail; 451362306a36Sopenharmony_ci 451462306a36Sopenharmony_ci ceph_pagelist_encode_8(pagelist, 1); 451562306a36Sopenharmony_ci ceph_pagelist_encode_8(pagelist, 1); 451662306a36Sopenharmony_ci ceph_pagelist_encode_32(pagelist, sizeof(sr_rec)); 451762306a36Sopenharmony_ci } 451862306a36Sopenharmony_ci 451962306a36Sopenharmony_ci dout(" adding snap realm %llx seq %lld parent %llx\n", 
452062306a36Sopenharmony_ci realm->ino, realm->seq, realm->parent_ino); 452162306a36Sopenharmony_ci sr_rec.ino = cpu_to_le64(realm->ino); 452262306a36Sopenharmony_ci sr_rec.seq = cpu_to_le64(realm->seq); 452362306a36Sopenharmony_ci sr_rec.parent = cpu_to_le64(realm->parent_ino); 452462306a36Sopenharmony_ci 452562306a36Sopenharmony_ci err = ceph_pagelist_append(pagelist, &sr_rec, sizeof(sr_rec)); 452662306a36Sopenharmony_ci if (err) 452762306a36Sopenharmony_ci goto fail; 452862306a36Sopenharmony_ci 452962306a36Sopenharmony_ci recon_state->nr_realms++; 453062306a36Sopenharmony_ci } 453162306a36Sopenharmony_cifail: 453262306a36Sopenharmony_ci return err; 453362306a36Sopenharmony_ci} 453462306a36Sopenharmony_ci 453562306a36Sopenharmony_ci 453662306a36Sopenharmony_ci/* 453762306a36Sopenharmony_ci * If an MDS fails and recovers, clients need to reconnect in order to 453862306a36Sopenharmony_ci * reestablish shared state. This includes all caps issued through 453962306a36Sopenharmony_ci * this session _and_ the snap_realm hierarchy. Because it's not 454062306a36Sopenharmony_ci * clear which snap realms the mds cares about, we send everything we 454162306a36Sopenharmony_ci * know about.. that ensures we'll then get any new info the 454262306a36Sopenharmony_ci * recovering MDS might have. 454362306a36Sopenharmony_ci * 454462306a36Sopenharmony_ci * This is a relatively heavyweight operation, but it's rare. 
454562306a36Sopenharmony_ci */ 454662306a36Sopenharmony_cistatic void send_mds_reconnect(struct ceph_mds_client *mdsc, 454762306a36Sopenharmony_ci struct ceph_mds_session *session) 454862306a36Sopenharmony_ci{ 454962306a36Sopenharmony_ci struct ceph_msg *reply; 455062306a36Sopenharmony_ci int mds = session->s_mds; 455162306a36Sopenharmony_ci int err = -ENOMEM; 455262306a36Sopenharmony_ci struct ceph_reconnect_state recon_state = { 455362306a36Sopenharmony_ci .session = session, 455462306a36Sopenharmony_ci }; 455562306a36Sopenharmony_ci LIST_HEAD(dispose); 455662306a36Sopenharmony_ci 455762306a36Sopenharmony_ci pr_info("mds%d reconnect start\n", mds); 455862306a36Sopenharmony_ci 455962306a36Sopenharmony_ci recon_state.pagelist = ceph_pagelist_alloc(GFP_NOFS); 456062306a36Sopenharmony_ci if (!recon_state.pagelist) 456162306a36Sopenharmony_ci goto fail_nopagelist; 456262306a36Sopenharmony_ci 456362306a36Sopenharmony_ci reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false); 456462306a36Sopenharmony_ci if (!reply) 456562306a36Sopenharmony_ci goto fail_nomsg; 456662306a36Sopenharmony_ci 456762306a36Sopenharmony_ci xa_destroy(&session->s_delegated_inos); 456862306a36Sopenharmony_ci 456962306a36Sopenharmony_ci mutex_lock(&session->s_mutex); 457062306a36Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_RECONNECTING; 457162306a36Sopenharmony_ci session->s_seq = 0; 457262306a36Sopenharmony_ci 457362306a36Sopenharmony_ci dout("session %p state %s\n", session, 457462306a36Sopenharmony_ci ceph_session_state_name(session->s_state)); 457562306a36Sopenharmony_ci 457662306a36Sopenharmony_ci atomic_inc(&session->s_cap_gen); 457762306a36Sopenharmony_ci 457862306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 457962306a36Sopenharmony_ci /* don't know if session is readonly */ 458062306a36Sopenharmony_ci session->s_readonly = 0; 458162306a36Sopenharmony_ci /* 458262306a36Sopenharmony_ci * notify __ceph_remove_cap() that we are composing cap reconnect. 
458362306a36Sopenharmony_ci * If a cap get released before being added to the cap reconnect, 458462306a36Sopenharmony_ci * __ceph_remove_cap() should skip queuing cap release. 458562306a36Sopenharmony_ci */ 458662306a36Sopenharmony_ci session->s_cap_reconnect = 1; 458762306a36Sopenharmony_ci /* drop old cap expires; we're about to reestablish that state */ 458862306a36Sopenharmony_ci detach_cap_releases(session, &dispose); 458962306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 459062306a36Sopenharmony_ci dispose_cap_releases(mdsc, &dispose); 459162306a36Sopenharmony_ci 459262306a36Sopenharmony_ci /* trim unused caps to reduce MDS's cache rejoin time */ 459362306a36Sopenharmony_ci if (mdsc->fsc->sb->s_root) 459462306a36Sopenharmony_ci shrink_dcache_parent(mdsc->fsc->sb->s_root); 459562306a36Sopenharmony_ci 459662306a36Sopenharmony_ci ceph_con_close(&session->s_con); 459762306a36Sopenharmony_ci ceph_con_open(&session->s_con, 459862306a36Sopenharmony_ci CEPH_ENTITY_TYPE_MDS, mds, 459962306a36Sopenharmony_ci ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); 460062306a36Sopenharmony_ci 460162306a36Sopenharmony_ci /* replay unsafe requests */ 460262306a36Sopenharmony_ci replay_unsafe_requests(mdsc, session); 460362306a36Sopenharmony_ci 460462306a36Sopenharmony_ci ceph_early_kick_flushing_caps(mdsc, session); 460562306a36Sopenharmony_ci 460662306a36Sopenharmony_ci down_read(&mdsc->snap_rwsem); 460762306a36Sopenharmony_ci 460862306a36Sopenharmony_ci /* placeholder for nr_caps */ 460962306a36Sopenharmony_ci err = ceph_pagelist_encode_32(recon_state.pagelist, 0); 461062306a36Sopenharmony_ci if (err) 461162306a36Sopenharmony_ci goto fail; 461262306a36Sopenharmony_ci 461362306a36Sopenharmony_ci if (test_bit(CEPHFS_FEATURE_MULTI_RECONNECT, &session->s_features)) { 461462306a36Sopenharmony_ci recon_state.msg_version = 3; 461562306a36Sopenharmony_ci recon_state.allow_multi = true; 461662306a36Sopenharmony_ci } else if (session->s_con.peer_features & CEPH_FEATURE_MDSENC) { 
461762306a36Sopenharmony_ci recon_state.msg_version = 3; 461862306a36Sopenharmony_ci } else { 461962306a36Sopenharmony_ci recon_state.msg_version = 2; 462062306a36Sopenharmony_ci } 462162306a36Sopenharmony_ci /* trsaverse this session's caps */ 462262306a36Sopenharmony_ci err = ceph_iterate_session_caps(session, reconnect_caps_cb, &recon_state); 462362306a36Sopenharmony_ci 462462306a36Sopenharmony_ci spin_lock(&session->s_cap_lock); 462562306a36Sopenharmony_ci session->s_cap_reconnect = 0; 462662306a36Sopenharmony_ci spin_unlock(&session->s_cap_lock); 462762306a36Sopenharmony_ci 462862306a36Sopenharmony_ci if (err < 0) 462962306a36Sopenharmony_ci goto fail; 463062306a36Sopenharmony_ci 463162306a36Sopenharmony_ci /* check if all realms can be encoded into current message */ 463262306a36Sopenharmony_ci if (mdsc->num_snap_realms) { 463362306a36Sopenharmony_ci size_t total_len = 463462306a36Sopenharmony_ci recon_state.pagelist->length + 463562306a36Sopenharmony_ci mdsc->num_snap_realms * 463662306a36Sopenharmony_ci sizeof(struct ceph_mds_snaprealm_reconnect); 463762306a36Sopenharmony_ci if (recon_state.msg_version >= 4) { 463862306a36Sopenharmony_ci /* number of realms */ 463962306a36Sopenharmony_ci total_len += sizeof(u32); 464062306a36Sopenharmony_ci /* version, compat_version and struct_len */ 464162306a36Sopenharmony_ci total_len += mdsc->num_snap_realms * 464262306a36Sopenharmony_ci (2 * sizeof(u8) + sizeof(u32)); 464362306a36Sopenharmony_ci } 464462306a36Sopenharmony_ci if (total_len > RECONNECT_MAX_SIZE) { 464562306a36Sopenharmony_ci if (!recon_state.allow_multi) { 464662306a36Sopenharmony_ci err = -ENOSPC; 464762306a36Sopenharmony_ci goto fail; 464862306a36Sopenharmony_ci } 464962306a36Sopenharmony_ci if (recon_state.nr_caps) { 465062306a36Sopenharmony_ci err = send_reconnect_partial(&recon_state); 465162306a36Sopenharmony_ci if (err) 465262306a36Sopenharmony_ci goto fail; 465362306a36Sopenharmony_ci } 465462306a36Sopenharmony_ci recon_state.msg_version = 5; 
465562306a36Sopenharmony_ci } 465662306a36Sopenharmony_ci } 465762306a36Sopenharmony_ci 465862306a36Sopenharmony_ci err = encode_snap_realms(mdsc, &recon_state); 465962306a36Sopenharmony_ci if (err < 0) 466062306a36Sopenharmony_ci goto fail; 466162306a36Sopenharmony_ci 466262306a36Sopenharmony_ci if (recon_state.msg_version >= 5) { 466362306a36Sopenharmony_ci err = ceph_pagelist_encode_8(recon_state.pagelist, 0); 466462306a36Sopenharmony_ci if (err < 0) 466562306a36Sopenharmony_ci goto fail; 466662306a36Sopenharmony_ci } 466762306a36Sopenharmony_ci 466862306a36Sopenharmony_ci if (recon_state.nr_caps || recon_state.nr_realms) { 466962306a36Sopenharmony_ci struct page *page = 467062306a36Sopenharmony_ci list_first_entry(&recon_state.pagelist->head, 467162306a36Sopenharmony_ci struct page, lru); 467262306a36Sopenharmony_ci __le32 *addr = kmap_atomic(page); 467362306a36Sopenharmony_ci if (recon_state.nr_caps) { 467462306a36Sopenharmony_ci WARN_ON(recon_state.nr_realms != mdsc->num_snap_realms); 467562306a36Sopenharmony_ci *addr = cpu_to_le32(recon_state.nr_caps); 467662306a36Sopenharmony_ci } else if (recon_state.msg_version >= 4) { 467762306a36Sopenharmony_ci *(addr + 1) = cpu_to_le32(recon_state.nr_realms); 467862306a36Sopenharmony_ci } 467962306a36Sopenharmony_ci kunmap_atomic(addr); 468062306a36Sopenharmony_ci } 468162306a36Sopenharmony_ci 468262306a36Sopenharmony_ci reply->hdr.version = cpu_to_le16(recon_state.msg_version); 468362306a36Sopenharmony_ci if (recon_state.msg_version >= 4) 468462306a36Sopenharmony_ci reply->hdr.compat_version = cpu_to_le16(4); 468562306a36Sopenharmony_ci 468662306a36Sopenharmony_ci reply->hdr.data_len = cpu_to_le32(recon_state.pagelist->length); 468762306a36Sopenharmony_ci ceph_msg_data_add_pagelist(reply, recon_state.pagelist); 468862306a36Sopenharmony_ci 468962306a36Sopenharmony_ci ceph_con_send(&session->s_con, reply); 469062306a36Sopenharmony_ci 469162306a36Sopenharmony_ci mutex_unlock(&session->s_mutex); 
469262306a36Sopenharmony_ci 469362306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 469462306a36Sopenharmony_ci __wake_requests(mdsc, &session->s_waiting); 469562306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 469662306a36Sopenharmony_ci 469762306a36Sopenharmony_ci up_read(&mdsc->snap_rwsem); 469862306a36Sopenharmony_ci ceph_pagelist_release(recon_state.pagelist); 469962306a36Sopenharmony_ci return; 470062306a36Sopenharmony_ci 470162306a36Sopenharmony_cifail: 470262306a36Sopenharmony_ci ceph_msg_put(reply); 470362306a36Sopenharmony_ci up_read(&mdsc->snap_rwsem); 470462306a36Sopenharmony_ci mutex_unlock(&session->s_mutex); 470562306a36Sopenharmony_cifail_nomsg: 470662306a36Sopenharmony_ci ceph_pagelist_release(recon_state.pagelist); 470762306a36Sopenharmony_cifail_nopagelist: 470862306a36Sopenharmony_ci pr_err("error %d preparing reconnect for mds%d\n", err, mds); 470962306a36Sopenharmony_ci return; 471062306a36Sopenharmony_ci} 471162306a36Sopenharmony_ci 471262306a36Sopenharmony_ci 471362306a36Sopenharmony_ci/* 471462306a36Sopenharmony_ci * compare old and new mdsmaps, kicking requests 471562306a36Sopenharmony_ci * and closing out old connections as necessary 471662306a36Sopenharmony_ci * 471762306a36Sopenharmony_ci * called under mdsc->mutex. 
471862306a36Sopenharmony_ci */ 471962306a36Sopenharmony_cistatic void check_new_map(struct ceph_mds_client *mdsc, 472062306a36Sopenharmony_ci struct ceph_mdsmap *newmap, 472162306a36Sopenharmony_ci struct ceph_mdsmap *oldmap) 472262306a36Sopenharmony_ci{ 472362306a36Sopenharmony_ci int i, j, err; 472462306a36Sopenharmony_ci int oldstate, newstate; 472562306a36Sopenharmony_ci struct ceph_mds_session *s; 472662306a36Sopenharmony_ci unsigned long targets[DIV_ROUND_UP(CEPH_MAX_MDS, sizeof(unsigned long))] = {0}; 472762306a36Sopenharmony_ci 472862306a36Sopenharmony_ci dout("check_new_map new %u old %u\n", 472962306a36Sopenharmony_ci newmap->m_epoch, oldmap->m_epoch); 473062306a36Sopenharmony_ci 473162306a36Sopenharmony_ci if (newmap->m_info) { 473262306a36Sopenharmony_ci for (i = 0; i < newmap->possible_max_rank; i++) { 473362306a36Sopenharmony_ci for (j = 0; j < newmap->m_info[i].num_export_targets; j++) 473462306a36Sopenharmony_ci set_bit(newmap->m_info[i].export_targets[j], targets); 473562306a36Sopenharmony_ci } 473662306a36Sopenharmony_ci } 473762306a36Sopenharmony_ci 473862306a36Sopenharmony_ci for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) { 473962306a36Sopenharmony_ci if (!mdsc->sessions[i]) 474062306a36Sopenharmony_ci continue; 474162306a36Sopenharmony_ci s = mdsc->sessions[i]; 474262306a36Sopenharmony_ci oldstate = ceph_mdsmap_get_state(oldmap, i); 474362306a36Sopenharmony_ci newstate = ceph_mdsmap_get_state(newmap, i); 474462306a36Sopenharmony_ci 474562306a36Sopenharmony_ci dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n", 474662306a36Sopenharmony_ci i, ceph_mds_state_name(oldstate), 474762306a36Sopenharmony_ci ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", 474862306a36Sopenharmony_ci ceph_mds_state_name(newstate), 474962306a36Sopenharmony_ci ceph_mdsmap_is_laggy(newmap, i) ? 
" (laggy)" : "", 475062306a36Sopenharmony_ci ceph_session_state_name(s->s_state)); 475162306a36Sopenharmony_ci 475262306a36Sopenharmony_ci if (i >= newmap->possible_max_rank) { 475362306a36Sopenharmony_ci /* force close session for stopped mds */ 475462306a36Sopenharmony_ci ceph_get_mds_session(s); 475562306a36Sopenharmony_ci __unregister_session(mdsc, s); 475662306a36Sopenharmony_ci __wake_requests(mdsc, &s->s_waiting); 475762306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 475862306a36Sopenharmony_ci 475962306a36Sopenharmony_ci mutex_lock(&s->s_mutex); 476062306a36Sopenharmony_ci cleanup_session_requests(mdsc, s); 476162306a36Sopenharmony_ci remove_session_caps(s); 476262306a36Sopenharmony_ci mutex_unlock(&s->s_mutex); 476362306a36Sopenharmony_ci 476462306a36Sopenharmony_ci ceph_put_mds_session(s); 476562306a36Sopenharmony_ci 476662306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 476762306a36Sopenharmony_ci kick_requests(mdsc, i); 476862306a36Sopenharmony_ci continue; 476962306a36Sopenharmony_ci } 477062306a36Sopenharmony_ci 477162306a36Sopenharmony_ci if (memcmp(ceph_mdsmap_get_addr(oldmap, i), 477262306a36Sopenharmony_ci ceph_mdsmap_get_addr(newmap, i), 477362306a36Sopenharmony_ci sizeof(struct ceph_entity_addr))) { 477462306a36Sopenharmony_ci /* just close it */ 477562306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 477662306a36Sopenharmony_ci mutex_lock(&s->s_mutex); 477762306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 477862306a36Sopenharmony_ci ceph_con_close(&s->s_con); 477962306a36Sopenharmony_ci mutex_unlock(&s->s_mutex); 478062306a36Sopenharmony_ci s->s_state = CEPH_MDS_SESSION_RESTARTING; 478162306a36Sopenharmony_ci } else if (oldstate == newstate) { 478262306a36Sopenharmony_ci continue; /* nothing new with this mds */ 478362306a36Sopenharmony_ci } 478462306a36Sopenharmony_ci 478562306a36Sopenharmony_ci /* 478662306a36Sopenharmony_ci * send reconnect? 
478762306a36Sopenharmony_ci */ 478862306a36Sopenharmony_ci if (s->s_state == CEPH_MDS_SESSION_RESTARTING && 478962306a36Sopenharmony_ci newstate >= CEPH_MDS_STATE_RECONNECT) { 479062306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 479162306a36Sopenharmony_ci clear_bit(i, targets); 479262306a36Sopenharmony_ci send_mds_reconnect(mdsc, s); 479362306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 479462306a36Sopenharmony_ci } 479562306a36Sopenharmony_ci 479662306a36Sopenharmony_ci /* 479762306a36Sopenharmony_ci * kick request on any mds that has gone active. 479862306a36Sopenharmony_ci */ 479962306a36Sopenharmony_ci if (oldstate < CEPH_MDS_STATE_ACTIVE && 480062306a36Sopenharmony_ci newstate >= CEPH_MDS_STATE_ACTIVE) { 480162306a36Sopenharmony_ci if (oldstate != CEPH_MDS_STATE_CREATING && 480262306a36Sopenharmony_ci oldstate != CEPH_MDS_STATE_STARTING) 480362306a36Sopenharmony_ci pr_info("mds%d recovery completed\n", s->s_mds); 480462306a36Sopenharmony_ci kick_requests(mdsc, i); 480562306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 480662306a36Sopenharmony_ci mutex_lock(&s->s_mutex); 480762306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 480862306a36Sopenharmony_ci ceph_kick_flushing_caps(mdsc, s); 480962306a36Sopenharmony_ci mutex_unlock(&s->s_mutex); 481062306a36Sopenharmony_ci wake_up_session_caps(s, RECONNECT); 481162306a36Sopenharmony_ci } 481262306a36Sopenharmony_ci } 481362306a36Sopenharmony_ci 481462306a36Sopenharmony_ci /* 481562306a36Sopenharmony_ci * Only open and reconnect sessions that don't exist yet. 
481662306a36Sopenharmony_ci */ 481762306a36Sopenharmony_ci for (i = 0; i < newmap->possible_max_rank; i++) { 481862306a36Sopenharmony_ci /* 481962306a36Sopenharmony_ci * In case the import MDS is crashed just after 482062306a36Sopenharmony_ci * the EImportStart journal is flushed, so when 482162306a36Sopenharmony_ci * a standby MDS takes over it and is replaying 482262306a36Sopenharmony_ci * the EImportStart journal the new MDS daemon 482362306a36Sopenharmony_ci * will wait the client to reconnect it, but the 482462306a36Sopenharmony_ci * client may never register/open the session yet. 482562306a36Sopenharmony_ci * 482662306a36Sopenharmony_ci * Will try to reconnect that MDS daemon if the 482762306a36Sopenharmony_ci * rank number is in the export targets array and 482862306a36Sopenharmony_ci * is the up:reconnect state. 482962306a36Sopenharmony_ci */ 483062306a36Sopenharmony_ci newstate = ceph_mdsmap_get_state(newmap, i); 483162306a36Sopenharmony_ci if (!test_bit(i, targets) || newstate != CEPH_MDS_STATE_RECONNECT) 483262306a36Sopenharmony_ci continue; 483362306a36Sopenharmony_ci 483462306a36Sopenharmony_ci /* 483562306a36Sopenharmony_ci * The session maybe registered and opened by some 483662306a36Sopenharmony_ci * requests which were choosing random MDSes during 483762306a36Sopenharmony_ci * the mdsc->mutex's unlock/lock gap below in rare 483862306a36Sopenharmony_ci * case. But the related MDS daemon will just queue 483962306a36Sopenharmony_ci * that requests and be still waiting for the client's 484062306a36Sopenharmony_ci * reconnection request in up:reconnect state. 
484162306a36Sopenharmony_ci */ 484262306a36Sopenharmony_ci s = __ceph_lookup_mds_session(mdsc, i); 484362306a36Sopenharmony_ci if (likely(!s)) { 484462306a36Sopenharmony_ci s = __open_export_target_session(mdsc, i); 484562306a36Sopenharmony_ci if (IS_ERR(s)) { 484662306a36Sopenharmony_ci err = PTR_ERR(s); 484762306a36Sopenharmony_ci pr_err("failed to open export target session, err %d\n", 484862306a36Sopenharmony_ci err); 484962306a36Sopenharmony_ci continue; 485062306a36Sopenharmony_ci } 485162306a36Sopenharmony_ci } 485262306a36Sopenharmony_ci dout("send reconnect to export target mds.%d\n", i); 485362306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 485462306a36Sopenharmony_ci send_mds_reconnect(mdsc, s); 485562306a36Sopenharmony_ci ceph_put_mds_session(s); 485662306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 485762306a36Sopenharmony_ci } 485862306a36Sopenharmony_ci 485962306a36Sopenharmony_ci for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) { 486062306a36Sopenharmony_ci s = mdsc->sessions[i]; 486162306a36Sopenharmony_ci if (!s) 486262306a36Sopenharmony_ci continue; 486362306a36Sopenharmony_ci if (!ceph_mdsmap_is_laggy(newmap, i)) 486462306a36Sopenharmony_ci continue; 486562306a36Sopenharmony_ci if (s->s_state == CEPH_MDS_SESSION_OPEN || 486662306a36Sopenharmony_ci s->s_state == CEPH_MDS_SESSION_HUNG || 486762306a36Sopenharmony_ci s->s_state == CEPH_MDS_SESSION_CLOSING) { 486862306a36Sopenharmony_ci dout(" connecting to export targets of laggy mds%d\n", 486962306a36Sopenharmony_ci i); 487062306a36Sopenharmony_ci __open_export_target_sessions(mdsc, s); 487162306a36Sopenharmony_ci } 487262306a36Sopenharmony_ci } 487362306a36Sopenharmony_ci} 487462306a36Sopenharmony_ci 487562306a36Sopenharmony_ci 487662306a36Sopenharmony_ci 487762306a36Sopenharmony_ci/* 487862306a36Sopenharmony_ci * leases 487962306a36Sopenharmony_ci */ 488062306a36Sopenharmony_ci 488162306a36Sopenharmony_ci/* 488262306a36Sopenharmony_ci * caller must hold session s_mutex, 
dentry->d_lock 488362306a36Sopenharmony_ci */ 488462306a36Sopenharmony_civoid __ceph_mdsc_drop_dentry_lease(struct dentry *dentry) 488562306a36Sopenharmony_ci{ 488662306a36Sopenharmony_ci struct ceph_dentry_info *di = ceph_dentry(dentry); 488762306a36Sopenharmony_ci 488862306a36Sopenharmony_ci ceph_put_mds_session(di->lease_session); 488962306a36Sopenharmony_ci di->lease_session = NULL; 489062306a36Sopenharmony_ci} 489162306a36Sopenharmony_ci 489262306a36Sopenharmony_cistatic void handle_lease(struct ceph_mds_client *mdsc, 489362306a36Sopenharmony_ci struct ceph_mds_session *session, 489462306a36Sopenharmony_ci struct ceph_msg *msg) 489562306a36Sopenharmony_ci{ 489662306a36Sopenharmony_ci struct super_block *sb = mdsc->fsc->sb; 489762306a36Sopenharmony_ci struct inode *inode; 489862306a36Sopenharmony_ci struct dentry *parent, *dentry; 489962306a36Sopenharmony_ci struct ceph_dentry_info *di; 490062306a36Sopenharmony_ci int mds = session->s_mds; 490162306a36Sopenharmony_ci struct ceph_mds_lease *h = msg->front.iov_base; 490262306a36Sopenharmony_ci u32 seq; 490362306a36Sopenharmony_ci struct ceph_vino vino; 490462306a36Sopenharmony_ci struct qstr dname; 490562306a36Sopenharmony_ci int release = 0; 490662306a36Sopenharmony_ci 490762306a36Sopenharmony_ci dout("handle_lease from mds%d\n", mds); 490862306a36Sopenharmony_ci 490962306a36Sopenharmony_ci if (!ceph_inc_mds_stopping_blocker(mdsc, session)) 491062306a36Sopenharmony_ci return; 491162306a36Sopenharmony_ci 491262306a36Sopenharmony_ci /* decode */ 491362306a36Sopenharmony_ci if (msg->front.iov_len < sizeof(*h) + sizeof(u32)) 491462306a36Sopenharmony_ci goto bad; 491562306a36Sopenharmony_ci vino.ino = le64_to_cpu(h->ino); 491662306a36Sopenharmony_ci vino.snap = CEPH_NOSNAP; 491762306a36Sopenharmony_ci seq = le32_to_cpu(h->seq); 491862306a36Sopenharmony_ci dname.len = get_unaligned_le32(h + 1); 491962306a36Sopenharmony_ci if (msg->front.iov_len < sizeof(*h) + sizeof(u32) + dname.len) 492062306a36Sopenharmony_ci goto 
bad; 492162306a36Sopenharmony_ci dname.name = (void *)(h + 1) + sizeof(u32); 492262306a36Sopenharmony_ci 492362306a36Sopenharmony_ci /* lookup inode */ 492462306a36Sopenharmony_ci inode = ceph_find_inode(sb, vino); 492562306a36Sopenharmony_ci dout("handle_lease %s, ino %llx %p %.*s\n", 492662306a36Sopenharmony_ci ceph_lease_op_name(h->action), vino.ino, inode, 492762306a36Sopenharmony_ci dname.len, dname.name); 492862306a36Sopenharmony_ci 492962306a36Sopenharmony_ci mutex_lock(&session->s_mutex); 493062306a36Sopenharmony_ci if (!inode) { 493162306a36Sopenharmony_ci dout("handle_lease no inode %llx\n", vino.ino); 493262306a36Sopenharmony_ci goto release; 493362306a36Sopenharmony_ci } 493462306a36Sopenharmony_ci 493562306a36Sopenharmony_ci /* dentry */ 493662306a36Sopenharmony_ci parent = d_find_alias(inode); 493762306a36Sopenharmony_ci if (!parent) { 493862306a36Sopenharmony_ci dout("no parent dentry on inode %p\n", inode); 493962306a36Sopenharmony_ci WARN_ON(1); 494062306a36Sopenharmony_ci goto release; /* hrm... 
*/ 494162306a36Sopenharmony_ci } 494262306a36Sopenharmony_ci dname.hash = full_name_hash(parent, dname.name, dname.len); 494362306a36Sopenharmony_ci dentry = d_lookup(parent, &dname); 494462306a36Sopenharmony_ci dput(parent); 494562306a36Sopenharmony_ci if (!dentry) 494662306a36Sopenharmony_ci goto release; 494762306a36Sopenharmony_ci 494862306a36Sopenharmony_ci spin_lock(&dentry->d_lock); 494962306a36Sopenharmony_ci di = ceph_dentry(dentry); 495062306a36Sopenharmony_ci switch (h->action) { 495162306a36Sopenharmony_ci case CEPH_MDS_LEASE_REVOKE: 495262306a36Sopenharmony_ci if (di->lease_session == session) { 495362306a36Sopenharmony_ci if (ceph_seq_cmp(di->lease_seq, seq) > 0) 495462306a36Sopenharmony_ci h->seq = cpu_to_le32(di->lease_seq); 495562306a36Sopenharmony_ci __ceph_mdsc_drop_dentry_lease(dentry); 495662306a36Sopenharmony_ci } 495762306a36Sopenharmony_ci release = 1; 495862306a36Sopenharmony_ci break; 495962306a36Sopenharmony_ci 496062306a36Sopenharmony_ci case CEPH_MDS_LEASE_RENEW: 496162306a36Sopenharmony_ci if (di->lease_session == session && 496262306a36Sopenharmony_ci di->lease_gen == atomic_read(&session->s_cap_gen) && 496362306a36Sopenharmony_ci di->lease_renew_from && 496462306a36Sopenharmony_ci di->lease_renew_after == 0) { 496562306a36Sopenharmony_ci unsigned long duration = 496662306a36Sopenharmony_ci msecs_to_jiffies(le32_to_cpu(h->duration_ms)); 496762306a36Sopenharmony_ci 496862306a36Sopenharmony_ci di->lease_seq = seq; 496962306a36Sopenharmony_ci di->time = di->lease_renew_from + duration; 497062306a36Sopenharmony_ci di->lease_renew_after = di->lease_renew_from + 497162306a36Sopenharmony_ci (duration >> 1); 497262306a36Sopenharmony_ci di->lease_renew_from = 0; 497362306a36Sopenharmony_ci } 497462306a36Sopenharmony_ci break; 497562306a36Sopenharmony_ci } 497662306a36Sopenharmony_ci spin_unlock(&dentry->d_lock); 497762306a36Sopenharmony_ci dput(dentry); 497862306a36Sopenharmony_ci 497962306a36Sopenharmony_ci if (!release) 
498062306a36Sopenharmony_ci goto out; 498162306a36Sopenharmony_ci 498262306a36Sopenharmony_cirelease: 498362306a36Sopenharmony_ci /* let's just reuse the same message */ 498462306a36Sopenharmony_ci h->action = CEPH_MDS_LEASE_REVOKE_ACK; 498562306a36Sopenharmony_ci ceph_msg_get(msg); 498662306a36Sopenharmony_ci ceph_con_send(&session->s_con, msg); 498762306a36Sopenharmony_ci 498862306a36Sopenharmony_ciout: 498962306a36Sopenharmony_ci mutex_unlock(&session->s_mutex); 499062306a36Sopenharmony_ci iput(inode); 499162306a36Sopenharmony_ci 499262306a36Sopenharmony_ci ceph_dec_mds_stopping_blocker(mdsc); 499362306a36Sopenharmony_ci return; 499462306a36Sopenharmony_ci 499562306a36Sopenharmony_cibad: 499662306a36Sopenharmony_ci ceph_dec_mds_stopping_blocker(mdsc); 499762306a36Sopenharmony_ci 499862306a36Sopenharmony_ci pr_err("corrupt lease message\n"); 499962306a36Sopenharmony_ci ceph_msg_dump(msg); 500062306a36Sopenharmony_ci} 500162306a36Sopenharmony_ci 500262306a36Sopenharmony_civoid ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, 500362306a36Sopenharmony_ci struct dentry *dentry, char action, 500462306a36Sopenharmony_ci u32 seq) 500562306a36Sopenharmony_ci{ 500662306a36Sopenharmony_ci struct ceph_msg *msg; 500762306a36Sopenharmony_ci struct ceph_mds_lease *lease; 500862306a36Sopenharmony_ci struct inode *dir; 500962306a36Sopenharmony_ci int len = sizeof(*lease) + sizeof(u32) + NAME_MAX; 501062306a36Sopenharmony_ci 501162306a36Sopenharmony_ci dout("lease_send_msg identry %p %s to mds%d\n", 501262306a36Sopenharmony_ci dentry, ceph_lease_op_name(action), session->s_mds); 501362306a36Sopenharmony_ci 501462306a36Sopenharmony_ci msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false); 501562306a36Sopenharmony_ci if (!msg) 501662306a36Sopenharmony_ci return; 501762306a36Sopenharmony_ci lease = msg->front.iov_base; 501862306a36Sopenharmony_ci lease->action = action; 501962306a36Sopenharmony_ci lease->seq = cpu_to_le32(seq); 502062306a36Sopenharmony_ci 
502162306a36Sopenharmony_ci spin_lock(&dentry->d_lock); 502262306a36Sopenharmony_ci dir = d_inode(dentry->d_parent); 502362306a36Sopenharmony_ci lease->ino = cpu_to_le64(ceph_ino(dir)); 502462306a36Sopenharmony_ci lease->first = lease->last = cpu_to_le64(ceph_snap(dir)); 502562306a36Sopenharmony_ci 502662306a36Sopenharmony_ci put_unaligned_le32(dentry->d_name.len, lease + 1); 502762306a36Sopenharmony_ci memcpy((void *)(lease + 1) + 4, 502862306a36Sopenharmony_ci dentry->d_name.name, dentry->d_name.len); 502962306a36Sopenharmony_ci spin_unlock(&dentry->d_lock); 503062306a36Sopenharmony_ci 503162306a36Sopenharmony_ci ceph_con_send(&session->s_con, msg); 503262306a36Sopenharmony_ci} 503362306a36Sopenharmony_ci 503462306a36Sopenharmony_ci/* 503562306a36Sopenharmony_ci * lock unlock the session, to wait ongoing session activities 503662306a36Sopenharmony_ci */ 503762306a36Sopenharmony_cistatic void lock_unlock_session(struct ceph_mds_session *s) 503862306a36Sopenharmony_ci{ 503962306a36Sopenharmony_ci mutex_lock(&s->s_mutex); 504062306a36Sopenharmony_ci mutex_unlock(&s->s_mutex); 504162306a36Sopenharmony_ci} 504262306a36Sopenharmony_ci 504362306a36Sopenharmony_cistatic void maybe_recover_session(struct ceph_mds_client *mdsc) 504462306a36Sopenharmony_ci{ 504562306a36Sopenharmony_ci struct ceph_fs_client *fsc = mdsc->fsc; 504662306a36Sopenharmony_ci 504762306a36Sopenharmony_ci if (!ceph_test_mount_opt(fsc, CLEANRECOVER)) 504862306a36Sopenharmony_ci return; 504962306a36Sopenharmony_ci 505062306a36Sopenharmony_ci if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_MOUNTED) 505162306a36Sopenharmony_ci return; 505262306a36Sopenharmony_ci 505362306a36Sopenharmony_ci if (!READ_ONCE(fsc->blocklisted)) 505462306a36Sopenharmony_ci return; 505562306a36Sopenharmony_ci 505662306a36Sopenharmony_ci pr_info("auto reconnect after blocklisted\n"); 505762306a36Sopenharmony_ci ceph_force_reconnect(fsc->sb); 505862306a36Sopenharmony_ci} 505962306a36Sopenharmony_ci 506062306a36Sopenharmony_cibool 
check_session_state(struct ceph_mds_session *s) 506162306a36Sopenharmony_ci{ 506262306a36Sopenharmony_ci switch (s->s_state) { 506362306a36Sopenharmony_ci case CEPH_MDS_SESSION_OPEN: 506462306a36Sopenharmony_ci if (s->s_ttl && time_after(jiffies, s->s_ttl)) { 506562306a36Sopenharmony_ci s->s_state = CEPH_MDS_SESSION_HUNG; 506662306a36Sopenharmony_ci pr_info("mds%d hung\n", s->s_mds); 506762306a36Sopenharmony_ci } 506862306a36Sopenharmony_ci break; 506962306a36Sopenharmony_ci case CEPH_MDS_SESSION_CLOSING: 507062306a36Sopenharmony_ci case CEPH_MDS_SESSION_NEW: 507162306a36Sopenharmony_ci case CEPH_MDS_SESSION_RESTARTING: 507262306a36Sopenharmony_ci case CEPH_MDS_SESSION_CLOSED: 507362306a36Sopenharmony_ci case CEPH_MDS_SESSION_REJECTED: 507462306a36Sopenharmony_ci return false; 507562306a36Sopenharmony_ci } 507662306a36Sopenharmony_ci 507762306a36Sopenharmony_ci return true; 507862306a36Sopenharmony_ci} 507962306a36Sopenharmony_ci 508062306a36Sopenharmony_ci/* 508162306a36Sopenharmony_ci * If the sequence is incremented while we're waiting on a REQUEST_CLOSE reply, 508262306a36Sopenharmony_ci * then we need to retransmit that request. 
508362306a36Sopenharmony_ci */ 508462306a36Sopenharmony_civoid inc_session_sequence(struct ceph_mds_session *s) 508562306a36Sopenharmony_ci{ 508662306a36Sopenharmony_ci lockdep_assert_held(&s->s_mutex); 508762306a36Sopenharmony_ci 508862306a36Sopenharmony_ci s->s_seq++; 508962306a36Sopenharmony_ci 509062306a36Sopenharmony_ci if (s->s_state == CEPH_MDS_SESSION_CLOSING) { 509162306a36Sopenharmony_ci int ret; 509262306a36Sopenharmony_ci 509362306a36Sopenharmony_ci dout("resending session close request for mds%d\n", s->s_mds); 509462306a36Sopenharmony_ci ret = request_close_session(s); 509562306a36Sopenharmony_ci if (ret < 0) 509662306a36Sopenharmony_ci pr_err("unable to close session to mds%d: %d\n", 509762306a36Sopenharmony_ci s->s_mds, ret); 509862306a36Sopenharmony_ci } 509962306a36Sopenharmony_ci} 510062306a36Sopenharmony_ci 510162306a36Sopenharmony_ci/* 510262306a36Sopenharmony_ci * delayed work -- periodically trim expired leases, renew caps with mds. If 510362306a36Sopenharmony_ci * the @delay parameter is set to 0 or if it's more than 5 secs, the default 510462306a36Sopenharmony_ci * workqueue delay value of 5 secs will be used. 
510562306a36Sopenharmony_ci */ 510662306a36Sopenharmony_cistatic void schedule_delayed(struct ceph_mds_client *mdsc, unsigned long delay) 510762306a36Sopenharmony_ci{ 510862306a36Sopenharmony_ci unsigned long max_delay = HZ * 5; 510962306a36Sopenharmony_ci 511062306a36Sopenharmony_ci /* 5 secs default delay */ 511162306a36Sopenharmony_ci if (!delay || (delay > max_delay)) 511262306a36Sopenharmony_ci delay = max_delay; 511362306a36Sopenharmony_ci schedule_delayed_work(&mdsc->delayed_work, 511462306a36Sopenharmony_ci round_jiffies_relative(delay)); 511562306a36Sopenharmony_ci} 511662306a36Sopenharmony_ci 511762306a36Sopenharmony_cistatic void delayed_work(struct work_struct *work) 511862306a36Sopenharmony_ci{ 511962306a36Sopenharmony_ci struct ceph_mds_client *mdsc = 512062306a36Sopenharmony_ci container_of(work, struct ceph_mds_client, delayed_work.work); 512162306a36Sopenharmony_ci unsigned long delay; 512262306a36Sopenharmony_ci int renew_interval; 512362306a36Sopenharmony_ci int renew_caps; 512462306a36Sopenharmony_ci int i; 512562306a36Sopenharmony_ci 512662306a36Sopenharmony_ci dout("mdsc delayed_work\n"); 512762306a36Sopenharmony_ci 512862306a36Sopenharmony_ci if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) 512962306a36Sopenharmony_ci return; 513062306a36Sopenharmony_ci 513162306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 513262306a36Sopenharmony_ci renew_interval = mdsc->mdsmap->m_session_timeout >> 2; 513362306a36Sopenharmony_ci renew_caps = time_after_eq(jiffies, HZ*renew_interval + 513462306a36Sopenharmony_ci mdsc->last_renew_caps); 513562306a36Sopenharmony_ci if (renew_caps) 513662306a36Sopenharmony_ci mdsc->last_renew_caps = jiffies; 513762306a36Sopenharmony_ci 513862306a36Sopenharmony_ci for (i = 0; i < mdsc->max_sessions; i++) { 513962306a36Sopenharmony_ci struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); 514062306a36Sopenharmony_ci if (!s) 514162306a36Sopenharmony_ci continue; 514262306a36Sopenharmony_ci 514362306a36Sopenharmony_ci 
if (!check_session_state(s)) { 514462306a36Sopenharmony_ci ceph_put_mds_session(s); 514562306a36Sopenharmony_ci continue; 514662306a36Sopenharmony_ci } 514762306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 514862306a36Sopenharmony_ci 514962306a36Sopenharmony_ci mutex_lock(&s->s_mutex); 515062306a36Sopenharmony_ci if (renew_caps) 515162306a36Sopenharmony_ci send_renew_caps(mdsc, s); 515262306a36Sopenharmony_ci else 515362306a36Sopenharmony_ci ceph_con_keepalive(&s->s_con); 515462306a36Sopenharmony_ci if (s->s_state == CEPH_MDS_SESSION_OPEN || 515562306a36Sopenharmony_ci s->s_state == CEPH_MDS_SESSION_HUNG) 515662306a36Sopenharmony_ci ceph_send_cap_releases(mdsc, s); 515762306a36Sopenharmony_ci mutex_unlock(&s->s_mutex); 515862306a36Sopenharmony_ci ceph_put_mds_session(s); 515962306a36Sopenharmony_ci 516062306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 516162306a36Sopenharmony_ci } 516262306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 516362306a36Sopenharmony_ci 516462306a36Sopenharmony_ci delay = ceph_check_delayed_caps(mdsc); 516562306a36Sopenharmony_ci 516662306a36Sopenharmony_ci ceph_queue_cap_reclaim_work(mdsc); 516762306a36Sopenharmony_ci 516862306a36Sopenharmony_ci ceph_trim_snapid_map(mdsc); 516962306a36Sopenharmony_ci 517062306a36Sopenharmony_ci maybe_recover_session(mdsc); 517162306a36Sopenharmony_ci 517262306a36Sopenharmony_ci schedule_delayed(mdsc, delay); 517362306a36Sopenharmony_ci} 517462306a36Sopenharmony_ci 517562306a36Sopenharmony_ciint ceph_mdsc_init(struct ceph_fs_client *fsc) 517662306a36Sopenharmony_ci 517762306a36Sopenharmony_ci{ 517862306a36Sopenharmony_ci struct ceph_mds_client *mdsc; 517962306a36Sopenharmony_ci int err; 518062306a36Sopenharmony_ci 518162306a36Sopenharmony_ci mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS); 518262306a36Sopenharmony_ci if (!mdsc) 518362306a36Sopenharmony_ci return -ENOMEM; 518462306a36Sopenharmony_ci mdsc->fsc = fsc; 518562306a36Sopenharmony_ci mutex_init(&mdsc->mutex); 
518662306a36Sopenharmony_ci mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); 518762306a36Sopenharmony_ci if (!mdsc->mdsmap) { 518862306a36Sopenharmony_ci err = -ENOMEM; 518962306a36Sopenharmony_ci goto err_mdsc; 519062306a36Sopenharmony_ci } 519162306a36Sopenharmony_ci 519262306a36Sopenharmony_ci init_completion(&mdsc->safe_umount_waiters); 519362306a36Sopenharmony_ci spin_lock_init(&mdsc->stopping_lock); 519462306a36Sopenharmony_ci atomic_set(&mdsc->stopping_blockers, 0); 519562306a36Sopenharmony_ci init_completion(&mdsc->stopping_waiter); 519662306a36Sopenharmony_ci init_waitqueue_head(&mdsc->session_close_wq); 519762306a36Sopenharmony_ci INIT_LIST_HEAD(&mdsc->waiting_for_map); 519862306a36Sopenharmony_ci mdsc->quotarealms_inodes = RB_ROOT; 519962306a36Sopenharmony_ci mutex_init(&mdsc->quotarealms_inodes_mutex); 520062306a36Sopenharmony_ci init_rwsem(&mdsc->snap_rwsem); 520162306a36Sopenharmony_ci mdsc->snap_realms = RB_ROOT; 520262306a36Sopenharmony_ci INIT_LIST_HEAD(&mdsc->snap_empty); 520362306a36Sopenharmony_ci spin_lock_init(&mdsc->snap_empty_lock); 520462306a36Sopenharmony_ci mdsc->request_tree = RB_ROOT; 520562306a36Sopenharmony_ci INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); 520662306a36Sopenharmony_ci mdsc->last_renew_caps = jiffies; 520762306a36Sopenharmony_ci INIT_LIST_HEAD(&mdsc->cap_delay_list); 520862306a36Sopenharmony_ci INIT_LIST_HEAD(&mdsc->cap_wait_list); 520962306a36Sopenharmony_ci spin_lock_init(&mdsc->cap_delay_lock); 521062306a36Sopenharmony_ci INIT_LIST_HEAD(&mdsc->snap_flush_list); 521162306a36Sopenharmony_ci spin_lock_init(&mdsc->snap_flush_lock); 521262306a36Sopenharmony_ci mdsc->last_cap_flush_tid = 1; 521362306a36Sopenharmony_ci INIT_LIST_HEAD(&mdsc->cap_flush_list); 521462306a36Sopenharmony_ci INIT_LIST_HEAD(&mdsc->cap_dirty_migrating); 521562306a36Sopenharmony_ci spin_lock_init(&mdsc->cap_dirty_lock); 521662306a36Sopenharmony_ci init_waitqueue_head(&mdsc->cap_flushing_wq); 521762306a36Sopenharmony_ci 
INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work); 521862306a36Sopenharmony_ci err = ceph_metric_init(&mdsc->metric); 521962306a36Sopenharmony_ci if (err) 522062306a36Sopenharmony_ci goto err_mdsmap; 522162306a36Sopenharmony_ci 522262306a36Sopenharmony_ci spin_lock_init(&mdsc->dentry_list_lock); 522362306a36Sopenharmony_ci INIT_LIST_HEAD(&mdsc->dentry_leases); 522462306a36Sopenharmony_ci INIT_LIST_HEAD(&mdsc->dentry_dir_leases); 522562306a36Sopenharmony_ci 522662306a36Sopenharmony_ci ceph_caps_init(mdsc); 522762306a36Sopenharmony_ci ceph_adjust_caps_max_min(mdsc, fsc->mount_options); 522862306a36Sopenharmony_ci 522962306a36Sopenharmony_ci spin_lock_init(&mdsc->snapid_map_lock); 523062306a36Sopenharmony_ci mdsc->snapid_map_tree = RB_ROOT; 523162306a36Sopenharmony_ci INIT_LIST_HEAD(&mdsc->snapid_map_lru); 523262306a36Sopenharmony_ci 523362306a36Sopenharmony_ci init_rwsem(&mdsc->pool_perm_rwsem); 523462306a36Sopenharmony_ci mdsc->pool_perm_tree = RB_ROOT; 523562306a36Sopenharmony_ci 523662306a36Sopenharmony_ci strscpy(mdsc->nodename, utsname()->nodename, 523762306a36Sopenharmony_ci sizeof(mdsc->nodename)); 523862306a36Sopenharmony_ci 523962306a36Sopenharmony_ci fsc->mdsc = mdsc; 524062306a36Sopenharmony_ci return 0; 524162306a36Sopenharmony_ci 524262306a36Sopenharmony_cierr_mdsmap: 524362306a36Sopenharmony_ci kfree(mdsc->mdsmap); 524462306a36Sopenharmony_cierr_mdsc: 524562306a36Sopenharmony_ci kfree(mdsc); 524662306a36Sopenharmony_ci return err; 524762306a36Sopenharmony_ci} 524862306a36Sopenharmony_ci 524962306a36Sopenharmony_ci/* 525062306a36Sopenharmony_ci * Wait for safe replies on open mds requests. If we time out, drop 525162306a36Sopenharmony_ci * all requests from the tree to avoid dangling dentry refs. 
525262306a36Sopenharmony_ci */ 525362306a36Sopenharmony_cistatic void wait_requests(struct ceph_mds_client *mdsc) 525462306a36Sopenharmony_ci{ 525562306a36Sopenharmony_ci struct ceph_options *opts = mdsc->fsc->client->options; 525662306a36Sopenharmony_ci struct ceph_mds_request *req; 525762306a36Sopenharmony_ci 525862306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 525962306a36Sopenharmony_ci if (__get_oldest_req(mdsc)) { 526062306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 526162306a36Sopenharmony_ci 526262306a36Sopenharmony_ci dout("wait_requests waiting for requests\n"); 526362306a36Sopenharmony_ci wait_for_completion_timeout(&mdsc->safe_umount_waiters, 526462306a36Sopenharmony_ci ceph_timeout_jiffies(opts->mount_timeout)); 526562306a36Sopenharmony_ci 526662306a36Sopenharmony_ci /* tear down remaining requests */ 526762306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 526862306a36Sopenharmony_ci while ((req = __get_oldest_req(mdsc))) { 526962306a36Sopenharmony_ci dout("wait_requests timed out on tid %llu\n", 527062306a36Sopenharmony_ci req->r_tid); 527162306a36Sopenharmony_ci list_del_init(&req->r_wait); 527262306a36Sopenharmony_ci __unregister_request(mdsc, req); 527362306a36Sopenharmony_ci } 527462306a36Sopenharmony_ci } 527562306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 527662306a36Sopenharmony_ci dout("wait_requests done\n"); 527762306a36Sopenharmony_ci} 527862306a36Sopenharmony_ci 527962306a36Sopenharmony_civoid send_flush_mdlog(struct ceph_mds_session *s) 528062306a36Sopenharmony_ci{ 528162306a36Sopenharmony_ci struct ceph_msg *msg; 528262306a36Sopenharmony_ci 528362306a36Sopenharmony_ci /* 528462306a36Sopenharmony_ci * Pre-luminous MDS crashes when it sees an unknown session request 528562306a36Sopenharmony_ci */ 528662306a36Sopenharmony_ci if (!CEPH_HAVE_FEATURE(s->s_con.peer_features, SERVER_LUMINOUS)) 528762306a36Sopenharmony_ci return; 528862306a36Sopenharmony_ci 528962306a36Sopenharmony_ci mutex_lock(&s->s_mutex); 529062306a36Sopenharmony_ci 
dout("request mdlog flush to mds%d (%s)s seq %lld\n", s->s_mds, 529162306a36Sopenharmony_ci ceph_session_state_name(s->s_state), s->s_seq); 529262306a36Sopenharmony_ci msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG, 529362306a36Sopenharmony_ci s->s_seq); 529462306a36Sopenharmony_ci if (!msg) { 529562306a36Sopenharmony_ci pr_err("failed to request mdlog flush to mds%d (%s) seq %lld\n", 529662306a36Sopenharmony_ci s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); 529762306a36Sopenharmony_ci } else { 529862306a36Sopenharmony_ci ceph_con_send(&s->s_con, msg); 529962306a36Sopenharmony_ci } 530062306a36Sopenharmony_ci mutex_unlock(&s->s_mutex); 530162306a36Sopenharmony_ci} 530262306a36Sopenharmony_ci 530362306a36Sopenharmony_ci/* 530462306a36Sopenharmony_ci * called before mount is ro, and before dentries are torn down. 530562306a36Sopenharmony_ci * (hmm, does this still race with new lookups?) 530662306a36Sopenharmony_ci */ 530762306a36Sopenharmony_civoid ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) 530862306a36Sopenharmony_ci{ 530962306a36Sopenharmony_ci dout("pre_umount\n"); 531062306a36Sopenharmony_ci mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN; 531162306a36Sopenharmony_ci 531262306a36Sopenharmony_ci ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true); 531362306a36Sopenharmony_ci ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false); 531462306a36Sopenharmony_ci ceph_flush_dirty_caps(mdsc); 531562306a36Sopenharmony_ci wait_requests(mdsc); 531662306a36Sopenharmony_ci 531762306a36Sopenharmony_ci /* 531862306a36Sopenharmony_ci * wait for reply handlers to drop their request refs and 531962306a36Sopenharmony_ci * their inode/dcache refs 532062306a36Sopenharmony_ci */ 532162306a36Sopenharmony_ci ceph_msgr_flush(); 532262306a36Sopenharmony_ci 532362306a36Sopenharmony_ci ceph_cleanup_quotarealms_inodes(mdsc); 532462306a36Sopenharmony_ci} 532562306a36Sopenharmony_ci 532662306a36Sopenharmony_ci/* 532762306a36Sopenharmony_ci * flush the 
mdlog and wait for all write mds requests to flush. 532862306a36Sopenharmony_ci */ 532962306a36Sopenharmony_cistatic void flush_mdlog_and_wait_mdsc_unsafe_requests(struct ceph_mds_client *mdsc, 533062306a36Sopenharmony_ci u64 want_tid) 533162306a36Sopenharmony_ci{ 533262306a36Sopenharmony_ci struct ceph_mds_request *req = NULL, *nextreq; 533362306a36Sopenharmony_ci struct ceph_mds_session *last_session = NULL; 533462306a36Sopenharmony_ci struct rb_node *n; 533562306a36Sopenharmony_ci 533662306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 533762306a36Sopenharmony_ci dout("%s want %lld\n", __func__, want_tid); 533862306a36Sopenharmony_cirestart: 533962306a36Sopenharmony_ci req = __get_oldest_req(mdsc); 534062306a36Sopenharmony_ci while (req && req->r_tid <= want_tid) { 534162306a36Sopenharmony_ci /* find next request */ 534262306a36Sopenharmony_ci n = rb_next(&req->r_node); 534362306a36Sopenharmony_ci if (n) 534462306a36Sopenharmony_ci nextreq = rb_entry(n, struct ceph_mds_request, r_node); 534562306a36Sopenharmony_ci else 534662306a36Sopenharmony_ci nextreq = NULL; 534762306a36Sopenharmony_ci if (req->r_op != CEPH_MDS_OP_SETFILELOCK && 534862306a36Sopenharmony_ci (req->r_op & CEPH_MDS_OP_WRITE)) { 534962306a36Sopenharmony_ci struct ceph_mds_session *s = req->r_session; 535062306a36Sopenharmony_ci 535162306a36Sopenharmony_ci if (!s) { 535262306a36Sopenharmony_ci req = nextreq; 535362306a36Sopenharmony_ci continue; 535462306a36Sopenharmony_ci } 535562306a36Sopenharmony_ci 535662306a36Sopenharmony_ci /* write op */ 535762306a36Sopenharmony_ci ceph_mdsc_get_request(req); 535862306a36Sopenharmony_ci if (nextreq) 535962306a36Sopenharmony_ci ceph_mdsc_get_request(nextreq); 536062306a36Sopenharmony_ci s = ceph_get_mds_session(s); 536162306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 536262306a36Sopenharmony_ci 536362306a36Sopenharmony_ci /* send flush mdlog request to MDS */ 536462306a36Sopenharmony_ci if (last_session != s) { 536562306a36Sopenharmony_ci 
send_flush_mdlog(s); 536662306a36Sopenharmony_ci ceph_put_mds_session(last_session); 536762306a36Sopenharmony_ci last_session = s; 536862306a36Sopenharmony_ci } else { 536962306a36Sopenharmony_ci ceph_put_mds_session(s); 537062306a36Sopenharmony_ci } 537162306a36Sopenharmony_ci dout("%s wait on %llu (want %llu)\n", __func__, 537262306a36Sopenharmony_ci req->r_tid, want_tid); 537362306a36Sopenharmony_ci wait_for_completion(&req->r_safe_completion); 537462306a36Sopenharmony_ci 537562306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 537662306a36Sopenharmony_ci ceph_mdsc_put_request(req); 537762306a36Sopenharmony_ci if (!nextreq) 537862306a36Sopenharmony_ci break; /* next dne before, so we're done! */ 537962306a36Sopenharmony_ci if (RB_EMPTY_NODE(&nextreq->r_node)) { 538062306a36Sopenharmony_ci /* next request was removed from tree */ 538162306a36Sopenharmony_ci ceph_mdsc_put_request(nextreq); 538262306a36Sopenharmony_ci goto restart; 538362306a36Sopenharmony_ci } 538462306a36Sopenharmony_ci ceph_mdsc_put_request(nextreq); /* won't go away */ 538562306a36Sopenharmony_ci } 538662306a36Sopenharmony_ci req = nextreq; 538762306a36Sopenharmony_ci } 538862306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 538962306a36Sopenharmony_ci ceph_put_mds_session(last_session); 539062306a36Sopenharmony_ci dout("%s done\n", __func__); 539162306a36Sopenharmony_ci} 539262306a36Sopenharmony_ci 539362306a36Sopenharmony_civoid ceph_mdsc_sync(struct ceph_mds_client *mdsc) 539462306a36Sopenharmony_ci{ 539562306a36Sopenharmony_ci u64 want_tid, want_flush; 539662306a36Sopenharmony_ci 539762306a36Sopenharmony_ci if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) 539862306a36Sopenharmony_ci return; 539962306a36Sopenharmony_ci 540062306a36Sopenharmony_ci dout("sync\n"); 540162306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 540262306a36Sopenharmony_ci want_tid = mdsc->last_tid; 540362306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 540462306a36Sopenharmony_ci 540562306a36Sopenharmony_ci 
ceph_flush_dirty_caps(mdsc); 540662306a36Sopenharmony_ci spin_lock(&mdsc->cap_dirty_lock); 540762306a36Sopenharmony_ci want_flush = mdsc->last_cap_flush_tid; 540862306a36Sopenharmony_ci if (!list_empty(&mdsc->cap_flush_list)) { 540962306a36Sopenharmony_ci struct ceph_cap_flush *cf = 541062306a36Sopenharmony_ci list_last_entry(&mdsc->cap_flush_list, 541162306a36Sopenharmony_ci struct ceph_cap_flush, g_list); 541262306a36Sopenharmony_ci cf->wake = true; 541362306a36Sopenharmony_ci } 541462306a36Sopenharmony_ci spin_unlock(&mdsc->cap_dirty_lock); 541562306a36Sopenharmony_ci 541662306a36Sopenharmony_ci dout("sync want tid %lld flush_seq %lld\n", 541762306a36Sopenharmony_ci want_tid, want_flush); 541862306a36Sopenharmony_ci 541962306a36Sopenharmony_ci flush_mdlog_and_wait_mdsc_unsafe_requests(mdsc, want_tid); 542062306a36Sopenharmony_ci wait_caps_flush(mdsc, want_flush); 542162306a36Sopenharmony_ci} 542262306a36Sopenharmony_ci 542362306a36Sopenharmony_ci/* 542462306a36Sopenharmony_ci * true if all sessions are closed, or we force unmount 542562306a36Sopenharmony_ci */ 542662306a36Sopenharmony_cistatic bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped) 542762306a36Sopenharmony_ci{ 542862306a36Sopenharmony_ci if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) 542962306a36Sopenharmony_ci return true; 543062306a36Sopenharmony_ci return atomic_read(&mdsc->num_sessions) <= skipped; 543162306a36Sopenharmony_ci} 543262306a36Sopenharmony_ci 543362306a36Sopenharmony_ci/* 543462306a36Sopenharmony_ci * called after sb is ro or when metadata corrupted. 
543562306a36Sopenharmony_ci */ 543662306a36Sopenharmony_civoid ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) 543762306a36Sopenharmony_ci{ 543862306a36Sopenharmony_ci struct ceph_options *opts = mdsc->fsc->client->options; 543962306a36Sopenharmony_ci struct ceph_mds_session *session; 544062306a36Sopenharmony_ci int i; 544162306a36Sopenharmony_ci int skipped = 0; 544262306a36Sopenharmony_ci 544362306a36Sopenharmony_ci dout("close_sessions\n"); 544462306a36Sopenharmony_ci 544562306a36Sopenharmony_ci /* close sessions */ 544662306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 544762306a36Sopenharmony_ci for (i = 0; i < mdsc->max_sessions; i++) { 544862306a36Sopenharmony_ci session = __ceph_lookup_mds_session(mdsc, i); 544962306a36Sopenharmony_ci if (!session) 545062306a36Sopenharmony_ci continue; 545162306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 545262306a36Sopenharmony_ci mutex_lock(&session->s_mutex); 545362306a36Sopenharmony_ci if (__close_session(mdsc, session) <= 0) 545462306a36Sopenharmony_ci skipped++; 545562306a36Sopenharmony_ci mutex_unlock(&session->s_mutex); 545662306a36Sopenharmony_ci ceph_put_mds_session(session); 545762306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 545862306a36Sopenharmony_ci } 545962306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 546062306a36Sopenharmony_ci 546162306a36Sopenharmony_ci dout("waiting for sessions to close\n"); 546262306a36Sopenharmony_ci wait_event_timeout(mdsc->session_close_wq, 546362306a36Sopenharmony_ci done_closing_sessions(mdsc, skipped), 546462306a36Sopenharmony_ci ceph_timeout_jiffies(opts->mount_timeout)); 546562306a36Sopenharmony_ci 546662306a36Sopenharmony_ci /* tear down remaining sessions */ 546762306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 546862306a36Sopenharmony_ci for (i = 0; i < mdsc->max_sessions; i++) { 546962306a36Sopenharmony_ci if (mdsc->sessions[i]) { 547062306a36Sopenharmony_ci session = ceph_get_mds_session(mdsc->sessions[i]); 547162306a36Sopenharmony_ci 
__unregister_session(mdsc, session); 547262306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 547362306a36Sopenharmony_ci mutex_lock(&session->s_mutex); 547462306a36Sopenharmony_ci remove_session_caps(session); 547562306a36Sopenharmony_ci mutex_unlock(&session->s_mutex); 547662306a36Sopenharmony_ci ceph_put_mds_session(session); 547762306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 547862306a36Sopenharmony_ci } 547962306a36Sopenharmony_ci } 548062306a36Sopenharmony_ci WARN_ON(!list_empty(&mdsc->cap_delay_list)); 548162306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 548262306a36Sopenharmony_ci 548362306a36Sopenharmony_ci ceph_cleanup_snapid_map(mdsc); 548462306a36Sopenharmony_ci ceph_cleanup_global_and_empty_realms(mdsc); 548562306a36Sopenharmony_ci 548662306a36Sopenharmony_ci cancel_work_sync(&mdsc->cap_reclaim_work); 548762306a36Sopenharmony_ci cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ 548862306a36Sopenharmony_ci 548962306a36Sopenharmony_ci dout("stopped\n"); 549062306a36Sopenharmony_ci} 549162306a36Sopenharmony_ci 549262306a36Sopenharmony_civoid ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) 549362306a36Sopenharmony_ci{ 549462306a36Sopenharmony_ci struct ceph_mds_session *session; 549562306a36Sopenharmony_ci int mds; 549662306a36Sopenharmony_ci 549762306a36Sopenharmony_ci dout("force umount\n"); 549862306a36Sopenharmony_ci 549962306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 550062306a36Sopenharmony_ci for (mds = 0; mds < mdsc->max_sessions; mds++) { 550162306a36Sopenharmony_ci session = __ceph_lookup_mds_session(mdsc, mds); 550262306a36Sopenharmony_ci if (!session) 550362306a36Sopenharmony_ci continue; 550462306a36Sopenharmony_ci 550562306a36Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_REJECTED) 550662306a36Sopenharmony_ci __unregister_session(mdsc, session); 550762306a36Sopenharmony_ci __wake_requests(mdsc, &session->s_waiting); 550862306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 550962306a36Sopenharmony_ci 
551062306a36Sopenharmony_ci mutex_lock(&session->s_mutex); 551162306a36Sopenharmony_ci __close_session(mdsc, session); 551262306a36Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_CLOSING) { 551362306a36Sopenharmony_ci cleanup_session_requests(mdsc, session); 551462306a36Sopenharmony_ci remove_session_caps(session); 551562306a36Sopenharmony_ci } 551662306a36Sopenharmony_ci mutex_unlock(&session->s_mutex); 551762306a36Sopenharmony_ci ceph_put_mds_session(session); 551862306a36Sopenharmony_ci 551962306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 552062306a36Sopenharmony_ci kick_requests(mdsc, mds); 552162306a36Sopenharmony_ci } 552262306a36Sopenharmony_ci __wake_requests(mdsc, &mdsc->waiting_for_map); 552362306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 552462306a36Sopenharmony_ci} 552562306a36Sopenharmony_ci 552662306a36Sopenharmony_cistatic void ceph_mdsc_stop(struct ceph_mds_client *mdsc) 552762306a36Sopenharmony_ci{ 552862306a36Sopenharmony_ci dout("stop\n"); 552962306a36Sopenharmony_ci /* 553062306a36Sopenharmony_ci * Make sure the delayed work stopped before releasing 553162306a36Sopenharmony_ci * the resources. 553262306a36Sopenharmony_ci * 553362306a36Sopenharmony_ci * Because the cancel_delayed_work_sync() will only 553462306a36Sopenharmony_ci * guarantee that the work finishes executing. But the 553562306a36Sopenharmony_ci * delayed work will re-arm itself again after that. 
553662306a36Sopenharmony_ci */ 553762306a36Sopenharmony_ci flush_delayed_work(&mdsc->delayed_work); 553862306a36Sopenharmony_ci 553962306a36Sopenharmony_ci if (mdsc->mdsmap) 554062306a36Sopenharmony_ci ceph_mdsmap_destroy(mdsc->mdsmap); 554162306a36Sopenharmony_ci kfree(mdsc->sessions); 554262306a36Sopenharmony_ci ceph_caps_finalize(mdsc); 554362306a36Sopenharmony_ci ceph_pool_perm_destroy(mdsc); 554462306a36Sopenharmony_ci} 554562306a36Sopenharmony_ci 554662306a36Sopenharmony_civoid ceph_mdsc_destroy(struct ceph_fs_client *fsc) 554762306a36Sopenharmony_ci{ 554862306a36Sopenharmony_ci struct ceph_mds_client *mdsc = fsc->mdsc; 554962306a36Sopenharmony_ci dout("mdsc_destroy %p\n", mdsc); 555062306a36Sopenharmony_ci 555162306a36Sopenharmony_ci if (!mdsc) 555262306a36Sopenharmony_ci return; 555362306a36Sopenharmony_ci 555462306a36Sopenharmony_ci /* flush out any connection work with references to us */ 555562306a36Sopenharmony_ci ceph_msgr_flush(); 555662306a36Sopenharmony_ci 555762306a36Sopenharmony_ci ceph_mdsc_stop(mdsc); 555862306a36Sopenharmony_ci 555962306a36Sopenharmony_ci ceph_metric_destroy(&mdsc->metric); 556062306a36Sopenharmony_ci 556162306a36Sopenharmony_ci fsc->mdsc = NULL; 556262306a36Sopenharmony_ci kfree(mdsc); 556362306a36Sopenharmony_ci dout("mdsc_destroy %p done\n", mdsc); 556462306a36Sopenharmony_ci} 556562306a36Sopenharmony_ci 556662306a36Sopenharmony_civoid ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) 556762306a36Sopenharmony_ci{ 556862306a36Sopenharmony_ci struct ceph_fs_client *fsc = mdsc->fsc; 556962306a36Sopenharmony_ci const char *mds_namespace = fsc->mount_options->mds_namespace; 557062306a36Sopenharmony_ci void *p = msg->front.iov_base; 557162306a36Sopenharmony_ci void *end = p + msg->front.iov_len; 557262306a36Sopenharmony_ci u32 epoch; 557362306a36Sopenharmony_ci u32 num_fs; 557462306a36Sopenharmony_ci u32 mount_fscid = (u32)-1; 557562306a36Sopenharmony_ci int err = -EINVAL; 557662306a36Sopenharmony_ci 
557762306a36Sopenharmony_ci ceph_decode_need(&p, end, sizeof(u32), bad); 557862306a36Sopenharmony_ci epoch = ceph_decode_32(&p); 557962306a36Sopenharmony_ci 558062306a36Sopenharmony_ci dout("handle_fsmap epoch %u\n", epoch); 558162306a36Sopenharmony_ci 558262306a36Sopenharmony_ci /* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */ 558362306a36Sopenharmony_ci ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad); 558462306a36Sopenharmony_ci 558562306a36Sopenharmony_ci ceph_decode_32_safe(&p, end, num_fs, bad); 558662306a36Sopenharmony_ci while (num_fs-- > 0) { 558762306a36Sopenharmony_ci void *info_p, *info_end; 558862306a36Sopenharmony_ci u32 info_len; 558962306a36Sopenharmony_ci u32 fscid, namelen; 559062306a36Sopenharmony_ci 559162306a36Sopenharmony_ci ceph_decode_need(&p, end, 2 + sizeof(u32), bad); 559262306a36Sopenharmony_ci p += 2; // info_v, info_cv 559362306a36Sopenharmony_ci info_len = ceph_decode_32(&p); 559462306a36Sopenharmony_ci ceph_decode_need(&p, end, info_len, bad); 559562306a36Sopenharmony_ci info_p = p; 559662306a36Sopenharmony_ci info_end = p + info_len; 559762306a36Sopenharmony_ci p = info_end; 559862306a36Sopenharmony_ci 559962306a36Sopenharmony_ci ceph_decode_need(&info_p, info_end, sizeof(u32) * 2, bad); 560062306a36Sopenharmony_ci fscid = ceph_decode_32(&info_p); 560162306a36Sopenharmony_ci namelen = ceph_decode_32(&info_p); 560262306a36Sopenharmony_ci ceph_decode_need(&info_p, info_end, namelen, bad); 560362306a36Sopenharmony_ci 560462306a36Sopenharmony_ci if (mds_namespace && 560562306a36Sopenharmony_ci strlen(mds_namespace) == namelen && 560662306a36Sopenharmony_ci !strncmp(mds_namespace, (char *)info_p, namelen)) { 560762306a36Sopenharmony_ci mount_fscid = fscid; 560862306a36Sopenharmony_ci break; 560962306a36Sopenharmony_ci } 561062306a36Sopenharmony_ci } 561162306a36Sopenharmony_ci 561262306a36Sopenharmony_ci ceph_monc_got_map(&fsc->client->monc, CEPH_SUB_FSMAP, epoch); 561362306a36Sopenharmony_ci if (mount_fscid != 
(u32)-1) { 561462306a36Sopenharmony_ci fsc->client->monc.fs_cluster_id = mount_fscid; 561562306a36Sopenharmony_ci ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 561662306a36Sopenharmony_ci 0, true); 561762306a36Sopenharmony_ci ceph_monc_renew_subs(&fsc->client->monc); 561862306a36Sopenharmony_ci } else { 561962306a36Sopenharmony_ci err = -ENOENT; 562062306a36Sopenharmony_ci goto err_out; 562162306a36Sopenharmony_ci } 562262306a36Sopenharmony_ci return; 562362306a36Sopenharmony_ci 562462306a36Sopenharmony_cibad: 562562306a36Sopenharmony_ci pr_err("error decoding fsmap %d. Shutting down mount.\n", err); 562662306a36Sopenharmony_ci ceph_umount_begin(mdsc->fsc->sb); 562762306a36Sopenharmony_ci ceph_msg_dump(msg); 562862306a36Sopenharmony_cierr_out: 562962306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 563062306a36Sopenharmony_ci mdsc->mdsmap_err = err; 563162306a36Sopenharmony_ci __wake_requests(mdsc, &mdsc->waiting_for_map); 563262306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 563362306a36Sopenharmony_ci} 563462306a36Sopenharmony_ci 563562306a36Sopenharmony_ci/* 563662306a36Sopenharmony_ci * handle mds map update. 
563762306a36Sopenharmony_ci */ 563862306a36Sopenharmony_civoid ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) 563962306a36Sopenharmony_ci{ 564062306a36Sopenharmony_ci u32 epoch; 564162306a36Sopenharmony_ci u32 maplen; 564262306a36Sopenharmony_ci void *p = msg->front.iov_base; 564362306a36Sopenharmony_ci void *end = p + msg->front.iov_len; 564462306a36Sopenharmony_ci struct ceph_mdsmap *newmap, *oldmap; 564562306a36Sopenharmony_ci struct ceph_fsid fsid; 564662306a36Sopenharmony_ci int err = -EINVAL; 564762306a36Sopenharmony_ci 564862306a36Sopenharmony_ci ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); 564962306a36Sopenharmony_ci ceph_decode_copy(&p, &fsid, sizeof(fsid)); 565062306a36Sopenharmony_ci if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0) 565162306a36Sopenharmony_ci return; 565262306a36Sopenharmony_ci epoch = ceph_decode_32(&p); 565362306a36Sopenharmony_ci maplen = ceph_decode_32(&p); 565462306a36Sopenharmony_ci dout("handle_map epoch %u len %d\n", epoch, (int)maplen); 565562306a36Sopenharmony_ci 565662306a36Sopenharmony_ci /* do we need it? 
*/ 565762306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 565862306a36Sopenharmony_ci if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { 565962306a36Sopenharmony_ci dout("handle_map epoch %u <= our %u\n", 566062306a36Sopenharmony_ci epoch, mdsc->mdsmap->m_epoch); 566162306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 566262306a36Sopenharmony_ci return; 566362306a36Sopenharmony_ci } 566462306a36Sopenharmony_ci 566562306a36Sopenharmony_ci newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client)); 566662306a36Sopenharmony_ci if (IS_ERR(newmap)) { 566762306a36Sopenharmony_ci err = PTR_ERR(newmap); 566862306a36Sopenharmony_ci goto bad_unlock; 566962306a36Sopenharmony_ci } 567062306a36Sopenharmony_ci 567162306a36Sopenharmony_ci /* swap into place */ 567262306a36Sopenharmony_ci if (mdsc->mdsmap) { 567362306a36Sopenharmony_ci oldmap = mdsc->mdsmap; 567462306a36Sopenharmony_ci mdsc->mdsmap = newmap; 567562306a36Sopenharmony_ci check_new_map(mdsc, newmap, oldmap); 567662306a36Sopenharmony_ci ceph_mdsmap_destroy(oldmap); 567762306a36Sopenharmony_ci } else { 567862306a36Sopenharmony_ci mdsc->mdsmap = newmap; /* first mds map */ 567962306a36Sopenharmony_ci } 568062306a36Sopenharmony_ci mdsc->fsc->max_file_size = min((loff_t)mdsc->mdsmap->m_max_file_size, 568162306a36Sopenharmony_ci MAX_LFS_FILESIZE); 568262306a36Sopenharmony_ci 568362306a36Sopenharmony_ci __wake_requests(mdsc, &mdsc->waiting_for_map); 568462306a36Sopenharmony_ci ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP, 568562306a36Sopenharmony_ci mdsc->mdsmap->m_epoch); 568662306a36Sopenharmony_ci 568762306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 568862306a36Sopenharmony_ci schedule_delayed(mdsc, 0); 568962306a36Sopenharmony_ci return; 569062306a36Sopenharmony_ci 569162306a36Sopenharmony_cibad_unlock: 569262306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 569362306a36Sopenharmony_cibad: 569462306a36Sopenharmony_ci pr_err("error decoding mdsmap %d. 
Shutting down mount.\n", err); 569562306a36Sopenharmony_ci ceph_umount_begin(mdsc->fsc->sb); 569662306a36Sopenharmony_ci ceph_msg_dump(msg); 569762306a36Sopenharmony_ci return; 569862306a36Sopenharmony_ci} 569962306a36Sopenharmony_ci 570062306a36Sopenharmony_cistatic struct ceph_connection *mds_get_con(struct ceph_connection *con) 570162306a36Sopenharmony_ci{ 570262306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 570362306a36Sopenharmony_ci 570462306a36Sopenharmony_ci if (ceph_get_mds_session(s)) 570562306a36Sopenharmony_ci return con; 570662306a36Sopenharmony_ci return NULL; 570762306a36Sopenharmony_ci} 570862306a36Sopenharmony_ci 570962306a36Sopenharmony_cistatic void mds_put_con(struct ceph_connection *con) 571062306a36Sopenharmony_ci{ 571162306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 571262306a36Sopenharmony_ci 571362306a36Sopenharmony_ci ceph_put_mds_session(s); 571462306a36Sopenharmony_ci} 571562306a36Sopenharmony_ci 571662306a36Sopenharmony_ci/* 571762306a36Sopenharmony_ci * if the client is unresponsive for long enough, the mds will kill 571862306a36Sopenharmony_ci * the session entirely. 
571962306a36Sopenharmony_ci */ 572062306a36Sopenharmony_cistatic void mds_peer_reset(struct ceph_connection *con) 572162306a36Sopenharmony_ci{ 572262306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 572362306a36Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 572462306a36Sopenharmony_ci 572562306a36Sopenharmony_ci pr_warn("mds%d closed our session\n", s->s_mds); 572662306a36Sopenharmony_ci if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO) 572762306a36Sopenharmony_ci send_mds_reconnect(mdsc, s); 572862306a36Sopenharmony_ci} 572962306a36Sopenharmony_ci 573062306a36Sopenharmony_cistatic void mds_dispatch(struct ceph_connection *con, struct ceph_msg *msg) 573162306a36Sopenharmony_ci{ 573262306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 573362306a36Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 573462306a36Sopenharmony_ci int type = le16_to_cpu(msg->hdr.type); 573562306a36Sopenharmony_ci 573662306a36Sopenharmony_ci mutex_lock(&mdsc->mutex); 573762306a36Sopenharmony_ci if (__verify_registered_session(mdsc, s) < 0) { 573862306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 573962306a36Sopenharmony_ci goto out; 574062306a36Sopenharmony_ci } 574162306a36Sopenharmony_ci mutex_unlock(&mdsc->mutex); 574262306a36Sopenharmony_ci 574362306a36Sopenharmony_ci switch (type) { 574462306a36Sopenharmony_ci case CEPH_MSG_MDS_MAP: 574562306a36Sopenharmony_ci ceph_mdsc_handle_mdsmap(mdsc, msg); 574662306a36Sopenharmony_ci break; 574762306a36Sopenharmony_ci case CEPH_MSG_FS_MAP_USER: 574862306a36Sopenharmony_ci ceph_mdsc_handle_fsmap(mdsc, msg); 574962306a36Sopenharmony_ci break; 575062306a36Sopenharmony_ci case CEPH_MSG_CLIENT_SESSION: 575162306a36Sopenharmony_ci handle_session(s, msg); 575262306a36Sopenharmony_ci break; 575362306a36Sopenharmony_ci case CEPH_MSG_CLIENT_REPLY: 575462306a36Sopenharmony_ci handle_reply(s, msg); 575562306a36Sopenharmony_ci break; 575662306a36Sopenharmony_ci case CEPH_MSG_CLIENT_REQUEST_FORWARD: 
575762306a36Sopenharmony_ci handle_forward(mdsc, s, msg); 575862306a36Sopenharmony_ci break; 575962306a36Sopenharmony_ci case CEPH_MSG_CLIENT_CAPS: 576062306a36Sopenharmony_ci ceph_handle_caps(s, msg); 576162306a36Sopenharmony_ci break; 576262306a36Sopenharmony_ci case CEPH_MSG_CLIENT_SNAP: 576362306a36Sopenharmony_ci ceph_handle_snap(mdsc, s, msg); 576462306a36Sopenharmony_ci break; 576562306a36Sopenharmony_ci case CEPH_MSG_CLIENT_LEASE: 576662306a36Sopenharmony_ci handle_lease(mdsc, s, msg); 576762306a36Sopenharmony_ci break; 576862306a36Sopenharmony_ci case CEPH_MSG_CLIENT_QUOTA: 576962306a36Sopenharmony_ci ceph_handle_quota(mdsc, s, msg); 577062306a36Sopenharmony_ci break; 577162306a36Sopenharmony_ci 577262306a36Sopenharmony_ci default: 577362306a36Sopenharmony_ci pr_err("received unknown message type %d %s\n", type, 577462306a36Sopenharmony_ci ceph_msg_type_name(type)); 577562306a36Sopenharmony_ci } 577662306a36Sopenharmony_ciout: 577762306a36Sopenharmony_ci ceph_msg_put(msg); 577862306a36Sopenharmony_ci} 577962306a36Sopenharmony_ci 578062306a36Sopenharmony_ci/* 578162306a36Sopenharmony_ci * authentication 578262306a36Sopenharmony_ci */ 578362306a36Sopenharmony_ci 578462306a36Sopenharmony_ci/* 578562306a36Sopenharmony_ci * Note: returned pointer is the address of a structure that's 578662306a36Sopenharmony_ci * managed separately. Caller must *not* attempt to free it. 
578762306a36Sopenharmony_ci */ 578862306a36Sopenharmony_cistatic struct ceph_auth_handshake * 578962306a36Sopenharmony_cimds_get_authorizer(struct ceph_connection *con, int *proto, int force_new) 579062306a36Sopenharmony_ci{ 579162306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 579262306a36Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 579362306a36Sopenharmony_ci struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; 579462306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &s->s_auth; 579562306a36Sopenharmony_ci int ret; 579662306a36Sopenharmony_ci 579762306a36Sopenharmony_ci ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_MDS, 579862306a36Sopenharmony_ci force_new, proto, NULL, NULL); 579962306a36Sopenharmony_ci if (ret) 580062306a36Sopenharmony_ci return ERR_PTR(ret); 580162306a36Sopenharmony_ci 580262306a36Sopenharmony_ci return auth; 580362306a36Sopenharmony_ci} 580462306a36Sopenharmony_ci 580562306a36Sopenharmony_cistatic int mds_add_authorizer_challenge(struct ceph_connection *con, 580662306a36Sopenharmony_ci void *challenge_buf, int challenge_buf_len) 580762306a36Sopenharmony_ci{ 580862306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 580962306a36Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 581062306a36Sopenharmony_ci struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; 581162306a36Sopenharmony_ci 581262306a36Sopenharmony_ci return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer, 581362306a36Sopenharmony_ci challenge_buf, challenge_buf_len); 581462306a36Sopenharmony_ci} 581562306a36Sopenharmony_ci 581662306a36Sopenharmony_cistatic int mds_verify_authorizer_reply(struct ceph_connection *con) 581762306a36Sopenharmony_ci{ 581862306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 581962306a36Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 582062306a36Sopenharmony_ci struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; 
582162306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &s->s_auth; 582262306a36Sopenharmony_ci 582362306a36Sopenharmony_ci return ceph_auth_verify_authorizer_reply(ac, auth->authorizer, 582462306a36Sopenharmony_ci auth->authorizer_reply_buf, auth->authorizer_reply_buf_len, 582562306a36Sopenharmony_ci NULL, NULL, NULL, NULL); 582662306a36Sopenharmony_ci} 582762306a36Sopenharmony_ci 582862306a36Sopenharmony_cistatic int mds_invalidate_authorizer(struct ceph_connection *con) 582962306a36Sopenharmony_ci{ 583062306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 583162306a36Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 583262306a36Sopenharmony_ci struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; 583362306a36Sopenharmony_ci 583462306a36Sopenharmony_ci ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); 583562306a36Sopenharmony_ci 583662306a36Sopenharmony_ci return ceph_monc_validate_auth(&mdsc->fsc->client->monc); 583762306a36Sopenharmony_ci} 583862306a36Sopenharmony_ci 583962306a36Sopenharmony_cistatic int mds_get_auth_request(struct ceph_connection *con, 584062306a36Sopenharmony_ci void *buf, int *buf_len, 584162306a36Sopenharmony_ci void **authorizer, int *authorizer_len) 584262306a36Sopenharmony_ci{ 584362306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 584462306a36Sopenharmony_ci struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth; 584562306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &s->s_auth; 584662306a36Sopenharmony_ci int ret; 584762306a36Sopenharmony_ci 584862306a36Sopenharmony_ci ret = ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_MDS, 584962306a36Sopenharmony_ci buf, buf_len); 585062306a36Sopenharmony_ci if (ret) 585162306a36Sopenharmony_ci return ret; 585262306a36Sopenharmony_ci 585362306a36Sopenharmony_ci *authorizer = auth->authorizer_buf; 585462306a36Sopenharmony_ci *authorizer_len = auth->authorizer_buf_len; 585562306a36Sopenharmony_ci return 0; 
585662306a36Sopenharmony_ci} 585762306a36Sopenharmony_ci 585862306a36Sopenharmony_cistatic int mds_handle_auth_reply_more(struct ceph_connection *con, 585962306a36Sopenharmony_ci void *reply, int reply_len, 586062306a36Sopenharmony_ci void *buf, int *buf_len, 586162306a36Sopenharmony_ci void **authorizer, int *authorizer_len) 586262306a36Sopenharmony_ci{ 586362306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 586462306a36Sopenharmony_ci struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth; 586562306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &s->s_auth; 586662306a36Sopenharmony_ci int ret; 586762306a36Sopenharmony_ci 586862306a36Sopenharmony_ci ret = ceph_auth_handle_svc_reply_more(ac, auth, reply, reply_len, 586962306a36Sopenharmony_ci buf, buf_len); 587062306a36Sopenharmony_ci if (ret) 587162306a36Sopenharmony_ci return ret; 587262306a36Sopenharmony_ci 587362306a36Sopenharmony_ci *authorizer = auth->authorizer_buf; 587462306a36Sopenharmony_ci *authorizer_len = auth->authorizer_buf_len; 587562306a36Sopenharmony_ci return 0; 587662306a36Sopenharmony_ci} 587762306a36Sopenharmony_ci 587862306a36Sopenharmony_cistatic int mds_handle_auth_done(struct ceph_connection *con, 587962306a36Sopenharmony_ci u64 global_id, void *reply, int reply_len, 588062306a36Sopenharmony_ci u8 *session_key, int *session_key_len, 588162306a36Sopenharmony_ci u8 *con_secret, int *con_secret_len) 588262306a36Sopenharmony_ci{ 588362306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 588462306a36Sopenharmony_ci struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth; 588562306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &s->s_auth; 588662306a36Sopenharmony_ci 588762306a36Sopenharmony_ci return ceph_auth_handle_svc_reply_done(ac, auth, reply, reply_len, 588862306a36Sopenharmony_ci session_key, session_key_len, 588962306a36Sopenharmony_ci con_secret, con_secret_len); 589062306a36Sopenharmony_ci} 589162306a36Sopenharmony_ci 
589262306a36Sopenharmony_cistatic int mds_handle_auth_bad_method(struct ceph_connection *con, 589362306a36Sopenharmony_ci int used_proto, int result, 589462306a36Sopenharmony_ci const int *allowed_protos, int proto_cnt, 589562306a36Sopenharmony_ci const int *allowed_modes, int mode_cnt) 589662306a36Sopenharmony_ci{ 589762306a36Sopenharmony_ci struct ceph_mds_session *s = con->private; 589862306a36Sopenharmony_ci struct ceph_mon_client *monc = &s->s_mdsc->fsc->client->monc; 589962306a36Sopenharmony_ci int ret; 590062306a36Sopenharmony_ci 590162306a36Sopenharmony_ci if (ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_MDS, 590262306a36Sopenharmony_ci used_proto, result, 590362306a36Sopenharmony_ci allowed_protos, proto_cnt, 590462306a36Sopenharmony_ci allowed_modes, mode_cnt)) { 590562306a36Sopenharmony_ci ret = ceph_monc_validate_auth(monc); 590662306a36Sopenharmony_ci if (ret) 590762306a36Sopenharmony_ci return ret; 590862306a36Sopenharmony_ci } 590962306a36Sopenharmony_ci 591062306a36Sopenharmony_ci return -EACCES; 591162306a36Sopenharmony_ci} 591262306a36Sopenharmony_ci 591362306a36Sopenharmony_cistatic struct ceph_msg *mds_alloc_msg(struct ceph_connection *con, 591462306a36Sopenharmony_ci struct ceph_msg_header *hdr, int *skip) 591562306a36Sopenharmony_ci{ 591662306a36Sopenharmony_ci struct ceph_msg *msg; 591762306a36Sopenharmony_ci int type = (int) le16_to_cpu(hdr->type); 591862306a36Sopenharmony_ci int front_len = (int) le32_to_cpu(hdr->front_len); 591962306a36Sopenharmony_ci 592062306a36Sopenharmony_ci if (con->in_msg) 592162306a36Sopenharmony_ci return con->in_msg; 592262306a36Sopenharmony_ci 592362306a36Sopenharmony_ci *skip = 0; 592462306a36Sopenharmony_ci msg = ceph_msg_new(type, front_len, GFP_NOFS, false); 592562306a36Sopenharmony_ci if (!msg) { 592662306a36Sopenharmony_ci pr_err("unable to allocate msg type %d len %d\n", 592762306a36Sopenharmony_ci type, front_len); 592862306a36Sopenharmony_ci return NULL; 592962306a36Sopenharmony_ci } 
593062306a36Sopenharmony_ci 593162306a36Sopenharmony_ci return msg; 593262306a36Sopenharmony_ci} 593362306a36Sopenharmony_ci 593462306a36Sopenharmony_cistatic int mds_sign_message(struct ceph_msg *msg) 593562306a36Sopenharmony_ci{ 593662306a36Sopenharmony_ci struct ceph_mds_session *s = msg->con->private; 593762306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &s->s_auth; 593862306a36Sopenharmony_ci 593962306a36Sopenharmony_ci return ceph_auth_sign_message(auth, msg); 594062306a36Sopenharmony_ci} 594162306a36Sopenharmony_ci 594262306a36Sopenharmony_cistatic int mds_check_message_signature(struct ceph_msg *msg) 594362306a36Sopenharmony_ci{ 594462306a36Sopenharmony_ci struct ceph_mds_session *s = msg->con->private; 594562306a36Sopenharmony_ci struct ceph_auth_handshake *auth = &s->s_auth; 594662306a36Sopenharmony_ci 594762306a36Sopenharmony_ci return ceph_auth_check_message_signature(auth, msg); 594862306a36Sopenharmony_ci} 594962306a36Sopenharmony_ci 595062306a36Sopenharmony_cistatic const struct ceph_connection_operations mds_con_ops = { 595162306a36Sopenharmony_ci .get = mds_get_con, 595262306a36Sopenharmony_ci .put = mds_put_con, 595362306a36Sopenharmony_ci .alloc_msg = mds_alloc_msg, 595462306a36Sopenharmony_ci .dispatch = mds_dispatch, 595562306a36Sopenharmony_ci .peer_reset = mds_peer_reset, 595662306a36Sopenharmony_ci .get_authorizer = mds_get_authorizer, 595762306a36Sopenharmony_ci .add_authorizer_challenge = mds_add_authorizer_challenge, 595862306a36Sopenharmony_ci .verify_authorizer_reply = mds_verify_authorizer_reply, 595962306a36Sopenharmony_ci .invalidate_authorizer = mds_invalidate_authorizer, 596062306a36Sopenharmony_ci .sign_message = mds_sign_message, 596162306a36Sopenharmony_ci .check_message_signature = mds_check_message_signature, 596262306a36Sopenharmony_ci .get_auth_request = mds_get_auth_request, 596362306a36Sopenharmony_ci .handle_auth_reply_more = mds_handle_auth_reply_more, 596462306a36Sopenharmony_ci .handle_auth_done = 
mds_handle_auth_done, 596562306a36Sopenharmony_ci .handle_auth_bad_method = mds_handle_auth_bad_method, 596662306a36Sopenharmony_ci}; 596762306a36Sopenharmony_ci 596862306a36Sopenharmony_ci/* eof */ 5969