18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci#include <linux/ceph/ceph_debug.h> 38c2ecf20Sopenharmony_ci 48c2ecf20Sopenharmony_ci#include <linux/fs.h> 58c2ecf20Sopenharmony_ci#include <linux/wait.h> 68c2ecf20Sopenharmony_ci#include <linux/slab.h> 78c2ecf20Sopenharmony_ci#include <linux/gfp.h> 88c2ecf20Sopenharmony_ci#include <linux/sched.h> 98c2ecf20Sopenharmony_ci#include <linux/debugfs.h> 108c2ecf20Sopenharmony_ci#include <linux/seq_file.h> 118c2ecf20Sopenharmony_ci#include <linux/ratelimit.h> 128c2ecf20Sopenharmony_ci#include <linux/bits.h> 138c2ecf20Sopenharmony_ci#include <linux/ktime.h> 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci#include "super.h" 168c2ecf20Sopenharmony_ci#include "mds_client.h" 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_ci#include <linux/ceph/ceph_features.h> 198c2ecf20Sopenharmony_ci#include <linux/ceph/messenger.h> 208c2ecf20Sopenharmony_ci#include <linux/ceph/decode.h> 218c2ecf20Sopenharmony_ci#include <linux/ceph/pagelist.h> 228c2ecf20Sopenharmony_ci#include <linux/ceph/auth.h> 238c2ecf20Sopenharmony_ci#include <linux/ceph/debugfs.h> 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#define RECONNECT_MAX_SIZE (INT_MAX - PAGE_SIZE) 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_ci/* 288c2ecf20Sopenharmony_ci * A cluster of MDS (metadata server) daemons is responsible for 298c2ecf20Sopenharmony_ci * managing the file system namespace (the directory hierarchy and 308c2ecf20Sopenharmony_ci * inodes) and for coordinating shared access to storage. Metadata is 318c2ecf20Sopenharmony_ci * partitioning hierarchically across a number of servers, and that 328c2ecf20Sopenharmony_ci * partition varies over time as the cluster adjusts the distribution 338c2ecf20Sopenharmony_ci * in order to balance load. 
 *
 * The MDS client is primarily responsible for managing synchronous
 * metadata requests for operations like open, unlink, and so forth.
 * If there is an MDS failure, we find out about it when we (possibly
 * request and) receive a new MDS map, and can resubmit affected
 * requests.
 *
 * For the most part, though, we take advantage of a lossless
 * communications channel to the MDS, and do not need to worry about
 * timing out or resubmitting requests.
 *
 * We maintain a stateful "session" with each MDS we interact with.
 * Within each session, we send periodic heartbeat messages to ensure
 * any capabilities or leases we have been issued remain valid.  If
 * the session times out and goes stale, our leases and capabilities
 * are no longer valid.
508c2ecf20Sopenharmony_ci */ 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_cistruct ceph_reconnect_state { 538c2ecf20Sopenharmony_ci struct ceph_mds_session *session; 548c2ecf20Sopenharmony_ci int nr_caps, nr_realms; 558c2ecf20Sopenharmony_ci struct ceph_pagelist *pagelist; 568c2ecf20Sopenharmony_ci unsigned msg_version; 578c2ecf20Sopenharmony_ci bool allow_multi; 588c2ecf20Sopenharmony_ci}; 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_cistatic void __wake_requests(struct ceph_mds_client *mdsc, 618c2ecf20Sopenharmony_ci struct list_head *head); 628c2ecf20Sopenharmony_cistatic void ceph_cap_release_work(struct work_struct *work); 638c2ecf20Sopenharmony_cistatic void ceph_cap_reclaim_work(struct work_struct *work); 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_cistatic const struct ceph_connection_operations mds_con_ops; 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci/* 698c2ecf20Sopenharmony_ci * mds reply parsing 708c2ecf20Sopenharmony_ci */ 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_cistatic int parse_reply_info_quota(void **p, void *end, 738c2ecf20Sopenharmony_ci struct ceph_mds_reply_info_in *info) 748c2ecf20Sopenharmony_ci{ 758c2ecf20Sopenharmony_ci u8 struct_v, struct_compat; 768c2ecf20Sopenharmony_ci u32 struct_len; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci ceph_decode_8_safe(p, end, struct_v, bad); 798c2ecf20Sopenharmony_ci ceph_decode_8_safe(p, end, struct_compat, bad); 808c2ecf20Sopenharmony_ci /* struct_v is expected to be >= 1. we only 818c2ecf20Sopenharmony_ci * understand encoding with struct_compat == 1. 
*/ 828c2ecf20Sopenharmony_ci if (!struct_v || struct_compat != 1) 838c2ecf20Sopenharmony_ci goto bad; 848c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, struct_len, bad); 858c2ecf20Sopenharmony_ci ceph_decode_need(p, end, struct_len, bad); 868c2ecf20Sopenharmony_ci end = *p + struct_len; 878c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, info->max_bytes, bad); 888c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, info->max_files, bad); 898c2ecf20Sopenharmony_ci *p = end; 908c2ecf20Sopenharmony_ci return 0; 918c2ecf20Sopenharmony_cibad: 928c2ecf20Sopenharmony_ci return -EIO; 938c2ecf20Sopenharmony_ci} 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci/* 968c2ecf20Sopenharmony_ci * parse individual inode info 978c2ecf20Sopenharmony_ci */ 988c2ecf20Sopenharmony_cistatic int parse_reply_info_in(void **p, void *end, 998c2ecf20Sopenharmony_ci struct ceph_mds_reply_info_in *info, 1008c2ecf20Sopenharmony_ci u64 features) 1018c2ecf20Sopenharmony_ci{ 1028c2ecf20Sopenharmony_ci int err = 0; 1038c2ecf20Sopenharmony_ci u8 struct_v = 0; 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci if (features == (u64)-1) { 1068c2ecf20Sopenharmony_ci u32 struct_len; 1078c2ecf20Sopenharmony_ci u8 struct_compat; 1088c2ecf20Sopenharmony_ci ceph_decode_8_safe(p, end, struct_v, bad); 1098c2ecf20Sopenharmony_ci ceph_decode_8_safe(p, end, struct_compat, bad); 1108c2ecf20Sopenharmony_ci /* struct_v is expected to be >= 1. we only understand 1118c2ecf20Sopenharmony_ci * encoding with struct_compat == 1. 
*/ 1128c2ecf20Sopenharmony_ci if (!struct_v || struct_compat != 1) 1138c2ecf20Sopenharmony_ci goto bad; 1148c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, struct_len, bad); 1158c2ecf20Sopenharmony_ci ceph_decode_need(p, end, struct_len, bad); 1168c2ecf20Sopenharmony_ci end = *p + struct_len; 1178c2ecf20Sopenharmony_ci } 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci ceph_decode_need(p, end, sizeof(struct ceph_mds_reply_inode), bad); 1208c2ecf20Sopenharmony_ci info->in = *p; 1218c2ecf20Sopenharmony_ci *p += sizeof(struct ceph_mds_reply_inode) + 1228c2ecf20Sopenharmony_ci sizeof(*info->in->fragtree.splits) * 1238c2ecf20Sopenharmony_ci le32_to_cpu(info->in->fragtree.nsplits); 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, info->symlink_len, bad); 1268c2ecf20Sopenharmony_ci ceph_decode_need(p, end, info->symlink_len, bad); 1278c2ecf20Sopenharmony_ci info->symlink = *p; 1288c2ecf20Sopenharmony_ci *p += info->symlink_len; 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci ceph_decode_copy_safe(p, end, &info->dir_layout, 1318c2ecf20Sopenharmony_ci sizeof(info->dir_layout), bad); 1328c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, info->xattr_len, bad); 1338c2ecf20Sopenharmony_ci ceph_decode_need(p, end, info->xattr_len, bad); 1348c2ecf20Sopenharmony_ci info->xattr_data = *p; 1358c2ecf20Sopenharmony_ci *p += info->xattr_len; 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_ci if (features == (u64)-1) { 1388c2ecf20Sopenharmony_ci /* inline data */ 1398c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, info->inline_version, bad); 1408c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, info->inline_len, bad); 1418c2ecf20Sopenharmony_ci ceph_decode_need(p, end, info->inline_len, bad); 1428c2ecf20Sopenharmony_ci info->inline_data = *p; 1438c2ecf20Sopenharmony_ci *p += info->inline_len; 1448c2ecf20Sopenharmony_ci /* quota */ 1458c2ecf20Sopenharmony_ci err = parse_reply_info_quota(p, end, info); 1468c2ecf20Sopenharmony_ci if 
(err < 0) 1478c2ecf20Sopenharmony_ci goto out_bad; 1488c2ecf20Sopenharmony_ci /* pool namespace */ 1498c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, info->pool_ns_len, bad); 1508c2ecf20Sopenharmony_ci if (info->pool_ns_len > 0) { 1518c2ecf20Sopenharmony_ci ceph_decode_need(p, end, info->pool_ns_len, bad); 1528c2ecf20Sopenharmony_ci info->pool_ns_data = *p; 1538c2ecf20Sopenharmony_ci *p += info->pool_ns_len; 1548c2ecf20Sopenharmony_ci } 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci /* btime */ 1578c2ecf20Sopenharmony_ci ceph_decode_need(p, end, sizeof(info->btime), bad); 1588c2ecf20Sopenharmony_ci ceph_decode_copy(p, &info->btime, sizeof(info->btime)); 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci /* change attribute */ 1618c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, info->change_attr, bad); 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci /* dir pin */ 1648c2ecf20Sopenharmony_ci if (struct_v >= 2) { 1658c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, info->dir_pin, bad); 1668c2ecf20Sopenharmony_ci } else { 1678c2ecf20Sopenharmony_ci info->dir_pin = -ENODATA; 1688c2ecf20Sopenharmony_ci } 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci /* snapshot birth time, remains zero for v<=2 */ 1718c2ecf20Sopenharmony_ci if (struct_v >= 3) { 1728c2ecf20Sopenharmony_ci ceph_decode_need(p, end, sizeof(info->snap_btime), bad); 1738c2ecf20Sopenharmony_ci ceph_decode_copy(p, &info->snap_btime, 1748c2ecf20Sopenharmony_ci sizeof(info->snap_btime)); 1758c2ecf20Sopenharmony_ci } else { 1768c2ecf20Sopenharmony_ci memset(&info->snap_btime, 0, sizeof(info->snap_btime)); 1778c2ecf20Sopenharmony_ci } 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci *p = end; 1808c2ecf20Sopenharmony_ci } else { 1818c2ecf20Sopenharmony_ci if (features & CEPH_FEATURE_MDS_INLINE_DATA) { 1828c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, info->inline_version, bad); 1838c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, info->inline_len, bad); 
1848c2ecf20Sopenharmony_ci ceph_decode_need(p, end, info->inline_len, bad); 1858c2ecf20Sopenharmony_ci info->inline_data = *p; 1868c2ecf20Sopenharmony_ci *p += info->inline_len; 1878c2ecf20Sopenharmony_ci } else 1888c2ecf20Sopenharmony_ci info->inline_version = CEPH_INLINE_NONE; 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci if (features & CEPH_FEATURE_MDS_QUOTA) { 1918c2ecf20Sopenharmony_ci err = parse_reply_info_quota(p, end, info); 1928c2ecf20Sopenharmony_ci if (err < 0) 1938c2ecf20Sopenharmony_ci goto out_bad; 1948c2ecf20Sopenharmony_ci } else { 1958c2ecf20Sopenharmony_ci info->max_bytes = 0; 1968c2ecf20Sopenharmony_ci info->max_files = 0; 1978c2ecf20Sopenharmony_ci } 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci info->pool_ns_len = 0; 2008c2ecf20Sopenharmony_ci info->pool_ns_data = NULL; 2018c2ecf20Sopenharmony_ci if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) { 2028c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, info->pool_ns_len, bad); 2038c2ecf20Sopenharmony_ci if (info->pool_ns_len > 0) { 2048c2ecf20Sopenharmony_ci ceph_decode_need(p, end, info->pool_ns_len, bad); 2058c2ecf20Sopenharmony_ci info->pool_ns_data = *p; 2068c2ecf20Sopenharmony_ci *p += info->pool_ns_len; 2078c2ecf20Sopenharmony_ci } 2088c2ecf20Sopenharmony_ci } 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci if (features & CEPH_FEATURE_FS_BTIME) { 2118c2ecf20Sopenharmony_ci ceph_decode_need(p, end, sizeof(info->btime), bad); 2128c2ecf20Sopenharmony_ci ceph_decode_copy(p, &info->btime, sizeof(info->btime)); 2138c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, info->change_attr, bad); 2148c2ecf20Sopenharmony_ci } 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_ci info->dir_pin = -ENODATA; 2178c2ecf20Sopenharmony_ci /* info->snap_btime remains zero */ 2188c2ecf20Sopenharmony_ci } 2198c2ecf20Sopenharmony_ci return 0; 2208c2ecf20Sopenharmony_cibad: 2218c2ecf20Sopenharmony_ci err = -EIO; 2228c2ecf20Sopenharmony_ciout_bad: 2238c2ecf20Sopenharmony_ci return err; 
2248c2ecf20Sopenharmony_ci} 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_cistatic int parse_reply_info_dir(void **p, void *end, 2278c2ecf20Sopenharmony_ci struct ceph_mds_reply_dirfrag **dirfrag, 2288c2ecf20Sopenharmony_ci u64 features) 2298c2ecf20Sopenharmony_ci{ 2308c2ecf20Sopenharmony_ci if (features == (u64)-1) { 2318c2ecf20Sopenharmony_ci u8 struct_v, struct_compat; 2328c2ecf20Sopenharmony_ci u32 struct_len; 2338c2ecf20Sopenharmony_ci ceph_decode_8_safe(p, end, struct_v, bad); 2348c2ecf20Sopenharmony_ci ceph_decode_8_safe(p, end, struct_compat, bad); 2358c2ecf20Sopenharmony_ci /* struct_v is expected to be >= 1. we only understand 2368c2ecf20Sopenharmony_ci * encoding whose struct_compat == 1. */ 2378c2ecf20Sopenharmony_ci if (!struct_v || struct_compat != 1) 2388c2ecf20Sopenharmony_ci goto bad; 2398c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, struct_len, bad); 2408c2ecf20Sopenharmony_ci ceph_decode_need(p, end, struct_len, bad); 2418c2ecf20Sopenharmony_ci end = *p + struct_len; 2428c2ecf20Sopenharmony_ci } 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci ceph_decode_need(p, end, sizeof(**dirfrag), bad); 2458c2ecf20Sopenharmony_ci *dirfrag = *p; 2468c2ecf20Sopenharmony_ci *p += sizeof(**dirfrag) + sizeof(u32) * le32_to_cpu((*dirfrag)->ndist); 2478c2ecf20Sopenharmony_ci if (unlikely(*p > end)) 2488c2ecf20Sopenharmony_ci goto bad; 2498c2ecf20Sopenharmony_ci if (features == (u64)-1) 2508c2ecf20Sopenharmony_ci *p = end; 2518c2ecf20Sopenharmony_ci return 0; 2528c2ecf20Sopenharmony_cibad: 2538c2ecf20Sopenharmony_ci return -EIO; 2548c2ecf20Sopenharmony_ci} 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_cistatic int parse_reply_info_lease(void **p, void *end, 2578c2ecf20Sopenharmony_ci struct ceph_mds_reply_lease **lease, 2588c2ecf20Sopenharmony_ci u64 features) 2598c2ecf20Sopenharmony_ci{ 2608c2ecf20Sopenharmony_ci if (features == (u64)-1) { 2618c2ecf20Sopenharmony_ci u8 struct_v, struct_compat; 2628c2ecf20Sopenharmony_ci u32 struct_len; 
2638c2ecf20Sopenharmony_ci ceph_decode_8_safe(p, end, struct_v, bad); 2648c2ecf20Sopenharmony_ci ceph_decode_8_safe(p, end, struct_compat, bad); 2658c2ecf20Sopenharmony_ci /* struct_v is expected to be >= 1. we only understand 2668c2ecf20Sopenharmony_ci * encoding whose struct_compat == 1. */ 2678c2ecf20Sopenharmony_ci if (!struct_v || struct_compat != 1) 2688c2ecf20Sopenharmony_ci goto bad; 2698c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, struct_len, bad); 2708c2ecf20Sopenharmony_ci ceph_decode_need(p, end, struct_len, bad); 2718c2ecf20Sopenharmony_ci end = *p + struct_len; 2728c2ecf20Sopenharmony_ci } 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_ci ceph_decode_need(p, end, sizeof(**lease), bad); 2758c2ecf20Sopenharmony_ci *lease = *p; 2768c2ecf20Sopenharmony_ci *p += sizeof(**lease); 2778c2ecf20Sopenharmony_ci if (features == (u64)-1) 2788c2ecf20Sopenharmony_ci *p = end; 2798c2ecf20Sopenharmony_ci return 0; 2808c2ecf20Sopenharmony_cibad: 2818c2ecf20Sopenharmony_ci return -EIO; 2828c2ecf20Sopenharmony_ci} 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci/* 2858c2ecf20Sopenharmony_ci * parse a normal reply, which may contain a (dir+)dentry and/or a 2868c2ecf20Sopenharmony_ci * target inode. 
2878c2ecf20Sopenharmony_ci */ 2888c2ecf20Sopenharmony_cistatic int parse_reply_info_trace(void **p, void *end, 2898c2ecf20Sopenharmony_ci struct ceph_mds_reply_info_parsed *info, 2908c2ecf20Sopenharmony_ci u64 features) 2918c2ecf20Sopenharmony_ci{ 2928c2ecf20Sopenharmony_ci int err; 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci if (info->head->is_dentry) { 2958c2ecf20Sopenharmony_ci err = parse_reply_info_in(p, end, &info->diri, features); 2968c2ecf20Sopenharmony_ci if (err < 0) 2978c2ecf20Sopenharmony_ci goto out_bad; 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci err = parse_reply_info_dir(p, end, &info->dirfrag, features); 3008c2ecf20Sopenharmony_ci if (err < 0) 3018c2ecf20Sopenharmony_ci goto out_bad; 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, info->dname_len, bad); 3048c2ecf20Sopenharmony_ci ceph_decode_need(p, end, info->dname_len, bad); 3058c2ecf20Sopenharmony_ci info->dname = *p; 3068c2ecf20Sopenharmony_ci *p += info->dname_len; 3078c2ecf20Sopenharmony_ci 3088c2ecf20Sopenharmony_ci err = parse_reply_info_lease(p, end, &info->dlease, features); 3098c2ecf20Sopenharmony_ci if (err < 0) 3108c2ecf20Sopenharmony_ci goto out_bad; 3118c2ecf20Sopenharmony_ci } 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci if (info->head->is_target) { 3148c2ecf20Sopenharmony_ci err = parse_reply_info_in(p, end, &info->targeti, features); 3158c2ecf20Sopenharmony_ci if (err < 0) 3168c2ecf20Sopenharmony_ci goto out_bad; 3178c2ecf20Sopenharmony_ci } 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_ci if (unlikely(*p != end)) 3208c2ecf20Sopenharmony_ci goto bad; 3218c2ecf20Sopenharmony_ci return 0; 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_cibad: 3248c2ecf20Sopenharmony_ci err = -EIO; 3258c2ecf20Sopenharmony_ciout_bad: 3268c2ecf20Sopenharmony_ci pr_err("problem parsing mds trace %d\n", err); 3278c2ecf20Sopenharmony_ci return err; 3288c2ecf20Sopenharmony_ci} 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci/* 
3318c2ecf20Sopenharmony_ci * parse readdir results 3328c2ecf20Sopenharmony_ci */ 3338c2ecf20Sopenharmony_cistatic int parse_reply_info_readdir(void **p, void *end, 3348c2ecf20Sopenharmony_ci struct ceph_mds_reply_info_parsed *info, 3358c2ecf20Sopenharmony_ci u64 features) 3368c2ecf20Sopenharmony_ci{ 3378c2ecf20Sopenharmony_ci u32 num, i = 0; 3388c2ecf20Sopenharmony_ci int err; 3398c2ecf20Sopenharmony_ci 3408c2ecf20Sopenharmony_ci err = parse_reply_info_dir(p, end, &info->dir_dir, features); 3418c2ecf20Sopenharmony_ci if (err < 0) 3428c2ecf20Sopenharmony_ci goto out_bad; 3438c2ecf20Sopenharmony_ci 3448c2ecf20Sopenharmony_ci ceph_decode_need(p, end, sizeof(num) + 2, bad); 3458c2ecf20Sopenharmony_ci num = ceph_decode_32(p); 3468c2ecf20Sopenharmony_ci { 3478c2ecf20Sopenharmony_ci u16 flags = ceph_decode_16(p); 3488c2ecf20Sopenharmony_ci info->dir_end = !!(flags & CEPH_READDIR_FRAG_END); 3498c2ecf20Sopenharmony_ci info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE); 3508c2ecf20Sopenharmony_ci info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER); 3518c2ecf20Sopenharmony_ci info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH); 3528c2ecf20Sopenharmony_ci } 3538c2ecf20Sopenharmony_ci if (num == 0) 3548c2ecf20Sopenharmony_ci goto done; 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci BUG_ON(!info->dir_entries); 3578c2ecf20Sopenharmony_ci if ((unsigned long)(info->dir_entries + num) > 3588c2ecf20Sopenharmony_ci (unsigned long)info->dir_entries + info->dir_buf_size) { 3598c2ecf20Sopenharmony_ci pr_err("dir contents are larger than expected\n"); 3608c2ecf20Sopenharmony_ci WARN_ON(1); 3618c2ecf20Sopenharmony_ci goto bad; 3628c2ecf20Sopenharmony_ci } 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci info->dir_nr = num; 3658c2ecf20Sopenharmony_ci while (num) { 3668c2ecf20Sopenharmony_ci struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i; 3678c2ecf20Sopenharmony_ci /* dentry */ 3688c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, rde->name_len, 
bad); 3698c2ecf20Sopenharmony_ci ceph_decode_need(p, end, rde->name_len, bad); 3708c2ecf20Sopenharmony_ci rde->name = *p; 3718c2ecf20Sopenharmony_ci *p += rde->name_len; 3728c2ecf20Sopenharmony_ci dout("parsed dir dname '%.*s'\n", rde->name_len, rde->name); 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_ci /* dentry lease */ 3758c2ecf20Sopenharmony_ci err = parse_reply_info_lease(p, end, &rde->lease, features); 3768c2ecf20Sopenharmony_ci if (err) 3778c2ecf20Sopenharmony_ci goto out_bad; 3788c2ecf20Sopenharmony_ci /* inode */ 3798c2ecf20Sopenharmony_ci err = parse_reply_info_in(p, end, &rde->inode, features); 3808c2ecf20Sopenharmony_ci if (err < 0) 3818c2ecf20Sopenharmony_ci goto out_bad; 3828c2ecf20Sopenharmony_ci /* ceph_readdir_prepopulate() will update it */ 3838c2ecf20Sopenharmony_ci rde->offset = 0; 3848c2ecf20Sopenharmony_ci i++; 3858c2ecf20Sopenharmony_ci num--; 3868c2ecf20Sopenharmony_ci } 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_cidone: 3898c2ecf20Sopenharmony_ci /* Skip over any unrecognized fields */ 3908c2ecf20Sopenharmony_ci *p = end; 3918c2ecf20Sopenharmony_ci return 0; 3928c2ecf20Sopenharmony_ci 3938c2ecf20Sopenharmony_cibad: 3948c2ecf20Sopenharmony_ci err = -EIO; 3958c2ecf20Sopenharmony_ciout_bad: 3968c2ecf20Sopenharmony_ci pr_err("problem parsing dir contents %d\n", err); 3978c2ecf20Sopenharmony_ci return err; 3988c2ecf20Sopenharmony_ci} 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci/* 4018c2ecf20Sopenharmony_ci * parse fcntl F_GETLK results 4028c2ecf20Sopenharmony_ci */ 4038c2ecf20Sopenharmony_cistatic int parse_reply_info_filelock(void **p, void *end, 4048c2ecf20Sopenharmony_ci struct ceph_mds_reply_info_parsed *info, 4058c2ecf20Sopenharmony_ci u64 features) 4068c2ecf20Sopenharmony_ci{ 4078c2ecf20Sopenharmony_ci if (*p + sizeof(*info->filelock_reply) > end) 4088c2ecf20Sopenharmony_ci goto bad; 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_ci info->filelock_reply = *p; 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_ci 
/* Skip over any unrecognized fields */ 4138c2ecf20Sopenharmony_ci *p = end; 4148c2ecf20Sopenharmony_ci return 0; 4158c2ecf20Sopenharmony_cibad: 4168c2ecf20Sopenharmony_ci return -EIO; 4178c2ecf20Sopenharmony_ci} 4188c2ecf20Sopenharmony_ci 4198c2ecf20Sopenharmony_ci 4208c2ecf20Sopenharmony_ci#if BITS_PER_LONG == 64 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci#define DELEGATED_INO_AVAILABLE xa_mk_value(1) 4238c2ecf20Sopenharmony_ci 4248c2ecf20Sopenharmony_cistatic int ceph_parse_deleg_inos(void **p, void *end, 4258c2ecf20Sopenharmony_ci struct ceph_mds_session *s) 4268c2ecf20Sopenharmony_ci{ 4278c2ecf20Sopenharmony_ci u32 sets; 4288c2ecf20Sopenharmony_ci 4298c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, sets, bad); 4308c2ecf20Sopenharmony_ci dout("got %u sets of delegated inodes\n", sets); 4318c2ecf20Sopenharmony_ci while (sets--) { 4328c2ecf20Sopenharmony_ci u64 start, len, ino; 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, start, bad); 4358c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, len, bad); 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_ci /* Don't accept a delegation of system inodes */ 4388c2ecf20Sopenharmony_ci if (start < CEPH_INO_SYSTEM_BASE) { 4398c2ecf20Sopenharmony_ci pr_warn_ratelimited("ceph: ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n", 4408c2ecf20Sopenharmony_ci start, len); 4418c2ecf20Sopenharmony_ci continue; 4428c2ecf20Sopenharmony_ci } 4438c2ecf20Sopenharmony_ci while (len--) { 4448c2ecf20Sopenharmony_ci int err = xa_insert(&s->s_delegated_inos, ino = start++, 4458c2ecf20Sopenharmony_ci DELEGATED_INO_AVAILABLE, 4468c2ecf20Sopenharmony_ci GFP_KERNEL); 4478c2ecf20Sopenharmony_ci if (!err) { 4488c2ecf20Sopenharmony_ci dout("added delegated inode 0x%llx\n", 4498c2ecf20Sopenharmony_ci start - 1); 4508c2ecf20Sopenharmony_ci } else if (err == -EBUSY) { 4518c2ecf20Sopenharmony_ci pr_warn("ceph: MDS delegated inode 0x%llx more than once.\n", 4528c2ecf20Sopenharmony_ci 
start - 1); 4538c2ecf20Sopenharmony_ci } else { 4548c2ecf20Sopenharmony_ci return err; 4558c2ecf20Sopenharmony_ci } 4568c2ecf20Sopenharmony_ci } 4578c2ecf20Sopenharmony_ci } 4588c2ecf20Sopenharmony_ci return 0; 4598c2ecf20Sopenharmony_cibad: 4608c2ecf20Sopenharmony_ci return -EIO; 4618c2ecf20Sopenharmony_ci} 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_ciu64 ceph_get_deleg_ino(struct ceph_mds_session *s) 4648c2ecf20Sopenharmony_ci{ 4658c2ecf20Sopenharmony_ci unsigned long ino; 4668c2ecf20Sopenharmony_ci void *val; 4678c2ecf20Sopenharmony_ci 4688c2ecf20Sopenharmony_ci xa_for_each(&s->s_delegated_inos, ino, val) { 4698c2ecf20Sopenharmony_ci val = xa_erase(&s->s_delegated_inos, ino); 4708c2ecf20Sopenharmony_ci if (val == DELEGATED_INO_AVAILABLE) 4718c2ecf20Sopenharmony_ci return ino; 4728c2ecf20Sopenharmony_ci } 4738c2ecf20Sopenharmony_ci return 0; 4748c2ecf20Sopenharmony_ci} 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ciint ceph_restore_deleg_ino(struct ceph_mds_session *s, u64 ino) 4778c2ecf20Sopenharmony_ci{ 4788c2ecf20Sopenharmony_ci return xa_insert(&s->s_delegated_inos, ino, DELEGATED_INO_AVAILABLE, 4798c2ecf20Sopenharmony_ci GFP_KERNEL); 4808c2ecf20Sopenharmony_ci} 4818c2ecf20Sopenharmony_ci#else /* BITS_PER_LONG == 64 */ 4828c2ecf20Sopenharmony_ci/* 4838c2ecf20Sopenharmony_ci * FIXME: xarrays can't handle 64-bit indexes on a 32-bit arch. For now, just 4848c2ecf20Sopenharmony_ci * ignore delegated_inos on 32 bit arch. Maybe eventually add xarrays for top 4858c2ecf20Sopenharmony_ci * and bottom words? 
4868c2ecf20Sopenharmony_ci */ 4878c2ecf20Sopenharmony_cistatic int ceph_parse_deleg_inos(void **p, void *end, 4888c2ecf20Sopenharmony_ci struct ceph_mds_session *s) 4898c2ecf20Sopenharmony_ci{ 4908c2ecf20Sopenharmony_ci u32 sets; 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, sets, bad); 4938c2ecf20Sopenharmony_ci if (sets) 4948c2ecf20Sopenharmony_ci ceph_decode_skip_n(p, end, sets * 2 * sizeof(__le64), bad); 4958c2ecf20Sopenharmony_ci return 0; 4968c2ecf20Sopenharmony_cibad: 4978c2ecf20Sopenharmony_ci return -EIO; 4988c2ecf20Sopenharmony_ci} 4998c2ecf20Sopenharmony_ci 5008c2ecf20Sopenharmony_ciu64 ceph_get_deleg_ino(struct ceph_mds_session *s) 5018c2ecf20Sopenharmony_ci{ 5028c2ecf20Sopenharmony_ci return 0; 5038c2ecf20Sopenharmony_ci} 5048c2ecf20Sopenharmony_ci 5058c2ecf20Sopenharmony_ciint ceph_restore_deleg_ino(struct ceph_mds_session *s, u64 ino) 5068c2ecf20Sopenharmony_ci{ 5078c2ecf20Sopenharmony_ci return 0; 5088c2ecf20Sopenharmony_ci} 5098c2ecf20Sopenharmony_ci#endif /* BITS_PER_LONG == 64 */ 5108c2ecf20Sopenharmony_ci 5118c2ecf20Sopenharmony_ci/* 5128c2ecf20Sopenharmony_ci * parse create results 5138c2ecf20Sopenharmony_ci */ 5148c2ecf20Sopenharmony_cistatic int parse_reply_info_create(void **p, void *end, 5158c2ecf20Sopenharmony_ci struct ceph_mds_reply_info_parsed *info, 5168c2ecf20Sopenharmony_ci u64 features, struct ceph_mds_session *s) 5178c2ecf20Sopenharmony_ci{ 5188c2ecf20Sopenharmony_ci int ret; 5198c2ecf20Sopenharmony_ci 5208c2ecf20Sopenharmony_ci if (features == (u64)-1 || 5218c2ecf20Sopenharmony_ci (features & CEPH_FEATURE_REPLY_CREATE_INODE)) { 5228c2ecf20Sopenharmony_ci if (*p == end) { 5238c2ecf20Sopenharmony_ci /* Malformed reply? 
*/ 5248c2ecf20Sopenharmony_ci info->has_create_ino = false; 5258c2ecf20Sopenharmony_ci } else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) { 5268c2ecf20Sopenharmony_ci u8 struct_v, struct_compat; 5278c2ecf20Sopenharmony_ci u32 len; 5288c2ecf20Sopenharmony_ci 5298c2ecf20Sopenharmony_ci info->has_create_ino = true; 5308c2ecf20Sopenharmony_ci ceph_decode_8_safe(p, end, struct_v, bad); 5318c2ecf20Sopenharmony_ci ceph_decode_8_safe(p, end, struct_compat, bad); 5328c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, len, bad); 5338c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, info->ino, bad); 5348c2ecf20Sopenharmony_ci ret = ceph_parse_deleg_inos(p, end, s); 5358c2ecf20Sopenharmony_ci if (ret) 5368c2ecf20Sopenharmony_ci return ret; 5378c2ecf20Sopenharmony_ci } else { 5388c2ecf20Sopenharmony_ci /* legacy */ 5398c2ecf20Sopenharmony_ci ceph_decode_64_safe(p, end, info->ino, bad); 5408c2ecf20Sopenharmony_ci info->has_create_ino = true; 5418c2ecf20Sopenharmony_ci } 5428c2ecf20Sopenharmony_ci } else { 5438c2ecf20Sopenharmony_ci if (*p != end) 5448c2ecf20Sopenharmony_ci goto bad; 5458c2ecf20Sopenharmony_ci } 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci /* Skip over any unrecognized fields */ 5488c2ecf20Sopenharmony_ci *p = end; 5498c2ecf20Sopenharmony_ci return 0; 5508c2ecf20Sopenharmony_cibad: 5518c2ecf20Sopenharmony_ci return -EIO; 5528c2ecf20Sopenharmony_ci} 5538c2ecf20Sopenharmony_ci 5548c2ecf20Sopenharmony_ci/* 5558c2ecf20Sopenharmony_ci * parse extra results 5568c2ecf20Sopenharmony_ci */ 5578c2ecf20Sopenharmony_cistatic int parse_reply_info_extra(void **p, void *end, 5588c2ecf20Sopenharmony_ci struct ceph_mds_reply_info_parsed *info, 5598c2ecf20Sopenharmony_ci u64 features, struct ceph_mds_session *s) 5608c2ecf20Sopenharmony_ci{ 5618c2ecf20Sopenharmony_ci u32 op = le32_to_cpu(info->head->op); 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_ci if (op == CEPH_MDS_OP_GETFILELOCK) 5648c2ecf20Sopenharmony_ci return parse_reply_info_filelock(p, end, 
info, features); 5658c2ecf20Sopenharmony_ci else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP) 5668c2ecf20Sopenharmony_ci return parse_reply_info_readdir(p, end, info, features); 5678c2ecf20Sopenharmony_ci else if (op == CEPH_MDS_OP_CREATE) 5688c2ecf20Sopenharmony_ci return parse_reply_info_create(p, end, info, features, s); 5698c2ecf20Sopenharmony_ci else 5708c2ecf20Sopenharmony_ci return -EIO; 5718c2ecf20Sopenharmony_ci} 5728c2ecf20Sopenharmony_ci 5738c2ecf20Sopenharmony_ci/* 5748c2ecf20Sopenharmony_ci * parse entire mds reply 5758c2ecf20Sopenharmony_ci */ 5768c2ecf20Sopenharmony_cistatic int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg, 5778c2ecf20Sopenharmony_ci struct ceph_mds_reply_info_parsed *info, 5788c2ecf20Sopenharmony_ci u64 features) 5798c2ecf20Sopenharmony_ci{ 5808c2ecf20Sopenharmony_ci void *p, *end; 5818c2ecf20Sopenharmony_ci u32 len; 5828c2ecf20Sopenharmony_ci int err; 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci info->head = msg->front.iov_base; 5858c2ecf20Sopenharmony_ci p = msg->front.iov_base + sizeof(struct ceph_mds_reply_head); 5868c2ecf20Sopenharmony_ci end = p + msg->front.iov_len - sizeof(struct ceph_mds_reply_head); 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci /* trace */ 5898c2ecf20Sopenharmony_ci ceph_decode_32_safe(&p, end, len, bad); 5908c2ecf20Sopenharmony_ci if (len > 0) { 5918c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, len, bad); 5928c2ecf20Sopenharmony_ci err = parse_reply_info_trace(&p, p+len, info, features); 5938c2ecf20Sopenharmony_ci if (err < 0) 5948c2ecf20Sopenharmony_ci goto out_bad; 5958c2ecf20Sopenharmony_ci } 5968c2ecf20Sopenharmony_ci 5978c2ecf20Sopenharmony_ci /* extra */ 5988c2ecf20Sopenharmony_ci ceph_decode_32_safe(&p, end, len, bad); 5998c2ecf20Sopenharmony_ci if (len > 0) { 6008c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, len, bad); 6018c2ecf20Sopenharmony_ci err = parse_reply_info_extra(&p, p+len, info, features, s); 6028c2ecf20Sopenharmony_ci if (err < 
0) 6038c2ecf20Sopenharmony_ci goto out_bad; 6048c2ecf20Sopenharmony_ci } 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_ci /* snap blob */ 6078c2ecf20Sopenharmony_ci ceph_decode_32_safe(&p, end, len, bad); 6088c2ecf20Sopenharmony_ci info->snapblob_len = len; 6098c2ecf20Sopenharmony_ci info->snapblob = p; 6108c2ecf20Sopenharmony_ci p += len; 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_ci if (p != end) 6138c2ecf20Sopenharmony_ci goto bad; 6148c2ecf20Sopenharmony_ci return 0; 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_cibad: 6178c2ecf20Sopenharmony_ci err = -EIO; 6188c2ecf20Sopenharmony_ciout_bad: 6198c2ecf20Sopenharmony_ci pr_err("mds parse_reply err %d\n", err); 6208c2ecf20Sopenharmony_ci return err; 6218c2ecf20Sopenharmony_ci} 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_cistatic void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) 6248c2ecf20Sopenharmony_ci{ 6258c2ecf20Sopenharmony_ci if (!info->dir_entries) 6268c2ecf20Sopenharmony_ci return; 6278c2ecf20Sopenharmony_ci free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size)); 6288c2ecf20Sopenharmony_ci} 6298c2ecf20Sopenharmony_ci 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci/* 6328c2ecf20Sopenharmony_ci * sessions 6338c2ecf20Sopenharmony_ci */ 6348c2ecf20Sopenharmony_ciconst char *ceph_session_state_name(int s) 6358c2ecf20Sopenharmony_ci{ 6368c2ecf20Sopenharmony_ci switch (s) { 6378c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_NEW: return "new"; 6388c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_OPENING: return "opening"; 6398c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_OPEN: return "open"; 6408c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_HUNG: return "hung"; 6418c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_CLOSING: return "closing"; 6428c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_CLOSED: return "closed"; 6438c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_RESTARTING: return "restarting"; 6448c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_RECONNECTING: 
return "reconnecting"; 6458c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_REJECTED: return "rejected"; 6468c2ecf20Sopenharmony_ci default: return "???"; 6478c2ecf20Sopenharmony_ci } 6488c2ecf20Sopenharmony_ci} 6498c2ecf20Sopenharmony_ci 6508c2ecf20Sopenharmony_cistruct ceph_mds_session *ceph_get_mds_session(struct ceph_mds_session *s) 6518c2ecf20Sopenharmony_ci{ 6528c2ecf20Sopenharmony_ci if (refcount_inc_not_zero(&s->s_ref)) { 6538c2ecf20Sopenharmony_ci dout("mdsc get_session %p %d -> %d\n", s, 6548c2ecf20Sopenharmony_ci refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref)); 6558c2ecf20Sopenharmony_ci return s; 6568c2ecf20Sopenharmony_ci } else { 6578c2ecf20Sopenharmony_ci dout("mdsc get_session %p 0 -- FAIL\n", s); 6588c2ecf20Sopenharmony_ci return NULL; 6598c2ecf20Sopenharmony_ci } 6608c2ecf20Sopenharmony_ci} 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_civoid ceph_put_mds_session(struct ceph_mds_session *s) 6638c2ecf20Sopenharmony_ci{ 6648c2ecf20Sopenharmony_ci if (IS_ERR_OR_NULL(s)) 6658c2ecf20Sopenharmony_ci return; 6668c2ecf20Sopenharmony_ci 6678c2ecf20Sopenharmony_ci dout("mdsc put_session %p %d -> %d\n", s, 6688c2ecf20Sopenharmony_ci refcount_read(&s->s_ref), refcount_read(&s->s_ref)-1); 6698c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&s->s_ref)) { 6708c2ecf20Sopenharmony_ci if (s->s_auth.authorizer) 6718c2ecf20Sopenharmony_ci ceph_auth_destroy_authorizer(s->s_auth.authorizer); 6728c2ecf20Sopenharmony_ci WARN_ON(mutex_is_locked(&s->s_mutex)); 6738c2ecf20Sopenharmony_ci xa_destroy(&s->s_delegated_inos); 6748c2ecf20Sopenharmony_ci kfree(s); 6758c2ecf20Sopenharmony_ci } 6768c2ecf20Sopenharmony_ci} 6778c2ecf20Sopenharmony_ci 6788c2ecf20Sopenharmony_ci/* 6798c2ecf20Sopenharmony_ci * called under mdsc->mutex 6808c2ecf20Sopenharmony_ci */ 6818c2ecf20Sopenharmony_cistruct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc, 6828c2ecf20Sopenharmony_ci int mds) 6838c2ecf20Sopenharmony_ci{ 6848c2ecf20Sopenharmony_ci if (mds >= 
mdsc->max_sessions || !mdsc->sessions[mds]) 6858c2ecf20Sopenharmony_ci return NULL; 6868c2ecf20Sopenharmony_ci return ceph_get_mds_session(mdsc->sessions[mds]); 6878c2ecf20Sopenharmony_ci} 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_cistatic bool __have_session(struct ceph_mds_client *mdsc, int mds) 6908c2ecf20Sopenharmony_ci{ 6918c2ecf20Sopenharmony_ci if (mds >= mdsc->max_sessions || !mdsc->sessions[mds]) 6928c2ecf20Sopenharmony_ci return false; 6938c2ecf20Sopenharmony_ci else 6948c2ecf20Sopenharmony_ci return true; 6958c2ecf20Sopenharmony_ci} 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_cistatic int __verify_registered_session(struct ceph_mds_client *mdsc, 6988c2ecf20Sopenharmony_ci struct ceph_mds_session *s) 6998c2ecf20Sopenharmony_ci{ 7008c2ecf20Sopenharmony_ci if (s->s_mds >= mdsc->max_sessions || 7018c2ecf20Sopenharmony_ci mdsc->sessions[s->s_mds] != s) 7028c2ecf20Sopenharmony_ci return -ENOENT; 7038c2ecf20Sopenharmony_ci return 0; 7048c2ecf20Sopenharmony_ci} 7058c2ecf20Sopenharmony_ci 7068c2ecf20Sopenharmony_ci/* 7078c2ecf20Sopenharmony_ci * create+register a new session for given mds. 7088c2ecf20Sopenharmony_ci * called under mdsc->mutex. 
7098c2ecf20Sopenharmony_ci */ 7108c2ecf20Sopenharmony_cistatic struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, 7118c2ecf20Sopenharmony_ci int mds) 7128c2ecf20Sopenharmony_ci{ 7138c2ecf20Sopenharmony_ci struct ceph_mds_session *s; 7148c2ecf20Sopenharmony_ci 7158c2ecf20Sopenharmony_ci if (mds >= mdsc->mdsmap->possible_max_rank) 7168c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 7178c2ecf20Sopenharmony_ci 7188c2ecf20Sopenharmony_ci s = kzalloc(sizeof(*s), GFP_NOFS); 7198c2ecf20Sopenharmony_ci if (!s) 7208c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 7218c2ecf20Sopenharmony_ci 7228c2ecf20Sopenharmony_ci if (mds >= mdsc->max_sessions) { 7238c2ecf20Sopenharmony_ci int newmax = 1 << get_count_order(mds + 1); 7248c2ecf20Sopenharmony_ci struct ceph_mds_session **sa; 7258c2ecf20Sopenharmony_ci 7268c2ecf20Sopenharmony_ci dout("%s: realloc to %d\n", __func__, newmax); 7278c2ecf20Sopenharmony_ci sa = kcalloc(newmax, sizeof(void *), GFP_NOFS); 7288c2ecf20Sopenharmony_ci if (!sa) 7298c2ecf20Sopenharmony_ci goto fail_realloc; 7308c2ecf20Sopenharmony_ci if (mdsc->sessions) { 7318c2ecf20Sopenharmony_ci memcpy(sa, mdsc->sessions, 7328c2ecf20Sopenharmony_ci mdsc->max_sessions * sizeof(void *)); 7338c2ecf20Sopenharmony_ci kfree(mdsc->sessions); 7348c2ecf20Sopenharmony_ci } 7358c2ecf20Sopenharmony_ci mdsc->sessions = sa; 7368c2ecf20Sopenharmony_ci mdsc->max_sessions = newmax; 7378c2ecf20Sopenharmony_ci } 7388c2ecf20Sopenharmony_ci 7398c2ecf20Sopenharmony_ci dout("%s: mds%d\n", __func__, mds); 7408c2ecf20Sopenharmony_ci s->s_mdsc = mdsc; 7418c2ecf20Sopenharmony_ci s->s_mds = mds; 7428c2ecf20Sopenharmony_ci s->s_state = CEPH_MDS_SESSION_NEW; 7438c2ecf20Sopenharmony_ci s->s_ttl = 0; 7448c2ecf20Sopenharmony_ci s->s_seq = 0; 7458c2ecf20Sopenharmony_ci mutex_init(&s->s_mutex); 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_ci ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr); 7488c2ecf20Sopenharmony_ci 7498c2ecf20Sopenharmony_ci 
spin_lock_init(&s->s_gen_ttl_lock); 7508c2ecf20Sopenharmony_ci s->s_cap_gen = 1; 7518c2ecf20Sopenharmony_ci s->s_cap_ttl = jiffies - 1; 7528c2ecf20Sopenharmony_ci 7538c2ecf20Sopenharmony_ci spin_lock_init(&s->s_cap_lock); 7548c2ecf20Sopenharmony_ci s->s_renew_requested = 0; 7558c2ecf20Sopenharmony_ci s->s_renew_seq = 0; 7568c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&s->s_caps); 7578c2ecf20Sopenharmony_ci s->s_nr_caps = 0; 7588c2ecf20Sopenharmony_ci refcount_set(&s->s_ref, 1); 7598c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&s->s_waiting); 7608c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&s->s_unsafe); 7618c2ecf20Sopenharmony_ci xa_init(&s->s_delegated_inos); 7628c2ecf20Sopenharmony_ci s->s_num_cap_releases = 0; 7638c2ecf20Sopenharmony_ci s->s_cap_reconnect = 0; 7648c2ecf20Sopenharmony_ci s->s_cap_iterator = NULL; 7658c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&s->s_cap_releases); 7668c2ecf20Sopenharmony_ci INIT_WORK(&s->s_cap_release_work, ceph_cap_release_work); 7678c2ecf20Sopenharmony_ci 7688c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&s->s_cap_dirty); 7698c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&s->s_cap_flushing); 7708c2ecf20Sopenharmony_ci 7718c2ecf20Sopenharmony_ci mdsc->sessions[mds] = s; 7728c2ecf20Sopenharmony_ci atomic_inc(&mdsc->num_sessions); 7738c2ecf20Sopenharmony_ci refcount_inc(&s->s_ref); /* one ref to sessions[], one to caller */ 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_ci ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds, 7768c2ecf20Sopenharmony_ci ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); 7778c2ecf20Sopenharmony_ci 7788c2ecf20Sopenharmony_ci return s; 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_cifail_realloc: 7818c2ecf20Sopenharmony_ci kfree(s); 7828c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 7838c2ecf20Sopenharmony_ci} 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci/* 7868c2ecf20Sopenharmony_ci * called under mdsc->mutex 7878c2ecf20Sopenharmony_ci */ 7888c2ecf20Sopenharmony_cistatic void __unregister_session(struct ceph_mds_client *mdsc, 
7898c2ecf20Sopenharmony_ci struct ceph_mds_session *s) 7908c2ecf20Sopenharmony_ci{ 7918c2ecf20Sopenharmony_ci dout("__unregister_session mds%d %p\n", s->s_mds, s); 7928c2ecf20Sopenharmony_ci BUG_ON(mdsc->sessions[s->s_mds] != s); 7938c2ecf20Sopenharmony_ci mdsc->sessions[s->s_mds] = NULL; 7948c2ecf20Sopenharmony_ci ceph_con_close(&s->s_con); 7958c2ecf20Sopenharmony_ci ceph_put_mds_session(s); 7968c2ecf20Sopenharmony_ci atomic_dec(&mdsc->num_sessions); 7978c2ecf20Sopenharmony_ci} 7988c2ecf20Sopenharmony_ci 7998c2ecf20Sopenharmony_ci/* 8008c2ecf20Sopenharmony_ci * drop session refs in request. 8018c2ecf20Sopenharmony_ci * 8028c2ecf20Sopenharmony_ci * should be last request ref, or hold mdsc->mutex 8038c2ecf20Sopenharmony_ci */ 8048c2ecf20Sopenharmony_cistatic void put_request_session(struct ceph_mds_request *req) 8058c2ecf20Sopenharmony_ci{ 8068c2ecf20Sopenharmony_ci if (req->r_session) { 8078c2ecf20Sopenharmony_ci ceph_put_mds_session(req->r_session); 8088c2ecf20Sopenharmony_ci req->r_session = NULL; 8098c2ecf20Sopenharmony_ci } 8108c2ecf20Sopenharmony_ci} 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_civoid ceph_mdsc_iterate_sessions(struct ceph_mds_client *mdsc, 8138c2ecf20Sopenharmony_ci void (*cb)(struct ceph_mds_session *), 8148c2ecf20Sopenharmony_ci bool check_state) 8158c2ecf20Sopenharmony_ci{ 8168c2ecf20Sopenharmony_ci int mds; 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 8198c2ecf20Sopenharmony_ci for (mds = 0; mds < mdsc->max_sessions; ++mds) { 8208c2ecf20Sopenharmony_ci struct ceph_mds_session *s; 8218c2ecf20Sopenharmony_ci 8228c2ecf20Sopenharmony_ci s = __ceph_lookup_mds_session(mdsc, mds); 8238c2ecf20Sopenharmony_ci if (!s) 8248c2ecf20Sopenharmony_ci continue; 8258c2ecf20Sopenharmony_ci 8268c2ecf20Sopenharmony_ci if (check_state && !check_session_state(s)) { 8278c2ecf20Sopenharmony_ci ceph_put_mds_session(s); 8288c2ecf20Sopenharmony_ci continue; 8298c2ecf20Sopenharmony_ci } 8308c2ecf20Sopenharmony_ci 
8318c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 8328c2ecf20Sopenharmony_ci cb(s); 8338c2ecf20Sopenharmony_ci ceph_put_mds_session(s); 8348c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 8358c2ecf20Sopenharmony_ci } 8368c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 8378c2ecf20Sopenharmony_ci} 8388c2ecf20Sopenharmony_ci 8398c2ecf20Sopenharmony_civoid ceph_mdsc_release_request(struct kref *kref) 8408c2ecf20Sopenharmony_ci{ 8418c2ecf20Sopenharmony_ci struct ceph_mds_request *req = container_of(kref, 8428c2ecf20Sopenharmony_ci struct ceph_mds_request, 8438c2ecf20Sopenharmony_ci r_kref); 8448c2ecf20Sopenharmony_ci ceph_mdsc_release_dir_caps_no_check(req); 8458c2ecf20Sopenharmony_ci destroy_reply_info(&req->r_reply_info); 8468c2ecf20Sopenharmony_ci if (req->r_request) 8478c2ecf20Sopenharmony_ci ceph_msg_put(req->r_request); 8488c2ecf20Sopenharmony_ci if (req->r_reply) 8498c2ecf20Sopenharmony_ci ceph_msg_put(req->r_reply); 8508c2ecf20Sopenharmony_ci if (req->r_inode) { 8518c2ecf20Sopenharmony_ci ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 8528c2ecf20Sopenharmony_ci /* avoid calling iput_final() in mds dispatch threads */ 8538c2ecf20Sopenharmony_ci ceph_async_iput(req->r_inode); 8548c2ecf20Sopenharmony_ci } 8558c2ecf20Sopenharmony_ci if (req->r_parent) { 8568c2ecf20Sopenharmony_ci ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); 8578c2ecf20Sopenharmony_ci ceph_async_iput(req->r_parent); 8588c2ecf20Sopenharmony_ci } 8598c2ecf20Sopenharmony_ci ceph_async_iput(req->r_target_inode); 8608c2ecf20Sopenharmony_ci if (req->r_dentry) 8618c2ecf20Sopenharmony_ci dput(req->r_dentry); 8628c2ecf20Sopenharmony_ci if (req->r_old_dentry) 8638c2ecf20Sopenharmony_ci dput(req->r_old_dentry); 8648c2ecf20Sopenharmony_ci if (req->r_old_dentry_dir) { 8658c2ecf20Sopenharmony_ci /* 8668c2ecf20Sopenharmony_ci * track (and drop pins for) r_old_dentry_dir 8678c2ecf20Sopenharmony_ci * separately, since r_old_dentry's d_parent may have 8688c2ecf20Sopenharmony_ci * 
changed between the dir mutex being dropped and 8698c2ecf20Sopenharmony_ci * this request being freed. 8708c2ecf20Sopenharmony_ci */ 8718c2ecf20Sopenharmony_ci ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), 8728c2ecf20Sopenharmony_ci CEPH_CAP_PIN); 8738c2ecf20Sopenharmony_ci ceph_async_iput(req->r_old_dentry_dir); 8748c2ecf20Sopenharmony_ci } 8758c2ecf20Sopenharmony_ci kfree(req->r_path1); 8768c2ecf20Sopenharmony_ci kfree(req->r_path2); 8778c2ecf20Sopenharmony_ci if (req->r_pagelist) 8788c2ecf20Sopenharmony_ci ceph_pagelist_release(req->r_pagelist); 8798c2ecf20Sopenharmony_ci put_request_session(req); 8808c2ecf20Sopenharmony_ci ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation); 8818c2ecf20Sopenharmony_ci WARN_ON_ONCE(!list_empty(&req->r_wait)); 8828c2ecf20Sopenharmony_ci kmem_cache_free(ceph_mds_request_cachep, req); 8838c2ecf20Sopenharmony_ci} 8848c2ecf20Sopenharmony_ci 8858c2ecf20Sopenharmony_ciDEFINE_RB_FUNCS(request, struct ceph_mds_request, r_tid, r_node) 8868c2ecf20Sopenharmony_ci 8878c2ecf20Sopenharmony_ci/* 8888c2ecf20Sopenharmony_ci * lookup session, bump ref if found. 8898c2ecf20Sopenharmony_ci * 8908c2ecf20Sopenharmony_ci * called under mdsc->mutex. 8918c2ecf20Sopenharmony_ci */ 8928c2ecf20Sopenharmony_cistatic struct ceph_mds_request * 8938c2ecf20Sopenharmony_cilookup_get_request(struct ceph_mds_client *mdsc, u64 tid) 8948c2ecf20Sopenharmony_ci{ 8958c2ecf20Sopenharmony_ci struct ceph_mds_request *req; 8968c2ecf20Sopenharmony_ci 8978c2ecf20Sopenharmony_ci req = lookup_request(&mdsc->request_tree, tid); 8988c2ecf20Sopenharmony_ci if (req) 8998c2ecf20Sopenharmony_ci ceph_mdsc_get_request(req); 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_ci return req; 9028c2ecf20Sopenharmony_ci} 9038c2ecf20Sopenharmony_ci 9048c2ecf20Sopenharmony_ci/* 9058c2ecf20Sopenharmony_ci * Register an in-flight request, and assign a tid. Link to directory 9068c2ecf20Sopenharmony_ci * are modifying (if any). 
9078c2ecf20Sopenharmony_ci * 9088c2ecf20Sopenharmony_ci * Called under mdsc->mutex. 9098c2ecf20Sopenharmony_ci */ 9108c2ecf20Sopenharmony_cistatic void __register_request(struct ceph_mds_client *mdsc, 9118c2ecf20Sopenharmony_ci struct ceph_mds_request *req, 9128c2ecf20Sopenharmony_ci struct inode *dir) 9138c2ecf20Sopenharmony_ci{ 9148c2ecf20Sopenharmony_ci int ret = 0; 9158c2ecf20Sopenharmony_ci 9168c2ecf20Sopenharmony_ci req->r_tid = ++mdsc->last_tid; 9178c2ecf20Sopenharmony_ci if (req->r_num_caps) { 9188c2ecf20Sopenharmony_ci ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation, 9198c2ecf20Sopenharmony_ci req->r_num_caps); 9208c2ecf20Sopenharmony_ci if (ret < 0) { 9218c2ecf20Sopenharmony_ci pr_err("__register_request %p " 9228c2ecf20Sopenharmony_ci "failed to reserve caps: %d\n", req, ret); 9238c2ecf20Sopenharmony_ci /* set req->r_err to fail early from __do_request */ 9248c2ecf20Sopenharmony_ci req->r_err = ret; 9258c2ecf20Sopenharmony_ci return; 9268c2ecf20Sopenharmony_ci } 9278c2ecf20Sopenharmony_ci } 9288c2ecf20Sopenharmony_ci dout("__register_request %p tid %lld\n", req, req->r_tid); 9298c2ecf20Sopenharmony_ci ceph_mdsc_get_request(req); 9308c2ecf20Sopenharmony_ci insert_request(&mdsc->request_tree, req); 9318c2ecf20Sopenharmony_ci 9328c2ecf20Sopenharmony_ci req->r_uid = current_fsuid(); 9338c2ecf20Sopenharmony_ci req->r_gid = current_fsgid(); 9348c2ecf20Sopenharmony_ci 9358c2ecf20Sopenharmony_ci if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK) 9368c2ecf20Sopenharmony_ci mdsc->oldest_tid = req->r_tid; 9378c2ecf20Sopenharmony_ci 9388c2ecf20Sopenharmony_ci if (dir) { 9398c2ecf20Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(dir); 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_ci ihold(dir); 9428c2ecf20Sopenharmony_ci req->r_unsafe_dir = dir; 9438c2ecf20Sopenharmony_ci spin_lock(&ci->i_unsafe_lock); 9448c2ecf20Sopenharmony_ci list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops); 9458c2ecf20Sopenharmony_ci 
spin_unlock(&ci->i_unsafe_lock); 9468c2ecf20Sopenharmony_ci } 9478c2ecf20Sopenharmony_ci} 9488c2ecf20Sopenharmony_ci 9498c2ecf20Sopenharmony_cistatic void __unregister_request(struct ceph_mds_client *mdsc, 9508c2ecf20Sopenharmony_ci struct ceph_mds_request *req) 9518c2ecf20Sopenharmony_ci{ 9528c2ecf20Sopenharmony_ci dout("__unregister_request %p tid %lld\n", req, req->r_tid); 9538c2ecf20Sopenharmony_ci 9548c2ecf20Sopenharmony_ci /* Never leave an unregistered request on an unsafe list! */ 9558c2ecf20Sopenharmony_ci list_del_init(&req->r_unsafe_item); 9568c2ecf20Sopenharmony_ci 9578c2ecf20Sopenharmony_ci if (req->r_tid == mdsc->oldest_tid) { 9588c2ecf20Sopenharmony_ci struct rb_node *p = rb_next(&req->r_node); 9598c2ecf20Sopenharmony_ci mdsc->oldest_tid = 0; 9608c2ecf20Sopenharmony_ci while (p) { 9618c2ecf20Sopenharmony_ci struct ceph_mds_request *next_req = 9628c2ecf20Sopenharmony_ci rb_entry(p, struct ceph_mds_request, r_node); 9638c2ecf20Sopenharmony_ci if (next_req->r_op != CEPH_MDS_OP_SETFILELOCK) { 9648c2ecf20Sopenharmony_ci mdsc->oldest_tid = next_req->r_tid; 9658c2ecf20Sopenharmony_ci break; 9668c2ecf20Sopenharmony_ci } 9678c2ecf20Sopenharmony_ci p = rb_next(p); 9688c2ecf20Sopenharmony_ci } 9698c2ecf20Sopenharmony_ci } 9708c2ecf20Sopenharmony_ci 9718c2ecf20Sopenharmony_ci erase_request(&mdsc->request_tree, req); 9728c2ecf20Sopenharmony_ci 9738c2ecf20Sopenharmony_ci if (req->r_unsafe_dir) { 9748c2ecf20Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir); 9758c2ecf20Sopenharmony_ci spin_lock(&ci->i_unsafe_lock); 9768c2ecf20Sopenharmony_ci list_del_init(&req->r_unsafe_dir_item); 9778c2ecf20Sopenharmony_ci spin_unlock(&ci->i_unsafe_lock); 9788c2ecf20Sopenharmony_ci } 9798c2ecf20Sopenharmony_ci if (req->r_target_inode && 9808c2ecf20Sopenharmony_ci test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { 9818c2ecf20Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(req->r_target_inode); 9828c2ecf20Sopenharmony_ci 
spin_lock(&ci->i_unsafe_lock); 9838c2ecf20Sopenharmony_ci list_del_init(&req->r_unsafe_target_item); 9848c2ecf20Sopenharmony_ci spin_unlock(&ci->i_unsafe_lock); 9858c2ecf20Sopenharmony_ci } 9868c2ecf20Sopenharmony_ci 9878c2ecf20Sopenharmony_ci if (req->r_unsafe_dir) { 9888c2ecf20Sopenharmony_ci /* avoid calling iput_final() in mds dispatch threads */ 9898c2ecf20Sopenharmony_ci ceph_async_iput(req->r_unsafe_dir); 9908c2ecf20Sopenharmony_ci req->r_unsafe_dir = NULL; 9918c2ecf20Sopenharmony_ci } 9928c2ecf20Sopenharmony_ci 9938c2ecf20Sopenharmony_ci complete_all(&req->r_safe_completion); 9948c2ecf20Sopenharmony_ci 9958c2ecf20Sopenharmony_ci ceph_mdsc_put_request(req); 9968c2ecf20Sopenharmony_ci} 9978c2ecf20Sopenharmony_ci 9988c2ecf20Sopenharmony_ci/* 9998c2ecf20Sopenharmony_ci * Walk back up the dentry tree until we hit a dentry representing a 10008c2ecf20Sopenharmony_ci * non-snapshot inode. We do this using the rcu_read_lock (which must be held 10018c2ecf20Sopenharmony_ci * when calling this) to ensure that the objects won't disappear while we're 10028c2ecf20Sopenharmony_ci * working with them. Once we hit a candidate dentry, we attempt to take a 10038c2ecf20Sopenharmony_ci * reference to it, and return that as the result. 
10048c2ecf20Sopenharmony_ci */ 10058c2ecf20Sopenharmony_cistatic struct inode *get_nonsnap_parent(struct dentry *dentry) 10068c2ecf20Sopenharmony_ci{ 10078c2ecf20Sopenharmony_ci struct inode *inode = NULL; 10088c2ecf20Sopenharmony_ci 10098c2ecf20Sopenharmony_ci while (dentry && !IS_ROOT(dentry)) { 10108c2ecf20Sopenharmony_ci inode = d_inode_rcu(dentry); 10118c2ecf20Sopenharmony_ci if (!inode || ceph_snap(inode) == CEPH_NOSNAP) 10128c2ecf20Sopenharmony_ci break; 10138c2ecf20Sopenharmony_ci dentry = dentry->d_parent; 10148c2ecf20Sopenharmony_ci } 10158c2ecf20Sopenharmony_ci if (inode) 10168c2ecf20Sopenharmony_ci inode = igrab(inode); 10178c2ecf20Sopenharmony_ci return inode; 10188c2ecf20Sopenharmony_ci} 10198c2ecf20Sopenharmony_ci 10208c2ecf20Sopenharmony_ci/* 10218c2ecf20Sopenharmony_ci * Choose mds to send request to next. If there is a hint set in the 10228c2ecf20Sopenharmony_ci * request (e.g., due to a prior forward hint from the mds), use that. 10238c2ecf20Sopenharmony_ci * Otherwise, consult frag tree and/or caps to identify the 10248c2ecf20Sopenharmony_ci * appropriate mds. If all else fails, choose randomly. 10258c2ecf20Sopenharmony_ci * 10268c2ecf20Sopenharmony_ci * Called under mdsc->mutex. 
10278c2ecf20Sopenharmony_ci */ 10288c2ecf20Sopenharmony_cistatic int __choose_mds(struct ceph_mds_client *mdsc, 10298c2ecf20Sopenharmony_ci struct ceph_mds_request *req, 10308c2ecf20Sopenharmony_ci bool *random) 10318c2ecf20Sopenharmony_ci{ 10328c2ecf20Sopenharmony_ci struct inode *inode; 10338c2ecf20Sopenharmony_ci struct ceph_inode_info *ci; 10348c2ecf20Sopenharmony_ci struct ceph_cap *cap; 10358c2ecf20Sopenharmony_ci int mode = req->r_direct_mode; 10368c2ecf20Sopenharmony_ci int mds = -1; 10378c2ecf20Sopenharmony_ci u32 hash = req->r_direct_hash; 10388c2ecf20Sopenharmony_ci bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags); 10398c2ecf20Sopenharmony_ci 10408c2ecf20Sopenharmony_ci if (random) 10418c2ecf20Sopenharmony_ci *random = false; 10428c2ecf20Sopenharmony_ci 10438c2ecf20Sopenharmony_ci /* 10448c2ecf20Sopenharmony_ci * is there a specific mds we should try? ignore hint if we have 10458c2ecf20Sopenharmony_ci * no session and the mds is not up (active or recovering). 
10468c2ecf20Sopenharmony_ci */ 10478c2ecf20Sopenharmony_ci if (req->r_resend_mds >= 0 && 10488c2ecf20Sopenharmony_ci (__have_session(mdsc, req->r_resend_mds) || 10498c2ecf20Sopenharmony_ci ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) { 10508c2ecf20Sopenharmony_ci dout("%s using resend_mds mds%d\n", __func__, 10518c2ecf20Sopenharmony_ci req->r_resend_mds); 10528c2ecf20Sopenharmony_ci return req->r_resend_mds; 10538c2ecf20Sopenharmony_ci } 10548c2ecf20Sopenharmony_ci 10558c2ecf20Sopenharmony_ci if (mode == USE_RANDOM_MDS) 10568c2ecf20Sopenharmony_ci goto random; 10578c2ecf20Sopenharmony_ci 10588c2ecf20Sopenharmony_ci inode = NULL; 10598c2ecf20Sopenharmony_ci if (req->r_inode) { 10608c2ecf20Sopenharmony_ci if (ceph_snap(req->r_inode) != CEPH_SNAPDIR) { 10618c2ecf20Sopenharmony_ci inode = req->r_inode; 10628c2ecf20Sopenharmony_ci ihold(inode); 10638c2ecf20Sopenharmony_ci } else { 10648c2ecf20Sopenharmony_ci /* req->r_dentry is non-null for LSSNAP request */ 10658c2ecf20Sopenharmony_ci rcu_read_lock(); 10668c2ecf20Sopenharmony_ci inode = get_nonsnap_parent(req->r_dentry); 10678c2ecf20Sopenharmony_ci rcu_read_unlock(); 10688c2ecf20Sopenharmony_ci dout("%s using snapdir's parent %p\n", __func__, inode); 10698c2ecf20Sopenharmony_ci } 10708c2ecf20Sopenharmony_ci } else if (req->r_dentry) { 10718c2ecf20Sopenharmony_ci /* ignore race with rename; old or new d_parent is okay */ 10728c2ecf20Sopenharmony_ci struct dentry *parent; 10738c2ecf20Sopenharmony_ci struct inode *dir; 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_ci rcu_read_lock(); 10768c2ecf20Sopenharmony_ci parent = READ_ONCE(req->r_dentry->d_parent); 10778c2ecf20Sopenharmony_ci dir = req->r_parent ? 
: d_inode_rcu(parent); 10788c2ecf20Sopenharmony_ci 10798c2ecf20Sopenharmony_ci if (!dir || dir->i_sb != mdsc->fsc->sb) { 10808c2ecf20Sopenharmony_ci /* not this fs or parent went negative */ 10818c2ecf20Sopenharmony_ci inode = d_inode(req->r_dentry); 10828c2ecf20Sopenharmony_ci if (inode) 10838c2ecf20Sopenharmony_ci ihold(inode); 10848c2ecf20Sopenharmony_ci } else if (ceph_snap(dir) != CEPH_NOSNAP) { 10858c2ecf20Sopenharmony_ci /* direct snapped/virtual snapdir requests 10868c2ecf20Sopenharmony_ci * based on parent dir inode */ 10878c2ecf20Sopenharmony_ci inode = get_nonsnap_parent(parent); 10888c2ecf20Sopenharmony_ci dout("%s using nonsnap parent %p\n", __func__, inode); 10898c2ecf20Sopenharmony_ci } else { 10908c2ecf20Sopenharmony_ci /* dentry target */ 10918c2ecf20Sopenharmony_ci inode = d_inode(req->r_dentry); 10928c2ecf20Sopenharmony_ci if (!inode || mode == USE_AUTH_MDS) { 10938c2ecf20Sopenharmony_ci /* dir + name */ 10948c2ecf20Sopenharmony_ci inode = igrab(dir); 10958c2ecf20Sopenharmony_ci hash = ceph_dentry_hash(dir, req->r_dentry); 10968c2ecf20Sopenharmony_ci is_hash = true; 10978c2ecf20Sopenharmony_ci } else { 10988c2ecf20Sopenharmony_ci ihold(inode); 10998c2ecf20Sopenharmony_ci } 11008c2ecf20Sopenharmony_ci } 11018c2ecf20Sopenharmony_ci rcu_read_unlock(); 11028c2ecf20Sopenharmony_ci } 11038c2ecf20Sopenharmony_ci 11048c2ecf20Sopenharmony_ci dout("%s %p is_hash=%d (0x%x) mode %d\n", __func__, inode, (int)is_hash, 11058c2ecf20Sopenharmony_ci hash, mode); 11068c2ecf20Sopenharmony_ci if (!inode) 11078c2ecf20Sopenharmony_ci goto random; 11088c2ecf20Sopenharmony_ci ci = ceph_inode(inode); 11098c2ecf20Sopenharmony_ci 11108c2ecf20Sopenharmony_ci if (is_hash && S_ISDIR(inode->i_mode)) { 11118c2ecf20Sopenharmony_ci struct ceph_inode_frag frag; 11128c2ecf20Sopenharmony_ci int found; 11138c2ecf20Sopenharmony_ci 11148c2ecf20Sopenharmony_ci ceph_choose_frag(ci, hash, &frag, &found); 11158c2ecf20Sopenharmony_ci if (found) { 11168c2ecf20Sopenharmony_ci if (mode == 
USE_ANY_MDS && frag.ndist > 0) { 11178c2ecf20Sopenharmony_ci u8 r; 11188c2ecf20Sopenharmony_ci 11198c2ecf20Sopenharmony_ci /* choose a random replica */ 11208c2ecf20Sopenharmony_ci get_random_bytes(&r, 1); 11218c2ecf20Sopenharmony_ci r %= frag.ndist; 11228c2ecf20Sopenharmony_ci mds = frag.dist[r]; 11238c2ecf20Sopenharmony_ci dout("%s %p %llx.%llx frag %u mds%d (%d/%d)\n", 11248c2ecf20Sopenharmony_ci __func__, inode, ceph_vinop(inode), 11258c2ecf20Sopenharmony_ci frag.frag, mds, (int)r, frag.ndist); 11268c2ecf20Sopenharmony_ci if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= 11278c2ecf20Sopenharmony_ci CEPH_MDS_STATE_ACTIVE && 11288c2ecf20Sopenharmony_ci !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) 11298c2ecf20Sopenharmony_ci goto out; 11308c2ecf20Sopenharmony_ci } 11318c2ecf20Sopenharmony_ci 11328c2ecf20Sopenharmony_ci /* since this file/dir wasn't known to be 11338c2ecf20Sopenharmony_ci * replicated, then we want to look for the 11348c2ecf20Sopenharmony_ci * authoritative mds. */ 11358c2ecf20Sopenharmony_ci if (frag.mds >= 0) { 11368c2ecf20Sopenharmony_ci /* choose auth mds */ 11378c2ecf20Sopenharmony_ci mds = frag.mds; 11388c2ecf20Sopenharmony_ci dout("%s %p %llx.%llx frag %u mds%d (auth)\n", 11398c2ecf20Sopenharmony_ci __func__, inode, ceph_vinop(inode), 11408c2ecf20Sopenharmony_ci frag.frag, mds); 11418c2ecf20Sopenharmony_ci if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= 11428c2ecf20Sopenharmony_ci CEPH_MDS_STATE_ACTIVE) { 11438c2ecf20Sopenharmony_ci if (!ceph_mdsmap_is_laggy(mdsc->mdsmap, 11448c2ecf20Sopenharmony_ci mds)) 11458c2ecf20Sopenharmony_ci goto out; 11468c2ecf20Sopenharmony_ci } 11478c2ecf20Sopenharmony_ci } 11488c2ecf20Sopenharmony_ci mode = USE_AUTH_MDS; 11498c2ecf20Sopenharmony_ci } 11508c2ecf20Sopenharmony_ci } 11518c2ecf20Sopenharmony_ci 11528c2ecf20Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 11538c2ecf20Sopenharmony_ci cap = NULL; 11548c2ecf20Sopenharmony_ci if (mode == USE_AUTH_MDS) 11558c2ecf20Sopenharmony_ci cap = ci->i_auth_cap; 
11568c2ecf20Sopenharmony_ci if (!cap && !RB_EMPTY_ROOT(&ci->i_caps)) 11578c2ecf20Sopenharmony_ci cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node); 11588c2ecf20Sopenharmony_ci if (!cap) { 11598c2ecf20Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 11608c2ecf20Sopenharmony_ci ceph_async_iput(inode); 11618c2ecf20Sopenharmony_ci goto random; 11628c2ecf20Sopenharmony_ci } 11638c2ecf20Sopenharmony_ci mds = cap->session->s_mds; 11648c2ecf20Sopenharmony_ci dout("%s %p %llx.%llx mds%d (%scap %p)\n", __func__, 11658c2ecf20Sopenharmony_ci inode, ceph_vinop(inode), mds, 11668c2ecf20Sopenharmony_ci cap == ci->i_auth_cap ? "auth " : "", cap); 11678c2ecf20Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 11688c2ecf20Sopenharmony_ciout: 11698c2ecf20Sopenharmony_ci /* avoid calling iput_final() while holding mdsc->mutex or 11708c2ecf20Sopenharmony_ci * in mds dispatch threads */ 11718c2ecf20Sopenharmony_ci ceph_async_iput(inode); 11728c2ecf20Sopenharmony_ci return mds; 11738c2ecf20Sopenharmony_ci 11748c2ecf20Sopenharmony_cirandom: 11758c2ecf20Sopenharmony_ci if (random) 11768c2ecf20Sopenharmony_ci *random = true; 11778c2ecf20Sopenharmony_ci 11788c2ecf20Sopenharmony_ci mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap); 11798c2ecf20Sopenharmony_ci dout("%s chose random mds%d\n", __func__, mds); 11808c2ecf20Sopenharmony_ci return mds; 11818c2ecf20Sopenharmony_ci} 11828c2ecf20Sopenharmony_ci 11838c2ecf20Sopenharmony_ci 11848c2ecf20Sopenharmony_ci/* 11858c2ecf20Sopenharmony_ci * session messages 11868c2ecf20Sopenharmony_ci */ 11878c2ecf20Sopenharmony_cistruct ceph_msg *ceph_create_session_msg(u32 op, u64 seq) 11888c2ecf20Sopenharmony_ci{ 11898c2ecf20Sopenharmony_ci struct ceph_msg *msg; 11908c2ecf20Sopenharmony_ci struct ceph_mds_session_head *h; 11918c2ecf20Sopenharmony_ci 11928c2ecf20Sopenharmony_ci msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS, 11938c2ecf20Sopenharmony_ci false); 11948c2ecf20Sopenharmony_ci if (!msg) { 11958c2ecf20Sopenharmony_ci 
pr_err("ENOMEM creating session %s msg\n", 11968c2ecf20Sopenharmony_ci ceph_session_op_name(op)); 11978c2ecf20Sopenharmony_ci return NULL; 11988c2ecf20Sopenharmony_ci } 11998c2ecf20Sopenharmony_ci h = msg->front.iov_base; 12008c2ecf20Sopenharmony_ci h->op = cpu_to_le32(op); 12018c2ecf20Sopenharmony_ci h->seq = cpu_to_le64(seq); 12028c2ecf20Sopenharmony_ci 12038c2ecf20Sopenharmony_ci return msg; 12048c2ecf20Sopenharmony_ci} 12058c2ecf20Sopenharmony_ci 12068c2ecf20Sopenharmony_cistatic const unsigned char feature_bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED; 12078c2ecf20Sopenharmony_ci#define FEATURE_BYTES(c) (DIV_ROUND_UP((size_t)feature_bits[c - 1] + 1, 64) * 8) 12088c2ecf20Sopenharmony_cistatic int encode_supported_features(void **p, void *end) 12098c2ecf20Sopenharmony_ci{ 12108c2ecf20Sopenharmony_ci static const size_t count = ARRAY_SIZE(feature_bits); 12118c2ecf20Sopenharmony_ci 12128c2ecf20Sopenharmony_ci if (count > 0) { 12138c2ecf20Sopenharmony_ci size_t i; 12148c2ecf20Sopenharmony_ci size_t size = FEATURE_BYTES(count); 12158c2ecf20Sopenharmony_ci unsigned long bit; 12168c2ecf20Sopenharmony_ci 12178c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(*p + 4 + size > end)) 12188c2ecf20Sopenharmony_ci return -ERANGE; 12198c2ecf20Sopenharmony_ci 12208c2ecf20Sopenharmony_ci ceph_encode_32(p, size); 12218c2ecf20Sopenharmony_ci memset(*p, 0, size); 12228c2ecf20Sopenharmony_ci for (i = 0; i < count; i++) { 12238c2ecf20Sopenharmony_ci bit = feature_bits[i]; 12248c2ecf20Sopenharmony_ci ((unsigned char *)(*p))[bit / 8] |= BIT(bit % 8); 12258c2ecf20Sopenharmony_ci } 12268c2ecf20Sopenharmony_ci *p += size; 12278c2ecf20Sopenharmony_ci } else { 12288c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(*p + 4 > end)) 12298c2ecf20Sopenharmony_ci return -ERANGE; 12308c2ecf20Sopenharmony_ci 12318c2ecf20Sopenharmony_ci ceph_encode_32(p, 0); 12328c2ecf20Sopenharmony_ci } 12338c2ecf20Sopenharmony_ci 12348c2ecf20Sopenharmony_ci return 0; 12358c2ecf20Sopenharmony_ci} 12368c2ecf20Sopenharmony_ci 
12378c2ecf20Sopenharmony_cistatic const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED; 12388c2ecf20Sopenharmony_ci#define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8) 12398c2ecf20Sopenharmony_cistatic int encode_metric_spec(void **p, void *end) 12408c2ecf20Sopenharmony_ci{ 12418c2ecf20Sopenharmony_ci static const size_t count = ARRAY_SIZE(metric_bits); 12428c2ecf20Sopenharmony_ci 12438c2ecf20Sopenharmony_ci /* header */ 12448c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(*p + 2 > end)) 12458c2ecf20Sopenharmony_ci return -ERANGE; 12468c2ecf20Sopenharmony_ci 12478c2ecf20Sopenharmony_ci ceph_encode_8(p, 1); /* version */ 12488c2ecf20Sopenharmony_ci ceph_encode_8(p, 1); /* compat */ 12498c2ecf20Sopenharmony_ci 12508c2ecf20Sopenharmony_ci if (count > 0) { 12518c2ecf20Sopenharmony_ci size_t i; 12528c2ecf20Sopenharmony_ci size_t size = METRIC_BYTES(count); 12538c2ecf20Sopenharmony_ci 12548c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(*p + 4 + 4 + size > end)) 12558c2ecf20Sopenharmony_ci return -ERANGE; 12568c2ecf20Sopenharmony_ci 12578c2ecf20Sopenharmony_ci /* metric spec info length */ 12588c2ecf20Sopenharmony_ci ceph_encode_32(p, 4 + size); 12598c2ecf20Sopenharmony_ci 12608c2ecf20Sopenharmony_ci /* metric spec */ 12618c2ecf20Sopenharmony_ci ceph_encode_32(p, size); 12628c2ecf20Sopenharmony_ci memset(*p, 0, size); 12638c2ecf20Sopenharmony_ci for (i = 0; i < count; i++) 12648c2ecf20Sopenharmony_ci ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8); 12658c2ecf20Sopenharmony_ci *p += size; 12668c2ecf20Sopenharmony_ci } else { 12678c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(*p + 4 + 4 > end)) 12688c2ecf20Sopenharmony_ci return -ERANGE; 12698c2ecf20Sopenharmony_ci 12708c2ecf20Sopenharmony_ci /* metric spec info length */ 12718c2ecf20Sopenharmony_ci ceph_encode_32(p, 4); 12728c2ecf20Sopenharmony_ci /* metric spec */ 12738c2ecf20Sopenharmony_ci ceph_encode_32(p, 0); 12748c2ecf20Sopenharmony_ci } 12758c2ecf20Sopenharmony_ci 
12768c2ecf20Sopenharmony_ci return 0; 12778c2ecf20Sopenharmony_ci} 12788c2ecf20Sopenharmony_ci 12798c2ecf20Sopenharmony_ci/* 12808c2ecf20Sopenharmony_ci * session message, specialization for CEPH_SESSION_REQUEST_OPEN 12818c2ecf20Sopenharmony_ci * to include additional client metadata fields. 12828c2ecf20Sopenharmony_ci */ 12838c2ecf20Sopenharmony_cistatic struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq) 12848c2ecf20Sopenharmony_ci{ 12858c2ecf20Sopenharmony_ci struct ceph_msg *msg; 12868c2ecf20Sopenharmony_ci struct ceph_mds_session_head *h; 12878c2ecf20Sopenharmony_ci int i = -1; 12888c2ecf20Sopenharmony_ci int extra_bytes = 0; 12898c2ecf20Sopenharmony_ci int metadata_key_count = 0; 12908c2ecf20Sopenharmony_ci struct ceph_options *opt = mdsc->fsc->client->options; 12918c2ecf20Sopenharmony_ci struct ceph_mount_options *fsopt = mdsc->fsc->mount_options; 12928c2ecf20Sopenharmony_ci size_t size, count; 12938c2ecf20Sopenharmony_ci void *p, *end; 12948c2ecf20Sopenharmony_ci int ret; 12958c2ecf20Sopenharmony_ci 12968c2ecf20Sopenharmony_ci const char* metadata[][2] = { 12978c2ecf20Sopenharmony_ci {"hostname", mdsc->nodename}, 12988c2ecf20Sopenharmony_ci {"kernel_version", init_utsname()->release}, 12998c2ecf20Sopenharmony_ci {"entity_id", opt->name ? : ""}, 13008c2ecf20Sopenharmony_ci {"root", fsopt->server_path ? 
: "/"}, 13018c2ecf20Sopenharmony_ci {NULL, NULL} 13028c2ecf20Sopenharmony_ci }; 13038c2ecf20Sopenharmony_ci 13048c2ecf20Sopenharmony_ci /* Calculate serialized length of metadata */ 13058c2ecf20Sopenharmony_ci extra_bytes = 4; /* map length */ 13068c2ecf20Sopenharmony_ci for (i = 0; metadata[i][0]; ++i) { 13078c2ecf20Sopenharmony_ci extra_bytes += 8 + strlen(metadata[i][0]) + 13088c2ecf20Sopenharmony_ci strlen(metadata[i][1]); 13098c2ecf20Sopenharmony_ci metadata_key_count++; 13108c2ecf20Sopenharmony_ci } 13118c2ecf20Sopenharmony_ci 13128c2ecf20Sopenharmony_ci /* supported feature */ 13138c2ecf20Sopenharmony_ci size = 0; 13148c2ecf20Sopenharmony_ci count = ARRAY_SIZE(feature_bits); 13158c2ecf20Sopenharmony_ci if (count > 0) 13168c2ecf20Sopenharmony_ci size = FEATURE_BYTES(count); 13178c2ecf20Sopenharmony_ci extra_bytes += 4 + size; 13188c2ecf20Sopenharmony_ci 13198c2ecf20Sopenharmony_ci /* metric spec */ 13208c2ecf20Sopenharmony_ci size = 0; 13218c2ecf20Sopenharmony_ci count = ARRAY_SIZE(metric_bits); 13228c2ecf20Sopenharmony_ci if (count > 0) 13238c2ecf20Sopenharmony_ci size = METRIC_BYTES(count); 13248c2ecf20Sopenharmony_ci extra_bytes += 2 + 4 + 4 + size; 13258c2ecf20Sopenharmony_ci 13268c2ecf20Sopenharmony_ci /* Allocate the message */ 13278c2ecf20Sopenharmony_ci msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, 13288c2ecf20Sopenharmony_ci GFP_NOFS, false); 13298c2ecf20Sopenharmony_ci if (!msg) { 13308c2ecf20Sopenharmony_ci pr_err("ENOMEM creating session open msg\n"); 13318c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 13328c2ecf20Sopenharmony_ci } 13338c2ecf20Sopenharmony_ci p = msg->front.iov_base; 13348c2ecf20Sopenharmony_ci end = p + msg->front.iov_len; 13358c2ecf20Sopenharmony_ci 13368c2ecf20Sopenharmony_ci h = p; 13378c2ecf20Sopenharmony_ci h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN); 13388c2ecf20Sopenharmony_ci h->seq = cpu_to_le64(seq); 13398c2ecf20Sopenharmony_ci 13408c2ecf20Sopenharmony_ci /* 13418c2ecf20Sopenharmony_ci * 
Serialize client metadata into waiting buffer space, using 13428c2ecf20Sopenharmony_ci * the format that userspace expects for map<string, string> 13438c2ecf20Sopenharmony_ci * 13448c2ecf20Sopenharmony_ci * ClientSession messages with metadata are v4 13458c2ecf20Sopenharmony_ci */ 13468c2ecf20Sopenharmony_ci msg->hdr.version = cpu_to_le16(4); 13478c2ecf20Sopenharmony_ci msg->hdr.compat_version = cpu_to_le16(1); 13488c2ecf20Sopenharmony_ci 13498c2ecf20Sopenharmony_ci /* The write pointer, following the session_head structure */ 13508c2ecf20Sopenharmony_ci p += sizeof(*h); 13518c2ecf20Sopenharmony_ci 13528c2ecf20Sopenharmony_ci /* Number of entries in the map */ 13538c2ecf20Sopenharmony_ci ceph_encode_32(&p, metadata_key_count); 13548c2ecf20Sopenharmony_ci 13558c2ecf20Sopenharmony_ci /* Two length-prefixed strings for each entry in the map */ 13568c2ecf20Sopenharmony_ci for (i = 0; metadata[i][0]; ++i) { 13578c2ecf20Sopenharmony_ci size_t const key_len = strlen(metadata[i][0]); 13588c2ecf20Sopenharmony_ci size_t const val_len = strlen(metadata[i][1]); 13598c2ecf20Sopenharmony_ci 13608c2ecf20Sopenharmony_ci ceph_encode_32(&p, key_len); 13618c2ecf20Sopenharmony_ci memcpy(p, metadata[i][0], key_len); 13628c2ecf20Sopenharmony_ci p += key_len; 13638c2ecf20Sopenharmony_ci ceph_encode_32(&p, val_len); 13648c2ecf20Sopenharmony_ci memcpy(p, metadata[i][1], val_len); 13658c2ecf20Sopenharmony_ci p += val_len; 13668c2ecf20Sopenharmony_ci } 13678c2ecf20Sopenharmony_ci 13688c2ecf20Sopenharmony_ci ret = encode_supported_features(&p, end); 13698c2ecf20Sopenharmony_ci if (ret) { 13708c2ecf20Sopenharmony_ci pr_err("encode_supported_features failed!\n"); 13718c2ecf20Sopenharmony_ci ceph_msg_put(msg); 13728c2ecf20Sopenharmony_ci return ERR_PTR(ret); 13738c2ecf20Sopenharmony_ci } 13748c2ecf20Sopenharmony_ci 13758c2ecf20Sopenharmony_ci ret = encode_metric_spec(&p, end); 13768c2ecf20Sopenharmony_ci if (ret) { 13778c2ecf20Sopenharmony_ci pr_err("encode_metric_spec failed!\n"); 
13788c2ecf20Sopenharmony_ci ceph_msg_put(msg); 13798c2ecf20Sopenharmony_ci return ERR_PTR(ret); 13808c2ecf20Sopenharmony_ci } 13818c2ecf20Sopenharmony_ci 13828c2ecf20Sopenharmony_ci msg->front.iov_len = p - msg->front.iov_base; 13838c2ecf20Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 13848c2ecf20Sopenharmony_ci 13858c2ecf20Sopenharmony_ci return msg; 13868c2ecf20Sopenharmony_ci} 13878c2ecf20Sopenharmony_ci 13888c2ecf20Sopenharmony_ci/* 13898c2ecf20Sopenharmony_ci * send session open request. 13908c2ecf20Sopenharmony_ci * 13918c2ecf20Sopenharmony_ci * called under mdsc->mutex 13928c2ecf20Sopenharmony_ci */ 13938c2ecf20Sopenharmony_cistatic int __open_session(struct ceph_mds_client *mdsc, 13948c2ecf20Sopenharmony_ci struct ceph_mds_session *session) 13958c2ecf20Sopenharmony_ci{ 13968c2ecf20Sopenharmony_ci struct ceph_msg *msg; 13978c2ecf20Sopenharmony_ci int mstate; 13988c2ecf20Sopenharmony_ci int mds = session->s_mds; 13998c2ecf20Sopenharmony_ci 14008c2ecf20Sopenharmony_ci /* wait for mds to go active? 
*/ 14018c2ecf20Sopenharmony_ci mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds); 14028c2ecf20Sopenharmony_ci dout("open_session to mds%d (%s)\n", mds, 14038c2ecf20Sopenharmony_ci ceph_mds_state_name(mstate)); 14048c2ecf20Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_OPENING; 14058c2ecf20Sopenharmony_ci session->s_renew_requested = jiffies; 14068c2ecf20Sopenharmony_ci 14078c2ecf20Sopenharmony_ci /* send connect message */ 14088c2ecf20Sopenharmony_ci msg = create_session_open_msg(mdsc, session->s_seq); 14098c2ecf20Sopenharmony_ci if (IS_ERR(msg)) 14108c2ecf20Sopenharmony_ci return PTR_ERR(msg); 14118c2ecf20Sopenharmony_ci ceph_con_send(&session->s_con, msg); 14128c2ecf20Sopenharmony_ci return 0; 14138c2ecf20Sopenharmony_ci} 14148c2ecf20Sopenharmony_ci 14158c2ecf20Sopenharmony_ci/* 14168c2ecf20Sopenharmony_ci * open sessions for any export targets for the given mds 14178c2ecf20Sopenharmony_ci * 14188c2ecf20Sopenharmony_ci * called under mdsc->mutex 14198c2ecf20Sopenharmony_ci */ 14208c2ecf20Sopenharmony_cistatic struct ceph_mds_session * 14218c2ecf20Sopenharmony_ci__open_export_target_session(struct ceph_mds_client *mdsc, int target) 14228c2ecf20Sopenharmony_ci{ 14238c2ecf20Sopenharmony_ci struct ceph_mds_session *session; 14248c2ecf20Sopenharmony_ci int ret; 14258c2ecf20Sopenharmony_ci 14268c2ecf20Sopenharmony_ci session = __ceph_lookup_mds_session(mdsc, target); 14278c2ecf20Sopenharmony_ci if (!session) { 14288c2ecf20Sopenharmony_ci session = register_session(mdsc, target); 14298c2ecf20Sopenharmony_ci if (IS_ERR(session)) 14308c2ecf20Sopenharmony_ci return session; 14318c2ecf20Sopenharmony_ci } 14328c2ecf20Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_NEW || 14338c2ecf20Sopenharmony_ci session->s_state == CEPH_MDS_SESSION_CLOSING) { 14348c2ecf20Sopenharmony_ci ret = __open_session(mdsc, session); 14358c2ecf20Sopenharmony_ci if (ret) 14368c2ecf20Sopenharmony_ci return ERR_PTR(ret); 14378c2ecf20Sopenharmony_ci } 14388c2ecf20Sopenharmony_ci 
14398c2ecf20Sopenharmony_ci return session; 14408c2ecf20Sopenharmony_ci} 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_cistruct ceph_mds_session * 14438c2ecf20Sopenharmony_ciceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target) 14448c2ecf20Sopenharmony_ci{ 14458c2ecf20Sopenharmony_ci struct ceph_mds_session *session; 14468c2ecf20Sopenharmony_ci 14478c2ecf20Sopenharmony_ci dout("open_export_target_session to mds%d\n", target); 14488c2ecf20Sopenharmony_ci 14498c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 14508c2ecf20Sopenharmony_ci session = __open_export_target_session(mdsc, target); 14518c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 14528c2ecf20Sopenharmony_ci 14538c2ecf20Sopenharmony_ci return session; 14548c2ecf20Sopenharmony_ci} 14558c2ecf20Sopenharmony_ci 14568c2ecf20Sopenharmony_cistatic void __open_export_target_sessions(struct ceph_mds_client *mdsc, 14578c2ecf20Sopenharmony_ci struct ceph_mds_session *session) 14588c2ecf20Sopenharmony_ci{ 14598c2ecf20Sopenharmony_ci struct ceph_mds_info *mi; 14608c2ecf20Sopenharmony_ci struct ceph_mds_session *ts; 14618c2ecf20Sopenharmony_ci int i, mds = session->s_mds; 14628c2ecf20Sopenharmony_ci 14638c2ecf20Sopenharmony_ci if (mds >= mdsc->mdsmap->possible_max_rank) 14648c2ecf20Sopenharmony_ci return; 14658c2ecf20Sopenharmony_ci 14668c2ecf20Sopenharmony_ci mi = &mdsc->mdsmap->m_info[mds]; 14678c2ecf20Sopenharmony_ci dout("open_export_target_sessions for mds%d (%d targets)\n", 14688c2ecf20Sopenharmony_ci session->s_mds, mi->num_export_targets); 14698c2ecf20Sopenharmony_ci 14708c2ecf20Sopenharmony_ci for (i = 0; i < mi->num_export_targets; i++) { 14718c2ecf20Sopenharmony_ci ts = __open_export_target_session(mdsc, mi->export_targets[i]); 14728c2ecf20Sopenharmony_ci ceph_put_mds_session(ts); 14738c2ecf20Sopenharmony_ci } 14748c2ecf20Sopenharmony_ci} 14758c2ecf20Sopenharmony_ci 14768c2ecf20Sopenharmony_civoid ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc, 
14778c2ecf20Sopenharmony_ci struct ceph_mds_session *session) 14788c2ecf20Sopenharmony_ci{ 14798c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 14808c2ecf20Sopenharmony_ci __open_export_target_sessions(mdsc, session); 14818c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 14828c2ecf20Sopenharmony_ci} 14838c2ecf20Sopenharmony_ci 14848c2ecf20Sopenharmony_ci/* 14858c2ecf20Sopenharmony_ci * session caps 14868c2ecf20Sopenharmony_ci */ 14878c2ecf20Sopenharmony_ci 14888c2ecf20Sopenharmony_cistatic void detach_cap_releases(struct ceph_mds_session *session, 14898c2ecf20Sopenharmony_ci struct list_head *target) 14908c2ecf20Sopenharmony_ci{ 14918c2ecf20Sopenharmony_ci lockdep_assert_held(&session->s_cap_lock); 14928c2ecf20Sopenharmony_ci 14938c2ecf20Sopenharmony_ci list_splice_init(&session->s_cap_releases, target); 14948c2ecf20Sopenharmony_ci session->s_num_cap_releases = 0; 14958c2ecf20Sopenharmony_ci dout("dispose_cap_releases mds%d\n", session->s_mds); 14968c2ecf20Sopenharmony_ci} 14978c2ecf20Sopenharmony_ci 14988c2ecf20Sopenharmony_cistatic void dispose_cap_releases(struct ceph_mds_client *mdsc, 14998c2ecf20Sopenharmony_ci struct list_head *dispose) 15008c2ecf20Sopenharmony_ci{ 15018c2ecf20Sopenharmony_ci while (!list_empty(dispose)) { 15028c2ecf20Sopenharmony_ci struct ceph_cap *cap; 15038c2ecf20Sopenharmony_ci /* zero out the in-progress message */ 15048c2ecf20Sopenharmony_ci cap = list_first_entry(dispose, struct ceph_cap, session_caps); 15058c2ecf20Sopenharmony_ci list_del(&cap->session_caps); 15068c2ecf20Sopenharmony_ci ceph_put_cap(mdsc, cap); 15078c2ecf20Sopenharmony_ci } 15088c2ecf20Sopenharmony_ci} 15098c2ecf20Sopenharmony_ci 15108c2ecf20Sopenharmony_cistatic void cleanup_session_requests(struct ceph_mds_client *mdsc, 15118c2ecf20Sopenharmony_ci struct ceph_mds_session *session) 15128c2ecf20Sopenharmony_ci{ 15138c2ecf20Sopenharmony_ci struct ceph_mds_request *req; 15148c2ecf20Sopenharmony_ci struct rb_node *p; 15158c2ecf20Sopenharmony_ci 
15168c2ecf20Sopenharmony_ci dout("cleanup_session_requests mds%d\n", session->s_mds); 15178c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 15188c2ecf20Sopenharmony_ci while (!list_empty(&session->s_unsafe)) { 15198c2ecf20Sopenharmony_ci req = list_first_entry(&session->s_unsafe, 15208c2ecf20Sopenharmony_ci struct ceph_mds_request, r_unsafe_item); 15218c2ecf20Sopenharmony_ci pr_warn_ratelimited(" dropping unsafe request %llu\n", 15228c2ecf20Sopenharmony_ci req->r_tid); 15238c2ecf20Sopenharmony_ci if (req->r_target_inode) 15248c2ecf20Sopenharmony_ci mapping_set_error(req->r_target_inode->i_mapping, -EIO); 15258c2ecf20Sopenharmony_ci if (req->r_unsafe_dir) 15268c2ecf20Sopenharmony_ci mapping_set_error(req->r_unsafe_dir->i_mapping, -EIO); 15278c2ecf20Sopenharmony_ci __unregister_request(mdsc, req); 15288c2ecf20Sopenharmony_ci } 15298c2ecf20Sopenharmony_ci /* zero r_attempts, so kick_requests() will re-send requests */ 15308c2ecf20Sopenharmony_ci p = rb_first(&mdsc->request_tree); 15318c2ecf20Sopenharmony_ci while (p) { 15328c2ecf20Sopenharmony_ci req = rb_entry(p, struct ceph_mds_request, r_node); 15338c2ecf20Sopenharmony_ci p = rb_next(p); 15348c2ecf20Sopenharmony_ci if (req->r_session && 15358c2ecf20Sopenharmony_ci req->r_session->s_mds == session->s_mds) 15368c2ecf20Sopenharmony_ci req->r_attempts = 0; 15378c2ecf20Sopenharmony_ci } 15388c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 15398c2ecf20Sopenharmony_ci} 15408c2ecf20Sopenharmony_ci 15418c2ecf20Sopenharmony_ci/* 15428c2ecf20Sopenharmony_ci * Helper to safely iterate over all caps associated with a session, with 15438c2ecf20Sopenharmony_ci * special care taken to handle a racing __ceph_remove_cap(). 15448c2ecf20Sopenharmony_ci * 15458c2ecf20Sopenharmony_ci * Caller must hold session s_mutex. 
*/
/*
 * Calls @cb(inode, cap, @arg) for each cap on session->s_caps whose
 * inode can be pinned with igrab(); caps whose inode cannot be grabbed
 * are skipped.  s_cap_lock is dropped around every callback and the cap
 * being visited is published in session->s_cap_iterator meanwhile.
 *
 * Returns 0 after a full walk, or the first negative value returned by
 * @cb, which also stops the walk.
 */
int ceph_iterate_session_caps(struct ceph_mds_session *session,
			      int (*cb)(struct inode *, struct ceph_cap *,
					void *), void *arg)
{
	struct list_head *p;
	struct ceph_cap *cap;
	struct inode *inode, *last_inode = NULL;
	struct ceph_cap *old_cap = NULL;
	int ret;

	dout("iterate_session_caps %p mds%d\n", session, session->s_mds);
	spin_lock(&session->s_cap_lock);
	p = session->s_caps.next;
	while (p != &session->s_caps) {
		cap = list_entry(p, struct ceph_cap, session_caps);
		inode = igrab(&cap->ci->vfs_inode);
		if (!inode) {
			/* could not pin the inode: skip this cap */
			p = p->next;
			continue;
		}
		session->s_cap_iterator = cap;
		spin_unlock(&session->s_cap_lock);

		/*
		 * Deferred cleanup from the previous iteration, done here
		 * because s_cap_lock is not held at this point.
		 */
		if (last_inode) {
			/* avoid calling iput_final() while holding
			 * s_mutex or in mds dispatch threads */
			ceph_async_iput(last_inode);
			last_inode = NULL;
		}
		if (old_cap) {
			ceph_put_cap(session->s_mdsc, old_cap);
			old_cap = NULL;
		}

		ret = cb(inode, cap, arg);
		last_inode = inode;

		spin_lock(&session->s_cap_lock);
		/* advance before the current cap may be unlinked below */
		p = p->next;
		if (!cap->ci) {
			/*
			 * cap->ci was cleared while the lock was dropped;
			 * finish detaching the cap from the session here.
			 */
			dout("iterate_session_caps  finishing cap %p removal\n",
			     cap);
			BUG_ON(cap->session != session);
			cap->session = NULL;
			list_del_init(&cap->session_caps);
			session->s_nr_caps--;
			atomic64_dec(&session->s_mdsc->metric.total_caps);
			if (cap->queue_release)
				__ceph_queue_cap_release(session, cap);
			else
				old_cap = cap;  /* put_cap it w/o locks held */
		}
		if (ret < 0)
			goto out;
	}
	ret = 0;
out:
	session->s_cap_iterator = NULL;
	spin_unlock(&session->s_cap_lock);

	/* last_inode may be NULL here if no cap was visited */
	ceph_async_iput(last_inode);
	if (old_cap)
		ceph_put_cap(session->s_mdsc, old_cap);

	return ret;
}

/*
 * Remove every cap snap queued on @inode, dropping each one's snap
 * context and the cap snap itself, then wake waiters on the inode's
 * i_cap_wq and on mdsc->cap_flushing_wq.
 *
 * Returns the number of cap snaps removed.  Caller must hold
 * ci->i_ceph_lock.
 */
static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_cap_snap *capsnap;
	int capsnap_release = 0;

	lockdep_assert_held(&ci->i_ceph_lock);

	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);

	while (!list_empty(&ci->i_cap_snaps)) {
		capsnap = list_first_entry(&ci->i_cap_snaps,
					   struct ceph_cap_snap, ci_item);
		__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
		ceph_put_snap_context(capsnap->context);
		ceph_put_cap_snap(capsnap);
		capsnap_release++;
	}
	wake_up_all(&ci->i_cap_wq);
	wake_up_all(&mdsc->cap_flushing_wq);
	return capsnap_release;
}

/*
 * ceph_iterate_session_caps() callback used by remove_session_caps();
 * @arg is the struct ceph_fs_client.
 *
 * Removes @cap from the inode.  If the inode is then left without an
 * auth cap, all pending cap flushes, dirty/flushing cap state and cap
 * snaps are discarded as well, with -EIO recorded on the mapping when
 * dirty state was dropped.  Always returns 0.
 */
static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
				  void *arg)
{
	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_inode_info *ci = ceph_inode(inode);
	LIST_HEAD(to_remove);
	bool dirty_dropped = false;
	bool invalidate = false;
	int capsnap_release = 0;

	dout("removing cap %p, ci is %p, inode is %p\n",
	     cap, ci, &ci->vfs_inode);
	spin_lock(&ci->i_ceph_lock);
	__ceph_remove_cap(cap, false);
	if (!ci->i_auth_cap) {
		struct ceph_cap_flush *cf;

		if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
			if (inode->i_data.nrpages > 0)
				invalidate = true;
			if (ci->i_wrbuffer_ref > 0)
				mapping_set_error(&inode->i_data, -EIO);
		}

		/* collect all pending cap flushes; freed after unlocking */
		while (!list_empty(&ci->i_cap_flush_list)) {
			cf = list_first_entry(&ci->i_cap_flush_list,
					      struct ceph_cap_flush, i_list);
			list_move(&cf->i_list, &to_remove);
		}

		spin_lock(&mdsc->cap_dirty_lock);

		/* also unlink the collected flushes via their g_list */
		list_for_each_entry(cf, &to_remove, i_list)
			list_del_init(&cf->g_list);

		if (!list_empty(&ci->i_dirty_item)) {
			pr_warn_ratelimited(
				" dropping dirty %s state for %p %lld\n",
				ceph_cap_string(ci->i_dirty_caps),
				inode, ceph_ino(inode));
			ci->i_dirty_caps = 0;
			list_del_init(&ci->i_dirty_item);
			dirty_dropped = true;
		}
		if (!list_empty(&ci->i_flushing_item)) {
			pr_warn_ratelimited(
				" dropping dirty+flushing %s state for %p %lld\n",
				ceph_cap_string(ci->i_flushing_caps),
				inode, ceph_ino(inode));
			ci->i_flushing_caps = 0;
			list_del_init(&ci->i_flushing_item);
			mdsc->num_cap_flushing--;
			dirty_dropped = true;
		}
		spin_unlock(&mdsc->cap_dirty_lock);

		if (dirty_dropped) {
			mapping_set_error(inode->i_mapping, -EIO);

			/* drop the head snap context once nothing uses it */
			if (ci->i_wrbuffer_ref_head == 0 &&
			    ci->i_wr_ref == 0 &&
			    ci->i_dirty_caps == 0 &&
			    ci->i_flushing_caps == 0) {
				ceph_put_snap_context(ci->i_head_snapc);
				ci->i_head_snapc = NULL;
			}
		}

		if (atomic_read(&ci->i_filelock_ref) > 0) {
			/* make further file lock syscall return -EIO */
			ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
			pr_warn_ratelimited(" dropping file locks for %p %lld\n",
					    inode, ceph_ino(inode));
		}

		if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
			ci->i_prealloc_cap_flush = NULL;
		}

		if (!list_empty(&ci->i_cap_snaps))
			capsnap_release = remove_capsnaps(mdsc, inode);
	}
	spin_unlock(&ci->i_ceph_lock);
	/* free the collected flushes, except those marked is_capsnap */
	while (!list_empty(&to_remove)) {
		struct ceph_cap_flush *cf;
		cf = list_first_entry(&to_remove,
				      struct ceph_cap_flush, i_list);
		list_del_init(&cf->i_list);
		if (!cf->is_capsnap)
			ceph_free_cap_flush(cf);
	}

	wake_up_all(&ci->i_cap_wq);
	if (invalidate)
		ceph_queue_invalidate(inode);
	/*
	 * NOTE(review): these iput() calls presumably release the inode
	 * references that were held on behalf of the dirty state and of
	 * each removed cap snap -- confirm against where they are taken.
	 */
	if (dirty_dropped)
		iput(inode);
	while (capsnap_release--)
		iput(inode);
	return 0;
}

/*
 * Remove all caps of @session and throw away its queued cap releases.
 *
 * caller must hold session s_mutex
 */
static void remove_session_caps(struct ceph_mds_session *session)
{
	struct ceph_fs_client *fsc = session->s_mdsc->fsc;
	struct super_block *sb = fsc->sb;
	LIST_HEAD(dispose);

	dout("remove_session_caps on %p\n", session);
	ceph_iterate_session_caps(session, remove_session_caps_cb, fsc);

	wake_up_all(&fsc->mdsc->cap_flushing_wq);

	spin_lock(&session->s_cap_lock);
	if (session->s_nr_caps > 0) {
		struct inode *inode;
		struct ceph_cap *cap, *prev = NULL;
		struct ceph_vino vino;
		/*
		 * iterate_session_caps() skips inodes that are being
		 * deleted, we need to wait until deletions are complete.
		 * __wait_on_freeing_inode() is designed for the job,
		 * but it is not exported, so use lookup inode function
		 * to access it.
		 */
		while (!list_empty(&session->s_caps)) {
			cap = list_entry(session->s_caps.next,
					 struct ceph_cap, session_caps);
			/* same cap still at the head: stop retrying */
			if (cap == prev)
				break;
			prev = cap;
			vino = cap->ci->i_vino;
			spin_unlock(&session->s_cap_lock);

			inode = ceph_find_inode(sb, vino);
			 /* avoid calling iput_final() while holding s_mutex */
			ceph_async_iput(inode);

			spin_lock(&session->s_cap_lock);
		}
	}

	/*
	 * move the queued cap releases onto the local dispose list;
	 * s_cap_lock stays held here and is dropped explicitly below
	 */
	detach_cap_releases(session, &dispose);

	BUG_ON(session->s_nr_caps > 0);
	BUG_ON(!list_empty(&session->s_cap_flushing));
	spin_unlock(&session->s_cap_lock);
	dispose_cap_releases(session->s_mdsc, &dispose);
}

/* events passed to wake_up_session_caps() / wake_up_session_cb() */
enum {
	RECONNECT,
	RENEWCAPS,
	FORCE_RO,
};

/*
 * wake up any threads waiting on this session's caps.  if the cap is
 * old (didn't get renewed on the client reconnect), remove it now.
 *
 * caller must hold s_mutex.
18048c2ecf20Sopenharmony_ci */ 18058c2ecf20Sopenharmony_cistatic int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, 18068c2ecf20Sopenharmony_ci void *arg) 18078c2ecf20Sopenharmony_ci{ 18088c2ecf20Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 18098c2ecf20Sopenharmony_ci unsigned long ev = (unsigned long)arg; 18108c2ecf20Sopenharmony_ci 18118c2ecf20Sopenharmony_ci if (ev == RECONNECT) { 18128c2ecf20Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 18138c2ecf20Sopenharmony_ci ci->i_wanted_max_size = 0; 18148c2ecf20Sopenharmony_ci ci->i_requested_max_size = 0; 18158c2ecf20Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 18168c2ecf20Sopenharmony_ci } else if (ev == RENEWCAPS) { 18178c2ecf20Sopenharmony_ci if (cap->cap_gen < cap->session->s_cap_gen) { 18188c2ecf20Sopenharmony_ci /* mds did not re-issue stale cap */ 18198c2ecf20Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 18208c2ecf20Sopenharmony_ci cap->issued = cap->implemented = CEPH_CAP_PIN; 18218c2ecf20Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 18228c2ecf20Sopenharmony_ci } 18238c2ecf20Sopenharmony_ci } else if (ev == FORCE_RO) { 18248c2ecf20Sopenharmony_ci } 18258c2ecf20Sopenharmony_ci wake_up_all(&ci->i_cap_wq); 18268c2ecf20Sopenharmony_ci return 0; 18278c2ecf20Sopenharmony_ci} 18288c2ecf20Sopenharmony_ci 18298c2ecf20Sopenharmony_cistatic void wake_up_session_caps(struct ceph_mds_session *session, int ev) 18308c2ecf20Sopenharmony_ci{ 18318c2ecf20Sopenharmony_ci dout("wake_up_session_caps %p mds%d\n", session, session->s_mds); 18328c2ecf20Sopenharmony_ci ceph_iterate_session_caps(session, wake_up_session_cb, 18338c2ecf20Sopenharmony_ci (void *)(unsigned long)ev); 18348c2ecf20Sopenharmony_ci} 18358c2ecf20Sopenharmony_ci 18368c2ecf20Sopenharmony_ci/* 18378c2ecf20Sopenharmony_ci * Send periodic message to MDS renewing all currently held caps. The 18388c2ecf20Sopenharmony_ci * ack will reset the expiration for all caps from this session. 
18398c2ecf20Sopenharmony_ci * 18408c2ecf20Sopenharmony_ci * caller holds s_mutex 18418c2ecf20Sopenharmony_ci */ 18428c2ecf20Sopenharmony_cistatic int send_renew_caps(struct ceph_mds_client *mdsc, 18438c2ecf20Sopenharmony_ci struct ceph_mds_session *session) 18448c2ecf20Sopenharmony_ci{ 18458c2ecf20Sopenharmony_ci struct ceph_msg *msg; 18468c2ecf20Sopenharmony_ci int state; 18478c2ecf20Sopenharmony_ci 18488c2ecf20Sopenharmony_ci if (time_after_eq(jiffies, session->s_cap_ttl) && 18498c2ecf20Sopenharmony_ci time_after_eq(session->s_cap_ttl, session->s_renew_requested)) 18508c2ecf20Sopenharmony_ci pr_info("mds%d caps stale\n", session->s_mds); 18518c2ecf20Sopenharmony_ci session->s_renew_requested = jiffies; 18528c2ecf20Sopenharmony_ci 18538c2ecf20Sopenharmony_ci /* do not try to renew caps until a recovering mds has reconnected 18548c2ecf20Sopenharmony_ci * with its clients. */ 18558c2ecf20Sopenharmony_ci state = ceph_mdsmap_get_state(mdsc->mdsmap, session->s_mds); 18568c2ecf20Sopenharmony_ci if (state < CEPH_MDS_STATE_RECONNECT) { 18578c2ecf20Sopenharmony_ci dout("send_renew_caps ignoring mds%d (%s)\n", 18588c2ecf20Sopenharmony_ci session->s_mds, ceph_mds_state_name(state)); 18598c2ecf20Sopenharmony_ci return 0; 18608c2ecf20Sopenharmony_ci } 18618c2ecf20Sopenharmony_ci 18628c2ecf20Sopenharmony_ci dout("send_renew_caps to mds%d (%s)\n", session->s_mds, 18638c2ecf20Sopenharmony_ci ceph_mds_state_name(state)); 18648c2ecf20Sopenharmony_ci msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, 18658c2ecf20Sopenharmony_ci ++session->s_renew_seq); 18668c2ecf20Sopenharmony_ci if (!msg) 18678c2ecf20Sopenharmony_ci return -ENOMEM; 18688c2ecf20Sopenharmony_ci ceph_con_send(&session->s_con, msg); 18698c2ecf20Sopenharmony_ci return 0; 18708c2ecf20Sopenharmony_ci} 18718c2ecf20Sopenharmony_ci 18728c2ecf20Sopenharmony_cistatic int send_flushmsg_ack(struct ceph_mds_client *mdsc, 18738c2ecf20Sopenharmony_ci struct ceph_mds_session *session, u64 seq) 18748c2ecf20Sopenharmony_ci{ 
18758c2ecf20Sopenharmony_ci struct ceph_msg *msg; 18768c2ecf20Sopenharmony_ci 18778c2ecf20Sopenharmony_ci dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n", 18788c2ecf20Sopenharmony_ci session->s_mds, ceph_session_state_name(session->s_state), seq); 18798c2ecf20Sopenharmony_ci msg = ceph_create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq); 18808c2ecf20Sopenharmony_ci if (!msg) 18818c2ecf20Sopenharmony_ci return -ENOMEM; 18828c2ecf20Sopenharmony_ci ceph_con_send(&session->s_con, msg); 18838c2ecf20Sopenharmony_ci return 0; 18848c2ecf20Sopenharmony_ci} 18858c2ecf20Sopenharmony_ci 18868c2ecf20Sopenharmony_ci 18878c2ecf20Sopenharmony_ci/* 18888c2ecf20Sopenharmony_ci * Note new cap ttl, and any transition from stale -> not stale (fresh?). 18898c2ecf20Sopenharmony_ci * 18908c2ecf20Sopenharmony_ci * Called under session->s_mutex 18918c2ecf20Sopenharmony_ci */ 18928c2ecf20Sopenharmony_cistatic void renewed_caps(struct ceph_mds_client *mdsc, 18938c2ecf20Sopenharmony_ci struct ceph_mds_session *session, int is_renew) 18948c2ecf20Sopenharmony_ci{ 18958c2ecf20Sopenharmony_ci int was_stale; 18968c2ecf20Sopenharmony_ci int wake = 0; 18978c2ecf20Sopenharmony_ci 18988c2ecf20Sopenharmony_ci spin_lock(&session->s_cap_lock); 18998c2ecf20Sopenharmony_ci was_stale = is_renew && time_after_eq(jiffies, session->s_cap_ttl); 19008c2ecf20Sopenharmony_ci 19018c2ecf20Sopenharmony_ci session->s_cap_ttl = session->s_renew_requested + 19028c2ecf20Sopenharmony_ci mdsc->mdsmap->m_session_timeout*HZ; 19038c2ecf20Sopenharmony_ci 19048c2ecf20Sopenharmony_ci if (was_stale) { 19058c2ecf20Sopenharmony_ci if (time_before(jiffies, session->s_cap_ttl)) { 19068c2ecf20Sopenharmony_ci pr_info("mds%d caps renewed\n", session->s_mds); 19078c2ecf20Sopenharmony_ci wake = 1; 19088c2ecf20Sopenharmony_ci } else { 19098c2ecf20Sopenharmony_ci pr_info("mds%d caps still stale\n", session->s_mds); 19108c2ecf20Sopenharmony_ci } 19118c2ecf20Sopenharmony_ci } 19128c2ecf20Sopenharmony_ci dout("renewed_caps mds%d ttl now %lu, 
was %s, now %s\n", 19138c2ecf20Sopenharmony_ci session->s_mds, session->s_cap_ttl, was_stale ? "stale" : "fresh", 19148c2ecf20Sopenharmony_ci time_before(jiffies, session->s_cap_ttl) ? "stale" : "fresh"); 19158c2ecf20Sopenharmony_ci spin_unlock(&session->s_cap_lock); 19168c2ecf20Sopenharmony_ci 19178c2ecf20Sopenharmony_ci if (wake) 19188c2ecf20Sopenharmony_ci wake_up_session_caps(session, RENEWCAPS); 19198c2ecf20Sopenharmony_ci} 19208c2ecf20Sopenharmony_ci 19218c2ecf20Sopenharmony_ci/* 19228c2ecf20Sopenharmony_ci * send a session close request 19238c2ecf20Sopenharmony_ci */ 19248c2ecf20Sopenharmony_cistatic int request_close_session(struct ceph_mds_session *session) 19258c2ecf20Sopenharmony_ci{ 19268c2ecf20Sopenharmony_ci struct ceph_msg *msg; 19278c2ecf20Sopenharmony_ci 19288c2ecf20Sopenharmony_ci dout("request_close_session mds%d state %s seq %lld\n", 19298c2ecf20Sopenharmony_ci session->s_mds, ceph_session_state_name(session->s_state), 19308c2ecf20Sopenharmony_ci session->s_seq); 19318c2ecf20Sopenharmony_ci msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_CLOSE, 19328c2ecf20Sopenharmony_ci session->s_seq); 19338c2ecf20Sopenharmony_ci if (!msg) 19348c2ecf20Sopenharmony_ci return -ENOMEM; 19358c2ecf20Sopenharmony_ci ceph_con_send(&session->s_con, msg); 19368c2ecf20Sopenharmony_ci return 1; 19378c2ecf20Sopenharmony_ci} 19388c2ecf20Sopenharmony_ci 19398c2ecf20Sopenharmony_ci/* 19408c2ecf20Sopenharmony_ci * Called with s_mutex held. 
19418c2ecf20Sopenharmony_ci */ 19428c2ecf20Sopenharmony_cistatic int __close_session(struct ceph_mds_client *mdsc, 19438c2ecf20Sopenharmony_ci struct ceph_mds_session *session) 19448c2ecf20Sopenharmony_ci{ 19458c2ecf20Sopenharmony_ci if (session->s_state >= CEPH_MDS_SESSION_CLOSING) 19468c2ecf20Sopenharmony_ci return 0; 19478c2ecf20Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_CLOSING; 19488c2ecf20Sopenharmony_ci return request_close_session(session); 19498c2ecf20Sopenharmony_ci} 19508c2ecf20Sopenharmony_ci 19518c2ecf20Sopenharmony_cistatic bool drop_negative_children(struct dentry *dentry) 19528c2ecf20Sopenharmony_ci{ 19538c2ecf20Sopenharmony_ci struct dentry *child; 19548c2ecf20Sopenharmony_ci bool all_negative = true; 19558c2ecf20Sopenharmony_ci 19568c2ecf20Sopenharmony_ci if (!d_is_dir(dentry)) 19578c2ecf20Sopenharmony_ci goto out; 19588c2ecf20Sopenharmony_ci 19598c2ecf20Sopenharmony_ci spin_lock(&dentry->d_lock); 19608c2ecf20Sopenharmony_ci list_for_each_entry(child, &dentry->d_subdirs, d_child) { 19618c2ecf20Sopenharmony_ci if (d_really_is_positive(child)) { 19628c2ecf20Sopenharmony_ci all_negative = false; 19638c2ecf20Sopenharmony_ci break; 19648c2ecf20Sopenharmony_ci } 19658c2ecf20Sopenharmony_ci } 19668c2ecf20Sopenharmony_ci spin_unlock(&dentry->d_lock); 19678c2ecf20Sopenharmony_ci 19688c2ecf20Sopenharmony_ci if (all_negative) 19698c2ecf20Sopenharmony_ci shrink_dcache_parent(dentry); 19708c2ecf20Sopenharmony_ciout: 19718c2ecf20Sopenharmony_ci return all_negative; 19728c2ecf20Sopenharmony_ci} 19738c2ecf20Sopenharmony_ci 19748c2ecf20Sopenharmony_ci/* 19758c2ecf20Sopenharmony_ci * Trim old(er) caps. 19768c2ecf20Sopenharmony_ci * 19778c2ecf20Sopenharmony_ci * Because we can't cache an inode without one or more caps, we do 19788c2ecf20Sopenharmony_ci * this indirectly: if a cap is unused, we prune its aliases, at which 19798c2ecf20Sopenharmony_ci * point the inode will hopefully get dropped to. 
19808c2ecf20Sopenharmony_ci * 19818c2ecf20Sopenharmony_ci * Yes, this is a bit sloppy. Our only real goal here is to respond to 19828c2ecf20Sopenharmony_ci * memory pressure from the MDS, though, so it needn't be perfect. 19838c2ecf20Sopenharmony_ci */ 19848c2ecf20Sopenharmony_cistatic int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg) 19858c2ecf20Sopenharmony_ci{ 19868c2ecf20Sopenharmony_ci int *remaining = arg; 19878c2ecf20Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(inode); 19888c2ecf20Sopenharmony_ci int used, wanted, oissued, mine; 19898c2ecf20Sopenharmony_ci 19908c2ecf20Sopenharmony_ci if (*remaining <= 0) 19918c2ecf20Sopenharmony_ci return -1; 19928c2ecf20Sopenharmony_ci 19938c2ecf20Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 19948c2ecf20Sopenharmony_ci mine = cap->issued | cap->implemented; 19958c2ecf20Sopenharmony_ci used = __ceph_caps_used(ci); 19968c2ecf20Sopenharmony_ci wanted = __ceph_caps_file_wanted(ci); 19978c2ecf20Sopenharmony_ci oissued = __ceph_caps_issued_other(ci, cap); 19988c2ecf20Sopenharmony_ci 19998c2ecf20Sopenharmony_ci dout("trim_caps_cb %p cap %p mine %s oissued %s used %s wanted %s\n", 20008c2ecf20Sopenharmony_ci inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued), 20018c2ecf20Sopenharmony_ci ceph_cap_string(used), ceph_cap_string(wanted)); 20028c2ecf20Sopenharmony_ci if (cap == ci->i_auth_cap) { 20038c2ecf20Sopenharmony_ci if (ci->i_dirty_caps || ci->i_flushing_caps || 20048c2ecf20Sopenharmony_ci !list_empty(&ci->i_cap_snaps)) 20058c2ecf20Sopenharmony_ci goto out; 20068c2ecf20Sopenharmony_ci if ((used | wanted) & CEPH_CAP_ANY_WR) 20078c2ecf20Sopenharmony_ci goto out; 20088c2ecf20Sopenharmony_ci /* Note: it's possible that i_filelock_ref becomes non-zero 20098c2ecf20Sopenharmony_ci * after dropping auth caps. It doesn't hurt because reply 20108c2ecf20Sopenharmony_ci * of lock mds request will re-add auth caps. 
*/ 20118c2ecf20Sopenharmony_ci if (atomic_read(&ci->i_filelock_ref) > 0) 20128c2ecf20Sopenharmony_ci goto out; 20138c2ecf20Sopenharmony_ci } 20148c2ecf20Sopenharmony_ci /* The inode has cached pages, but it's no longer used. 20158c2ecf20Sopenharmony_ci * we can safely drop it */ 20168c2ecf20Sopenharmony_ci if (S_ISREG(inode->i_mode) && 20178c2ecf20Sopenharmony_ci wanted == 0 && used == CEPH_CAP_FILE_CACHE && 20188c2ecf20Sopenharmony_ci !(oissued & CEPH_CAP_FILE_CACHE)) { 20198c2ecf20Sopenharmony_ci used = 0; 20208c2ecf20Sopenharmony_ci oissued = 0; 20218c2ecf20Sopenharmony_ci } 20228c2ecf20Sopenharmony_ci if ((used | wanted) & ~oissued & mine) 20238c2ecf20Sopenharmony_ci goto out; /* we need these caps */ 20248c2ecf20Sopenharmony_ci 20258c2ecf20Sopenharmony_ci if (oissued) { 20268c2ecf20Sopenharmony_ci /* we aren't the only cap.. just remove us */ 20278c2ecf20Sopenharmony_ci __ceph_remove_cap(cap, true); 20288c2ecf20Sopenharmony_ci (*remaining)--; 20298c2ecf20Sopenharmony_ci } else { 20308c2ecf20Sopenharmony_ci struct dentry *dentry; 20318c2ecf20Sopenharmony_ci /* try dropping referring dentries */ 20328c2ecf20Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 20338c2ecf20Sopenharmony_ci dentry = d_find_any_alias(inode); 20348c2ecf20Sopenharmony_ci if (dentry && drop_negative_children(dentry)) { 20358c2ecf20Sopenharmony_ci int count; 20368c2ecf20Sopenharmony_ci dput(dentry); 20378c2ecf20Sopenharmony_ci d_prune_aliases(inode); 20388c2ecf20Sopenharmony_ci count = atomic_read(&inode->i_count); 20398c2ecf20Sopenharmony_ci if (count == 1) 20408c2ecf20Sopenharmony_ci (*remaining)--; 20418c2ecf20Sopenharmony_ci dout("trim_caps_cb %p cap %p pruned, count now %d\n", 20428c2ecf20Sopenharmony_ci inode, cap, count); 20438c2ecf20Sopenharmony_ci } else { 20448c2ecf20Sopenharmony_ci dput(dentry); 20458c2ecf20Sopenharmony_ci } 20468c2ecf20Sopenharmony_ci return 0; 20478c2ecf20Sopenharmony_ci } 20488c2ecf20Sopenharmony_ci 20498c2ecf20Sopenharmony_ciout: 20508c2ecf20Sopenharmony_ci 
spin_unlock(&ci->i_ceph_lock); 20518c2ecf20Sopenharmony_ci return 0; 20528c2ecf20Sopenharmony_ci} 20538c2ecf20Sopenharmony_ci 20548c2ecf20Sopenharmony_ci/* 20558c2ecf20Sopenharmony_ci * Trim session cap count down to some max number. 20568c2ecf20Sopenharmony_ci */ 20578c2ecf20Sopenharmony_ciint ceph_trim_caps(struct ceph_mds_client *mdsc, 20588c2ecf20Sopenharmony_ci struct ceph_mds_session *session, 20598c2ecf20Sopenharmony_ci int max_caps) 20608c2ecf20Sopenharmony_ci{ 20618c2ecf20Sopenharmony_ci int trim_caps = session->s_nr_caps - max_caps; 20628c2ecf20Sopenharmony_ci 20638c2ecf20Sopenharmony_ci dout("trim_caps mds%d start: %d / %d, trim %d\n", 20648c2ecf20Sopenharmony_ci session->s_mds, session->s_nr_caps, max_caps, trim_caps); 20658c2ecf20Sopenharmony_ci if (trim_caps > 0) { 20668c2ecf20Sopenharmony_ci int remaining = trim_caps; 20678c2ecf20Sopenharmony_ci 20688c2ecf20Sopenharmony_ci ceph_iterate_session_caps(session, trim_caps_cb, &remaining); 20698c2ecf20Sopenharmony_ci dout("trim_caps mds%d done: %d / %d, trimmed %d\n", 20708c2ecf20Sopenharmony_ci session->s_mds, session->s_nr_caps, max_caps, 20718c2ecf20Sopenharmony_ci trim_caps - remaining); 20728c2ecf20Sopenharmony_ci } 20738c2ecf20Sopenharmony_ci 20748c2ecf20Sopenharmony_ci ceph_flush_cap_releases(mdsc, session); 20758c2ecf20Sopenharmony_ci return 0; 20768c2ecf20Sopenharmony_ci} 20778c2ecf20Sopenharmony_ci 20788c2ecf20Sopenharmony_cistatic int check_caps_flush(struct ceph_mds_client *mdsc, 20798c2ecf20Sopenharmony_ci u64 want_flush_tid) 20808c2ecf20Sopenharmony_ci{ 20818c2ecf20Sopenharmony_ci int ret = 1; 20828c2ecf20Sopenharmony_ci 20838c2ecf20Sopenharmony_ci spin_lock(&mdsc->cap_dirty_lock); 20848c2ecf20Sopenharmony_ci if (!list_empty(&mdsc->cap_flush_list)) { 20858c2ecf20Sopenharmony_ci struct ceph_cap_flush *cf = 20868c2ecf20Sopenharmony_ci list_first_entry(&mdsc->cap_flush_list, 20878c2ecf20Sopenharmony_ci struct ceph_cap_flush, g_list); 20888c2ecf20Sopenharmony_ci if (cf->tid <= want_flush_tid) { 
20898c2ecf20Sopenharmony_ci dout("check_caps_flush still flushing tid " 20908c2ecf20Sopenharmony_ci "%llu <= %llu\n", cf->tid, want_flush_tid); 20918c2ecf20Sopenharmony_ci ret = 0; 20928c2ecf20Sopenharmony_ci } 20938c2ecf20Sopenharmony_ci } 20948c2ecf20Sopenharmony_ci spin_unlock(&mdsc->cap_dirty_lock); 20958c2ecf20Sopenharmony_ci return ret; 20968c2ecf20Sopenharmony_ci} 20978c2ecf20Sopenharmony_ci 20988c2ecf20Sopenharmony_ci/* 20998c2ecf20Sopenharmony_ci * flush all dirty inode data to disk. 21008c2ecf20Sopenharmony_ci * 21018c2ecf20Sopenharmony_ci * returns true if we've flushed through want_flush_tid 21028c2ecf20Sopenharmony_ci */ 21038c2ecf20Sopenharmony_cistatic void wait_caps_flush(struct ceph_mds_client *mdsc, 21048c2ecf20Sopenharmony_ci u64 want_flush_tid) 21058c2ecf20Sopenharmony_ci{ 21068c2ecf20Sopenharmony_ci dout("check_caps_flush want %llu\n", want_flush_tid); 21078c2ecf20Sopenharmony_ci 21088c2ecf20Sopenharmony_ci wait_event(mdsc->cap_flushing_wq, 21098c2ecf20Sopenharmony_ci check_caps_flush(mdsc, want_flush_tid)); 21108c2ecf20Sopenharmony_ci 21118c2ecf20Sopenharmony_ci dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid); 21128c2ecf20Sopenharmony_ci} 21138c2ecf20Sopenharmony_ci 21148c2ecf20Sopenharmony_ci/* 21158c2ecf20Sopenharmony_ci * called under s_mutex 21168c2ecf20Sopenharmony_ci */ 21178c2ecf20Sopenharmony_cistatic void ceph_send_cap_releases(struct ceph_mds_client *mdsc, 21188c2ecf20Sopenharmony_ci struct ceph_mds_session *session) 21198c2ecf20Sopenharmony_ci{ 21208c2ecf20Sopenharmony_ci struct ceph_msg *msg = NULL; 21218c2ecf20Sopenharmony_ci struct ceph_mds_cap_release *head; 21228c2ecf20Sopenharmony_ci struct ceph_mds_cap_item *item; 21238c2ecf20Sopenharmony_ci struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc; 21248c2ecf20Sopenharmony_ci struct ceph_cap *cap; 21258c2ecf20Sopenharmony_ci LIST_HEAD(tmp_list); 21268c2ecf20Sopenharmony_ci int num_cap_releases; 21278c2ecf20Sopenharmony_ci __le32 barrier, *cap_barrier; 
21288c2ecf20Sopenharmony_ci 21298c2ecf20Sopenharmony_ci down_read(&osdc->lock); 21308c2ecf20Sopenharmony_ci barrier = cpu_to_le32(osdc->epoch_barrier); 21318c2ecf20Sopenharmony_ci up_read(&osdc->lock); 21328c2ecf20Sopenharmony_ci 21338c2ecf20Sopenharmony_ci spin_lock(&session->s_cap_lock); 21348c2ecf20Sopenharmony_ciagain: 21358c2ecf20Sopenharmony_ci list_splice_init(&session->s_cap_releases, &tmp_list); 21368c2ecf20Sopenharmony_ci num_cap_releases = session->s_num_cap_releases; 21378c2ecf20Sopenharmony_ci session->s_num_cap_releases = 0; 21388c2ecf20Sopenharmony_ci spin_unlock(&session->s_cap_lock); 21398c2ecf20Sopenharmony_ci 21408c2ecf20Sopenharmony_ci while (!list_empty(&tmp_list)) { 21418c2ecf20Sopenharmony_ci if (!msg) { 21428c2ecf20Sopenharmony_ci msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, 21438c2ecf20Sopenharmony_ci PAGE_SIZE, GFP_NOFS, false); 21448c2ecf20Sopenharmony_ci if (!msg) 21458c2ecf20Sopenharmony_ci goto out_err; 21468c2ecf20Sopenharmony_ci head = msg->front.iov_base; 21478c2ecf20Sopenharmony_ci head->num = cpu_to_le32(0); 21488c2ecf20Sopenharmony_ci msg->front.iov_len = sizeof(*head); 21498c2ecf20Sopenharmony_ci 21508c2ecf20Sopenharmony_ci msg->hdr.version = cpu_to_le16(2); 21518c2ecf20Sopenharmony_ci msg->hdr.compat_version = cpu_to_le16(1); 21528c2ecf20Sopenharmony_ci } 21538c2ecf20Sopenharmony_ci 21548c2ecf20Sopenharmony_ci cap = list_first_entry(&tmp_list, struct ceph_cap, 21558c2ecf20Sopenharmony_ci session_caps); 21568c2ecf20Sopenharmony_ci list_del(&cap->session_caps); 21578c2ecf20Sopenharmony_ci num_cap_releases--; 21588c2ecf20Sopenharmony_ci 21598c2ecf20Sopenharmony_ci head = msg->front.iov_base; 21608c2ecf20Sopenharmony_ci put_unaligned_le32(get_unaligned_le32(&head->num) + 1, 21618c2ecf20Sopenharmony_ci &head->num); 21628c2ecf20Sopenharmony_ci item = msg->front.iov_base + msg->front.iov_len; 21638c2ecf20Sopenharmony_ci item->ino = cpu_to_le64(cap->cap_ino); 21648c2ecf20Sopenharmony_ci item->cap_id = cpu_to_le64(cap->cap_id); 
21658c2ecf20Sopenharmony_ci item->migrate_seq = cpu_to_le32(cap->mseq); 21668c2ecf20Sopenharmony_ci item->seq = cpu_to_le32(cap->issue_seq); 21678c2ecf20Sopenharmony_ci msg->front.iov_len += sizeof(*item); 21688c2ecf20Sopenharmony_ci 21698c2ecf20Sopenharmony_ci ceph_put_cap(mdsc, cap); 21708c2ecf20Sopenharmony_ci 21718c2ecf20Sopenharmony_ci if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { 21728c2ecf20Sopenharmony_ci // Append cap_barrier field 21738c2ecf20Sopenharmony_ci cap_barrier = msg->front.iov_base + msg->front.iov_len; 21748c2ecf20Sopenharmony_ci *cap_barrier = barrier; 21758c2ecf20Sopenharmony_ci msg->front.iov_len += sizeof(*cap_barrier); 21768c2ecf20Sopenharmony_ci 21778c2ecf20Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 21788c2ecf20Sopenharmony_ci dout("send_cap_releases mds%d %p\n", session->s_mds, msg); 21798c2ecf20Sopenharmony_ci ceph_con_send(&session->s_con, msg); 21808c2ecf20Sopenharmony_ci msg = NULL; 21818c2ecf20Sopenharmony_ci } 21828c2ecf20Sopenharmony_ci } 21838c2ecf20Sopenharmony_ci 21848c2ecf20Sopenharmony_ci BUG_ON(num_cap_releases != 0); 21858c2ecf20Sopenharmony_ci 21868c2ecf20Sopenharmony_ci spin_lock(&session->s_cap_lock); 21878c2ecf20Sopenharmony_ci if (!list_empty(&session->s_cap_releases)) 21888c2ecf20Sopenharmony_ci goto again; 21898c2ecf20Sopenharmony_ci spin_unlock(&session->s_cap_lock); 21908c2ecf20Sopenharmony_ci 21918c2ecf20Sopenharmony_ci if (msg) { 21928c2ecf20Sopenharmony_ci // Append cap_barrier field 21938c2ecf20Sopenharmony_ci cap_barrier = msg->front.iov_base + msg->front.iov_len; 21948c2ecf20Sopenharmony_ci *cap_barrier = barrier; 21958c2ecf20Sopenharmony_ci msg->front.iov_len += sizeof(*cap_barrier); 21968c2ecf20Sopenharmony_ci 21978c2ecf20Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 21988c2ecf20Sopenharmony_ci dout("send_cap_releases mds%d %p\n", session->s_mds, msg); 21998c2ecf20Sopenharmony_ci ceph_con_send(&session->s_con, msg); 22008c2ecf20Sopenharmony_ci } 
22018c2ecf20Sopenharmony_ci return; 22028c2ecf20Sopenharmony_ciout_err: 22038c2ecf20Sopenharmony_ci pr_err("send_cap_releases mds%d, failed to allocate message\n", 22048c2ecf20Sopenharmony_ci session->s_mds); 22058c2ecf20Sopenharmony_ci spin_lock(&session->s_cap_lock); 22068c2ecf20Sopenharmony_ci list_splice(&tmp_list, &session->s_cap_releases); 22078c2ecf20Sopenharmony_ci session->s_num_cap_releases += num_cap_releases; 22088c2ecf20Sopenharmony_ci spin_unlock(&session->s_cap_lock); 22098c2ecf20Sopenharmony_ci} 22108c2ecf20Sopenharmony_ci 22118c2ecf20Sopenharmony_cistatic void ceph_cap_release_work(struct work_struct *work) 22128c2ecf20Sopenharmony_ci{ 22138c2ecf20Sopenharmony_ci struct ceph_mds_session *session = 22148c2ecf20Sopenharmony_ci container_of(work, struct ceph_mds_session, s_cap_release_work); 22158c2ecf20Sopenharmony_ci 22168c2ecf20Sopenharmony_ci mutex_lock(&session->s_mutex); 22178c2ecf20Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_OPEN || 22188c2ecf20Sopenharmony_ci session->s_state == CEPH_MDS_SESSION_HUNG) 22198c2ecf20Sopenharmony_ci ceph_send_cap_releases(session->s_mdsc, session); 22208c2ecf20Sopenharmony_ci mutex_unlock(&session->s_mutex); 22218c2ecf20Sopenharmony_ci ceph_put_mds_session(session); 22228c2ecf20Sopenharmony_ci} 22238c2ecf20Sopenharmony_ci 22248c2ecf20Sopenharmony_civoid ceph_flush_cap_releases(struct ceph_mds_client *mdsc, 22258c2ecf20Sopenharmony_ci struct ceph_mds_session *session) 22268c2ecf20Sopenharmony_ci{ 22278c2ecf20Sopenharmony_ci if (mdsc->stopping) 22288c2ecf20Sopenharmony_ci return; 22298c2ecf20Sopenharmony_ci 22308c2ecf20Sopenharmony_ci ceph_get_mds_session(session); 22318c2ecf20Sopenharmony_ci if (queue_work(mdsc->fsc->cap_wq, 22328c2ecf20Sopenharmony_ci &session->s_cap_release_work)) { 22338c2ecf20Sopenharmony_ci dout("cap release work queued\n"); 22348c2ecf20Sopenharmony_ci } else { 22358c2ecf20Sopenharmony_ci ceph_put_mds_session(session); 22368c2ecf20Sopenharmony_ci dout("failed to queue cap release 
work\n"); 22378c2ecf20Sopenharmony_ci } 22388c2ecf20Sopenharmony_ci} 22398c2ecf20Sopenharmony_ci 22408c2ecf20Sopenharmony_ci/* 22418c2ecf20Sopenharmony_ci * caller holds session->s_cap_lock 22428c2ecf20Sopenharmony_ci */ 22438c2ecf20Sopenharmony_civoid __ceph_queue_cap_release(struct ceph_mds_session *session, 22448c2ecf20Sopenharmony_ci struct ceph_cap *cap) 22458c2ecf20Sopenharmony_ci{ 22468c2ecf20Sopenharmony_ci list_add_tail(&cap->session_caps, &session->s_cap_releases); 22478c2ecf20Sopenharmony_ci session->s_num_cap_releases++; 22488c2ecf20Sopenharmony_ci 22498c2ecf20Sopenharmony_ci if (!(session->s_num_cap_releases % CEPH_CAPS_PER_RELEASE)) 22508c2ecf20Sopenharmony_ci ceph_flush_cap_releases(session->s_mdsc, session); 22518c2ecf20Sopenharmony_ci} 22528c2ecf20Sopenharmony_ci 22538c2ecf20Sopenharmony_cistatic void ceph_cap_reclaim_work(struct work_struct *work) 22548c2ecf20Sopenharmony_ci{ 22558c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = 22568c2ecf20Sopenharmony_ci container_of(work, struct ceph_mds_client, cap_reclaim_work); 22578c2ecf20Sopenharmony_ci int ret = ceph_trim_dentries(mdsc); 22588c2ecf20Sopenharmony_ci if (ret == -EAGAIN) 22598c2ecf20Sopenharmony_ci ceph_queue_cap_reclaim_work(mdsc); 22608c2ecf20Sopenharmony_ci} 22618c2ecf20Sopenharmony_ci 22628c2ecf20Sopenharmony_civoid ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc) 22638c2ecf20Sopenharmony_ci{ 22648c2ecf20Sopenharmony_ci if (mdsc->stopping) 22658c2ecf20Sopenharmony_ci return; 22668c2ecf20Sopenharmony_ci 22678c2ecf20Sopenharmony_ci if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_reclaim_work)) { 22688c2ecf20Sopenharmony_ci dout("caps reclaim work queued\n"); 22698c2ecf20Sopenharmony_ci } else { 22708c2ecf20Sopenharmony_ci dout("failed to queue caps release work\n"); 22718c2ecf20Sopenharmony_ci } 22728c2ecf20Sopenharmony_ci} 22738c2ecf20Sopenharmony_ci 22748c2ecf20Sopenharmony_civoid ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr) 22758c2ecf20Sopenharmony_ci{ 
22768c2ecf20Sopenharmony_ci int val; 22778c2ecf20Sopenharmony_ci if (!nr) 22788c2ecf20Sopenharmony_ci return; 22798c2ecf20Sopenharmony_ci val = atomic_add_return(nr, &mdsc->cap_reclaim_pending); 22808c2ecf20Sopenharmony_ci if ((val % CEPH_CAPS_PER_RELEASE) < nr) { 22818c2ecf20Sopenharmony_ci atomic_set(&mdsc->cap_reclaim_pending, 0); 22828c2ecf20Sopenharmony_ci ceph_queue_cap_reclaim_work(mdsc); 22838c2ecf20Sopenharmony_ci } 22848c2ecf20Sopenharmony_ci} 22858c2ecf20Sopenharmony_ci 22868c2ecf20Sopenharmony_ci/* 22878c2ecf20Sopenharmony_ci * requests 22888c2ecf20Sopenharmony_ci */ 22898c2ecf20Sopenharmony_ci 22908c2ecf20Sopenharmony_ciint ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, 22918c2ecf20Sopenharmony_ci struct inode *dir) 22928c2ecf20Sopenharmony_ci{ 22938c2ecf20Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(dir); 22948c2ecf20Sopenharmony_ci struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; 22958c2ecf20Sopenharmony_ci struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; 22968c2ecf20Sopenharmony_ci size_t size = sizeof(struct ceph_mds_reply_dir_entry); 22978c2ecf20Sopenharmony_ci unsigned int num_entries; 22988c2ecf20Sopenharmony_ci int order; 22998c2ecf20Sopenharmony_ci 23008c2ecf20Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 23018c2ecf20Sopenharmony_ci num_entries = ci->i_files + ci->i_subdirs; 23028c2ecf20Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 23038c2ecf20Sopenharmony_ci num_entries = max(num_entries, 1U); 23048c2ecf20Sopenharmony_ci num_entries = min(num_entries, opt->max_readdir); 23058c2ecf20Sopenharmony_ci 23068c2ecf20Sopenharmony_ci order = get_order(size * num_entries); 23078c2ecf20Sopenharmony_ci while (order >= 0) { 23088c2ecf20Sopenharmony_ci rinfo->dir_entries = (void*)__get_free_pages(GFP_KERNEL | 23098c2ecf20Sopenharmony_ci __GFP_NOWARN, 23108c2ecf20Sopenharmony_ci order); 23118c2ecf20Sopenharmony_ci if (rinfo->dir_entries) 23128c2ecf20Sopenharmony_ci break; 23138c2ecf20Sopenharmony_ci 
order--; 23148c2ecf20Sopenharmony_ci } 23158c2ecf20Sopenharmony_ci if (!rinfo->dir_entries) 23168c2ecf20Sopenharmony_ci return -ENOMEM; 23178c2ecf20Sopenharmony_ci 23188c2ecf20Sopenharmony_ci num_entries = (PAGE_SIZE << order) / size; 23198c2ecf20Sopenharmony_ci num_entries = min(num_entries, opt->max_readdir); 23208c2ecf20Sopenharmony_ci 23218c2ecf20Sopenharmony_ci rinfo->dir_buf_size = PAGE_SIZE << order; 23228c2ecf20Sopenharmony_ci req->r_num_caps = num_entries + 1; 23238c2ecf20Sopenharmony_ci req->r_args.readdir.max_entries = cpu_to_le32(num_entries); 23248c2ecf20Sopenharmony_ci req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes); 23258c2ecf20Sopenharmony_ci return 0; 23268c2ecf20Sopenharmony_ci} 23278c2ecf20Sopenharmony_ci 23288c2ecf20Sopenharmony_ci/* 23298c2ecf20Sopenharmony_ci * Create an mds request. 23308c2ecf20Sopenharmony_ci */ 23318c2ecf20Sopenharmony_cistruct ceph_mds_request * 23328c2ecf20Sopenharmony_ciceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) 23338c2ecf20Sopenharmony_ci{ 23348c2ecf20Sopenharmony_ci struct ceph_mds_request *req; 23358c2ecf20Sopenharmony_ci 23368c2ecf20Sopenharmony_ci req = kmem_cache_zalloc(ceph_mds_request_cachep, GFP_NOFS); 23378c2ecf20Sopenharmony_ci if (!req) 23388c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 23398c2ecf20Sopenharmony_ci 23408c2ecf20Sopenharmony_ci mutex_init(&req->r_fill_mutex); 23418c2ecf20Sopenharmony_ci req->r_mdsc = mdsc; 23428c2ecf20Sopenharmony_ci req->r_started = jiffies; 23438c2ecf20Sopenharmony_ci req->r_start_latency = ktime_get(); 23448c2ecf20Sopenharmony_ci req->r_resend_mds = -1; 23458c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&req->r_unsafe_dir_item); 23468c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&req->r_unsafe_target_item); 23478c2ecf20Sopenharmony_ci req->r_fmode = -1; 23488c2ecf20Sopenharmony_ci kref_init(&req->r_kref); 23498c2ecf20Sopenharmony_ci RB_CLEAR_NODE(&req->r_node); 23508c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&req->r_wait); 
23518c2ecf20Sopenharmony_ci init_completion(&req->r_completion); 23528c2ecf20Sopenharmony_ci init_completion(&req->r_safe_completion); 23538c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&req->r_unsafe_item); 23548c2ecf20Sopenharmony_ci 23558c2ecf20Sopenharmony_ci ktime_get_coarse_real_ts64(&req->r_stamp); 23568c2ecf20Sopenharmony_ci 23578c2ecf20Sopenharmony_ci req->r_op = op; 23588c2ecf20Sopenharmony_ci req->r_direct_mode = mode; 23598c2ecf20Sopenharmony_ci return req; 23608c2ecf20Sopenharmony_ci} 23618c2ecf20Sopenharmony_ci 23628c2ecf20Sopenharmony_ci/* 23638c2ecf20Sopenharmony_ci * return oldest (lowest) request, tid in request tree, 0 if none. 23648c2ecf20Sopenharmony_ci * 23658c2ecf20Sopenharmony_ci * called under mdsc->mutex. 23668c2ecf20Sopenharmony_ci */ 23678c2ecf20Sopenharmony_cistatic struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc) 23688c2ecf20Sopenharmony_ci{ 23698c2ecf20Sopenharmony_ci if (RB_EMPTY_ROOT(&mdsc->request_tree)) 23708c2ecf20Sopenharmony_ci return NULL; 23718c2ecf20Sopenharmony_ci return rb_entry(rb_first(&mdsc->request_tree), 23728c2ecf20Sopenharmony_ci struct ceph_mds_request, r_node); 23738c2ecf20Sopenharmony_ci} 23748c2ecf20Sopenharmony_ci 23758c2ecf20Sopenharmony_cistatic inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc) 23768c2ecf20Sopenharmony_ci{ 23778c2ecf20Sopenharmony_ci return mdsc->oldest_tid; 23788c2ecf20Sopenharmony_ci} 23798c2ecf20Sopenharmony_ci 23808c2ecf20Sopenharmony_ci/* 23818c2ecf20Sopenharmony_ci * Build a dentry's path. Allocate on heap; caller must kfree. Based 23828c2ecf20Sopenharmony_ci * on build_path_from_dentry in fs/cifs/dir.c. 23838c2ecf20Sopenharmony_ci * 23848c2ecf20Sopenharmony_ci * If @stop_on_nosnap, generate path relative to the first non-snapped 23858c2ecf20Sopenharmony_ci * inode. 23868c2ecf20Sopenharmony_ci * 23878c2ecf20Sopenharmony_ci * Encode hidden .snap dirs as a double /, i.e. 
23888c2ecf20Sopenharmony_ci * foo/.snap/bar -> foo//bar 23898c2ecf20Sopenharmony_ci */ 23908c2ecf20Sopenharmony_cichar *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, 23918c2ecf20Sopenharmony_ci int stop_on_nosnap) 23928c2ecf20Sopenharmony_ci{ 23938c2ecf20Sopenharmony_ci struct dentry *temp; 23948c2ecf20Sopenharmony_ci char *path; 23958c2ecf20Sopenharmony_ci int pos; 23968c2ecf20Sopenharmony_ci unsigned seq; 23978c2ecf20Sopenharmony_ci u64 base; 23988c2ecf20Sopenharmony_ci 23998c2ecf20Sopenharmony_ci if (!dentry) 24008c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 24018c2ecf20Sopenharmony_ci 24028c2ecf20Sopenharmony_ci path = __getname(); 24038c2ecf20Sopenharmony_ci if (!path) 24048c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 24058c2ecf20Sopenharmony_ciretry: 24068c2ecf20Sopenharmony_ci pos = PATH_MAX - 1; 24078c2ecf20Sopenharmony_ci path[pos] = '\0'; 24088c2ecf20Sopenharmony_ci 24098c2ecf20Sopenharmony_ci seq = read_seqbegin(&rename_lock); 24108c2ecf20Sopenharmony_ci rcu_read_lock(); 24118c2ecf20Sopenharmony_ci temp = dentry; 24128c2ecf20Sopenharmony_ci for (;;) { 24138c2ecf20Sopenharmony_ci struct inode *inode; 24148c2ecf20Sopenharmony_ci 24158c2ecf20Sopenharmony_ci spin_lock(&temp->d_lock); 24168c2ecf20Sopenharmony_ci inode = d_inode(temp); 24178c2ecf20Sopenharmony_ci if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { 24188c2ecf20Sopenharmony_ci dout("build_path path+%d: %p SNAPDIR\n", 24198c2ecf20Sopenharmony_ci pos, temp); 24208c2ecf20Sopenharmony_ci } else if (stop_on_nosnap && inode && dentry != temp && 24218c2ecf20Sopenharmony_ci ceph_snap(inode) == CEPH_NOSNAP) { 24228c2ecf20Sopenharmony_ci spin_unlock(&temp->d_lock); 24238c2ecf20Sopenharmony_ci pos++; /* get rid of any prepended '/' */ 24248c2ecf20Sopenharmony_ci break; 24258c2ecf20Sopenharmony_ci } else { 24268c2ecf20Sopenharmony_ci pos -= temp->d_name.len; 24278c2ecf20Sopenharmony_ci if (pos < 0) { 24288c2ecf20Sopenharmony_ci spin_unlock(&temp->d_lock); 24298c2ecf20Sopenharmony_ci 
break; 24308c2ecf20Sopenharmony_ci } 24318c2ecf20Sopenharmony_ci memcpy(path + pos, temp->d_name.name, temp->d_name.len); 24328c2ecf20Sopenharmony_ci } 24338c2ecf20Sopenharmony_ci spin_unlock(&temp->d_lock); 24348c2ecf20Sopenharmony_ci temp = READ_ONCE(temp->d_parent); 24358c2ecf20Sopenharmony_ci 24368c2ecf20Sopenharmony_ci /* Are we at the root? */ 24378c2ecf20Sopenharmony_ci if (IS_ROOT(temp)) 24388c2ecf20Sopenharmony_ci break; 24398c2ecf20Sopenharmony_ci 24408c2ecf20Sopenharmony_ci /* Are we out of buffer? */ 24418c2ecf20Sopenharmony_ci if (--pos < 0) 24428c2ecf20Sopenharmony_ci break; 24438c2ecf20Sopenharmony_ci 24448c2ecf20Sopenharmony_ci path[pos] = '/'; 24458c2ecf20Sopenharmony_ci } 24468c2ecf20Sopenharmony_ci base = ceph_ino(d_inode(temp)); 24478c2ecf20Sopenharmony_ci rcu_read_unlock(); 24488c2ecf20Sopenharmony_ci 24498c2ecf20Sopenharmony_ci if (read_seqretry(&rename_lock, seq)) 24508c2ecf20Sopenharmony_ci goto retry; 24518c2ecf20Sopenharmony_ci 24528c2ecf20Sopenharmony_ci if (pos < 0) { 24538c2ecf20Sopenharmony_ci /* 24548c2ecf20Sopenharmony_ci * A rename didn't occur, but somehow we didn't end up where 24558c2ecf20Sopenharmony_ci * we thought we would. Throw a warning and try again. 
24568c2ecf20Sopenharmony_ci */ 24578c2ecf20Sopenharmony_ci pr_warn("build_path did not end path lookup where " 24588c2ecf20Sopenharmony_ci "expected, pos is %d\n", pos); 24598c2ecf20Sopenharmony_ci goto retry; 24608c2ecf20Sopenharmony_ci } 24618c2ecf20Sopenharmony_ci 24628c2ecf20Sopenharmony_ci *pbase = base; 24638c2ecf20Sopenharmony_ci *plen = PATH_MAX - 1 - pos; 24648c2ecf20Sopenharmony_ci dout("build_path on %p %d built %llx '%.*s'\n", 24658c2ecf20Sopenharmony_ci dentry, d_count(dentry), base, *plen, path + pos); 24668c2ecf20Sopenharmony_ci return path + pos; 24678c2ecf20Sopenharmony_ci} 24688c2ecf20Sopenharmony_ci 24698c2ecf20Sopenharmony_cistatic int build_dentry_path(struct dentry *dentry, struct inode *dir, 24708c2ecf20Sopenharmony_ci const char **ppath, int *ppathlen, u64 *pino, 24718c2ecf20Sopenharmony_ci bool *pfreepath, bool parent_locked) 24728c2ecf20Sopenharmony_ci{ 24738c2ecf20Sopenharmony_ci char *path; 24748c2ecf20Sopenharmony_ci 24758c2ecf20Sopenharmony_ci rcu_read_lock(); 24768c2ecf20Sopenharmony_ci if (!dir) 24778c2ecf20Sopenharmony_ci dir = d_inode_rcu(dentry->d_parent); 24788c2ecf20Sopenharmony_ci if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) { 24798c2ecf20Sopenharmony_ci *pino = ceph_ino(dir); 24808c2ecf20Sopenharmony_ci rcu_read_unlock(); 24818c2ecf20Sopenharmony_ci *ppath = dentry->d_name.name; 24828c2ecf20Sopenharmony_ci *ppathlen = dentry->d_name.len; 24838c2ecf20Sopenharmony_ci return 0; 24848c2ecf20Sopenharmony_ci } 24858c2ecf20Sopenharmony_ci rcu_read_unlock(); 24868c2ecf20Sopenharmony_ci path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); 24878c2ecf20Sopenharmony_ci if (IS_ERR(path)) 24888c2ecf20Sopenharmony_ci return PTR_ERR(path); 24898c2ecf20Sopenharmony_ci *ppath = path; 24908c2ecf20Sopenharmony_ci *pfreepath = true; 24918c2ecf20Sopenharmony_ci return 0; 24928c2ecf20Sopenharmony_ci} 24938c2ecf20Sopenharmony_ci 24948c2ecf20Sopenharmony_cistatic int build_inode_path(struct inode *inode, 24958c2ecf20Sopenharmony_ci 
const char **ppath, int *ppathlen, u64 *pino, 24968c2ecf20Sopenharmony_ci bool *pfreepath) 24978c2ecf20Sopenharmony_ci{ 24988c2ecf20Sopenharmony_ci struct dentry *dentry; 24998c2ecf20Sopenharmony_ci char *path; 25008c2ecf20Sopenharmony_ci 25018c2ecf20Sopenharmony_ci if (ceph_snap(inode) == CEPH_NOSNAP) { 25028c2ecf20Sopenharmony_ci *pino = ceph_ino(inode); 25038c2ecf20Sopenharmony_ci *ppathlen = 0; 25048c2ecf20Sopenharmony_ci return 0; 25058c2ecf20Sopenharmony_ci } 25068c2ecf20Sopenharmony_ci dentry = d_find_alias(inode); 25078c2ecf20Sopenharmony_ci path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); 25088c2ecf20Sopenharmony_ci dput(dentry); 25098c2ecf20Sopenharmony_ci if (IS_ERR(path)) 25108c2ecf20Sopenharmony_ci return PTR_ERR(path); 25118c2ecf20Sopenharmony_ci *ppath = path; 25128c2ecf20Sopenharmony_ci *pfreepath = true; 25138c2ecf20Sopenharmony_ci return 0; 25148c2ecf20Sopenharmony_ci} 25158c2ecf20Sopenharmony_ci 25168c2ecf20Sopenharmony_ci/* 25178c2ecf20Sopenharmony_ci * request arguments may be specified via an inode *, a dentry *, or 25188c2ecf20Sopenharmony_ci * an explicit ino+path. 
25198c2ecf20Sopenharmony_ci */ 25208c2ecf20Sopenharmony_cistatic int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, 25218c2ecf20Sopenharmony_ci struct inode *rdiri, const char *rpath, 25228c2ecf20Sopenharmony_ci u64 rino, const char **ppath, int *pathlen, 25238c2ecf20Sopenharmony_ci u64 *ino, bool *freepath, bool parent_locked) 25248c2ecf20Sopenharmony_ci{ 25258c2ecf20Sopenharmony_ci int r = 0; 25268c2ecf20Sopenharmony_ci 25278c2ecf20Sopenharmony_ci if (rinode) { 25288c2ecf20Sopenharmony_ci r = build_inode_path(rinode, ppath, pathlen, ino, freepath); 25298c2ecf20Sopenharmony_ci dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode), 25308c2ecf20Sopenharmony_ci ceph_snap(rinode)); 25318c2ecf20Sopenharmony_ci } else if (rdentry) { 25328c2ecf20Sopenharmony_ci r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, 25338c2ecf20Sopenharmony_ci freepath, parent_locked); 25348c2ecf20Sopenharmony_ci dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, 25358c2ecf20Sopenharmony_ci *ppath); 25368c2ecf20Sopenharmony_ci } else if (rpath || rino) { 25378c2ecf20Sopenharmony_ci *ino = rino; 25388c2ecf20Sopenharmony_ci *ppath = rpath; 25398c2ecf20Sopenharmony_ci *pathlen = rpath ? 
strlen(rpath) : 0; 25408c2ecf20Sopenharmony_ci dout(" path %.*s\n", *pathlen, rpath); 25418c2ecf20Sopenharmony_ci } 25428c2ecf20Sopenharmony_ci 25438c2ecf20Sopenharmony_ci return r; 25448c2ecf20Sopenharmony_ci} 25458c2ecf20Sopenharmony_ci 25468c2ecf20Sopenharmony_ci/* 25478c2ecf20Sopenharmony_ci * called under mdsc->mutex 25488c2ecf20Sopenharmony_ci */ 25498c2ecf20Sopenharmony_cistatic struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, 25508c2ecf20Sopenharmony_ci struct ceph_mds_request *req, 25518c2ecf20Sopenharmony_ci int mds, bool drop_cap_releases) 25528c2ecf20Sopenharmony_ci{ 25538c2ecf20Sopenharmony_ci struct ceph_msg *msg; 25548c2ecf20Sopenharmony_ci struct ceph_mds_request_head *head; 25558c2ecf20Sopenharmony_ci const char *path1 = NULL; 25568c2ecf20Sopenharmony_ci const char *path2 = NULL; 25578c2ecf20Sopenharmony_ci u64 ino1 = 0, ino2 = 0; 25588c2ecf20Sopenharmony_ci int pathlen1 = 0, pathlen2 = 0; 25598c2ecf20Sopenharmony_ci bool freepath1 = false, freepath2 = false; 25608c2ecf20Sopenharmony_ci int len; 25618c2ecf20Sopenharmony_ci u16 releases; 25628c2ecf20Sopenharmony_ci void *p, *end; 25638c2ecf20Sopenharmony_ci int ret; 25648c2ecf20Sopenharmony_ci 25658c2ecf20Sopenharmony_ci ret = set_request_path_attr(req->r_inode, req->r_dentry, 25668c2ecf20Sopenharmony_ci req->r_parent, req->r_path1, req->r_ino1.ino, 25678c2ecf20Sopenharmony_ci &path1, &pathlen1, &ino1, &freepath1, 25688c2ecf20Sopenharmony_ci test_bit(CEPH_MDS_R_PARENT_LOCKED, 25698c2ecf20Sopenharmony_ci &req->r_req_flags)); 25708c2ecf20Sopenharmony_ci if (ret < 0) { 25718c2ecf20Sopenharmony_ci msg = ERR_PTR(ret); 25728c2ecf20Sopenharmony_ci goto out; 25738c2ecf20Sopenharmony_ci } 25748c2ecf20Sopenharmony_ci 25758c2ecf20Sopenharmony_ci /* If r_old_dentry is set, then assume that its parent is locked */ 25768c2ecf20Sopenharmony_ci ret = set_request_path_attr(NULL, req->r_old_dentry, 25778c2ecf20Sopenharmony_ci req->r_old_dentry_dir, 25788c2ecf20Sopenharmony_ci req->r_path2, 
req->r_ino2.ino, 25798c2ecf20Sopenharmony_ci &path2, &pathlen2, &ino2, &freepath2, true); 25808c2ecf20Sopenharmony_ci if (ret < 0) { 25818c2ecf20Sopenharmony_ci msg = ERR_PTR(ret); 25828c2ecf20Sopenharmony_ci goto out_free1; 25838c2ecf20Sopenharmony_ci } 25848c2ecf20Sopenharmony_ci 25858c2ecf20Sopenharmony_ci len = sizeof(*head) + 25868c2ecf20Sopenharmony_ci pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + 25878c2ecf20Sopenharmony_ci sizeof(struct ceph_timespec); 25888c2ecf20Sopenharmony_ci 25898c2ecf20Sopenharmony_ci /* calculate (max) length for cap releases */ 25908c2ecf20Sopenharmony_ci len += sizeof(struct ceph_mds_request_release) * 25918c2ecf20Sopenharmony_ci (!!req->r_inode_drop + !!req->r_dentry_drop + 25928c2ecf20Sopenharmony_ci !!req->r_old_inode_drop + !!req->r_old_dentry_drop); 25938c2ecf20Sopenharmony_ci if (req->r_dentry_drop) 25948c2ecf20Sopenharmony_ci len += pathlen1; 25958c2ecf20Sopenharmony_ci if (req->r_old_dentry_drop) 25968c2ecf20Sopenharmony_ci len += pathlen2; 25978c2ecf20Sopenharmony_ci 25988c2ecf20Sopenharmony_ci msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false); 25998c2ecf20Sopenharmony_ci if (!msg) { 26008c2ecf20Sopenharmony_ci msg = ERR_PTR(-ENOMEM); 26018c2ecf20Sopenharmony_ci goto out_free2; 26028c2ecf20Sopenharmony_ci } 26038c2ecf20Sopenharmony_ci 26048c2ecf20Sopenharmony_ci msg->hdr.version = cpu_to_le16(2); 26058c2ecf20Sopenharmony_ci msg->hdr.tid = cpu_to_le64(req->r_tid); 26068c2ecf20Sopenharmony_ci 26078c2ecf20Sopenharmony_ci head = msg->front.iov_base; 26088c2ecf20Sopenharmony_ci p = msg->front.iov_base + sizeof(*head); 26098c2ecf20Sopenharmony_ci end = msg->front.iov_base + msg->front.iov_len; 26108c2ecf20Sopenharmony_ci 26118c2ecf20Sopenharmony_ci head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); 26128c2ecf20Sopenharmony_ci head->op = cpu_to_le32(req->r_op); 26138c2ecf20Sopenharmony_ci head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid)); 26148c2ecf20Sopenharmony_ci 
head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid)); 26158c2ecf20Sopenharmony_ci head->ino = cpu_to_le64(req->r_deleg_ino); 26168c2ecf20Sopenharmony_ci head->args = req->r_args; 26178c2ecf20Sopenharmony_ci 26188c2ecf20Sopenharmony_ci ceph_encode_filepath(&p, end, ino1, path1); 26198c2ecf20Sopenharmony_ci ceph_encode_filepath(&p, end, ino2, path2); 26208c2ecf20Sopenharmony_ci 26218c2ecf20Sopenharmony_ci /* make note of release offset, in case we need to replay */ 26228c2ecf20Sopenharmony_ci req->r_request_release_offset = p - msg->front.iov_base; 26238c2ecf20Sopenharmony_ci 26248c2ecf20Sopenharmony_ci /* cap releases */ 26258c2ecf20Sopenharmony_ci releases = 0; 26268c2ecf20Sopenharmony_ci if (req->r_inode_drop) 26278c2ecf20Sopenharmony_ci releases += ceph_encode_inode_release(&p, 26288c2ecf20Sopenharmony_ci req->r_inode ? req->r_inode : d_inode(req->r_dentry), 26298c2ecf20Sopenharmony_ci mds, req->r_inode_drop, req->r_inode_unless, 26308c2ecf20Sopenharmony_ci req->r_op == CEPH_MDS_OP_READDIR); 26318c2ecf20Sopenharmony_ci if (req->r_dentry_drop) 26328c2ecf20Sopenharmony_ci releases += ceph_encode_dentry_release(&p, req->r_dentry, 26338c2ecf20Sopenharmony_ci req->r_parent, mds, req->r_dentry_drop, 26348c2ecf20Sopenharmony_ci req->r_dentry_unless); 26358c2ecf20Sopenharmony_ci if (req->r_old_dentry_drop) 26368c2ecf20Sopenharmony_ci releases += ceph_encode_dentry_release(&p, req->r_old_dentry, 26378c2ecf20Sopenharmony_ci req->r_old_dentry_dir, mds, 26388c2ecf20Sopenharmony_ci req->r_old_dentry_drop, 26398c2ecf20Sopenharmony_ci req->r_old_dentry_unless); 26408c2ecf20Sopenharmony_ci if (req->r_old_inode_drop) 26418c2ecf20Sopenharmony_ci releases += ceph_encode_inode_release(&p, 26428c2ecf20Sopenharmony_ci d_inode(req->r_old_dentry), 26438c2ecf20Sopenharmony_ci mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); 26448c2ecf20Sopenharmony_ci 26458c2ecf20Sopenharmony_ci if (drop_cap_releases) { 26468c2ecf20Sopenharmony_ci releases = 0; 
26478c2ecf20Sopenharmony_ci p = msg->front.iov_base + req->r_request_release_offset; 26488c2ecf20Sopenharmony_ci } 26498c2ecf20Sopenharmony_ci 26508c2ecf20Sopenharmony_ci head->num_releases = cpu_to_le16(releases); 26518c2ecf20Sopenharmony_ci 26528c2ecf20Sopenharmony_ci /* time stamp */ 26538c2ecf20Sopenharmony_ci { 26548c2ecf20Sopenharmony_ci struct ceph_timespec ts; 26558c2ecf20Sopenharmony_ci ceph_encode_timespec64(&ts, &req->r_stamp); 26568c2ecf20Sopenharmony_ci ceph_encode_copy(&p, &ts, sizeof(ts)); 26578c2ecf20Sopenharmony_ci } 26588c2ecf20Sopenharmony_ci 26598c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(p > end)) { 26608c2ecf20Sopenharmony_ci ceph_msg_put(msg); 26618c2ecf20Sopenharmony_ci msg = ERR_PTR(-ERANGE); 26628c2ecf20Sopenharmony_ci goto out_free2; 26638c2ecf20Sopenharmony_ci } 26648c2ecf20Sopenharmony_ci 26658c2ecf20Sopenharmony_ci msg->front.iov_len = p - msg->front.iov_base; 26668c2ecf20Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 26678c2ecf20Sopenharmony_ci 26688c2ecf20Sopenharmony_ci if (req->r_pagelist) { 26698c2ecf20Sopenharmony_ci struct ceph_pagelist *pagelist = req->r_pagelist; 26708c2ecf20Sopenharmony_ci ceph_msg_data_add_pagelist(msg, pagelist); 26718c2ecf20Sopenharmony_ci msg->hdr.data_len = cpu_to_le32(pagelist->length); 26728c2ecf20Sopenharmony_ci } else { 26738c2ecf20Sopenharmony_ci msg->hdr.data_len = 0; 26748c2ecf20Sopenharmony_ci } 26758c2ecf20Sopenharmony_ci 26768c2ecf20Sopenharmony_ci msg->hdr.data_off = cpu_to_le16(0); 26778c2ecf20Sopenharmony_ci 26788c2ecf20Sopenharmony_ciout_free2: 26798c2ecf20Sopenharmony_ci if (freepath2) 26808c2ecf20Sopenharmony_ci ceph_mdsc_free_path((char *)path2, pathlen2); 26818c2ecf20Sopenharmony_ciout_free1: 26828c2ecf20Sopenharmony_ci if (freepath1) 26838c2ecf20Sopenharmony_ci ceph_mdsc_free_path((char *)path1, pathlen1); 26848c2ecf20Sopenharmony_ciout: 26858c2ecf20Sopenharmony_ci return msg; 26868c2ecf20Sopenharmony_ci} 26878c2ecf20Sopenharmony_ci 26888c2ecf20Sopenharmony_ci/* 
26898c2ecf20Sopenharmony_ci * called under mdsc->mutex if error, under no mutex if 26908c2ecf20Sopenharmony_ci * success. 26918c2ecf20Sopenharmony_ci */ 26928c2ecf20Sopenharmony_cistatic void complete_request(struct ceph_mds_client *mdsc, 26938c2ecf20Sopenharmony_ci struct ceph_mds_request *req) 26948c2ecf20Sopenharmony_ci{ 26958c2ecf20Sopenharmony_ci req->r_end_latency = ktime_get(); 26968c2ecf20Sopenharmony_ci 26978c2ecf20Sopenharmony_ci if (req->r_callback) 26988c2ecf20Sopenharmony_ci req->r_callback(mdsc, req); 26998c2ecf20Sopenharmony_ci complete_all(&req->r_completion); 27008c2ecf20Sopenharmony_ci} 27018c2ecf20Sopenharmony_ci 27028c2ecf20Sopenharmony_ci/* 27038c2ecf20Sopenharmony_ci * called under mdsc->mutex 27048c2ecf20Sopenharmony_ci */ 27058c2ecf20Sopenharmony_cistatic int __prepare_send_request(struct ceph_mds_client *mdsc, 27068c2ecf20Sopenharmony_ci struct ceph_mds_request *req, 27078c2ecf20Sopenharmony_ci int mds, bool drop_cap_releases) 27088c2ecf20Sopenharmony_ci{ 27098c2ecf20Sopenharmony_ci struct ceph_mds_request_head *rhead; 27108c2ecf20Sopenharmony_ci struct ceph_msg *msg; 27118c2ecf20Sopenharmony_ci int flags = 0; 27128c2ecf20Sopenharmony_ci 27138c2ecf20Sopenharmony_ci req->r_attempts++; 27148c2ecf20Sopenharmony_ci if (req->r_inode) { 27158c2ecf20Sopenharmony_ci struct ceph_cap *cap = 27168c2ecf20Sopenharmony_ci ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds); 27178c2ecf20Sopenharmony_ci 27188c2ecf20Sopenharmony_ci if (cap) 27198c2ecf20Sopenharmony_ci req->r_sent_on_mseq = cap->mseq; 27208c2ecf20Sopenharmony_ci else 27218c2ecf20Sopenharmony_ci req->r_sent_on_mseq = -1; 27228c2ecf20Sopenharmony_ci } 27238c2ecf20Sopenharmony_ci dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req, 27248c2ecf20Sopenharmony_ci req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts); 27258c2ecf20Sopenharmony_ci 27268c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { 27278c2ecf20Sopenharmony_ci void *p; 
27288c2ecf20Sopenharmony_ci /* 27298c2ecf20Sopenharmony_ci * Replay. Do not regenerate message (and rebuild 27308c2ecf20Sopenharmony_ci * paths, etc.); just use the original message. 27318c2ecf20Sopenharmony_ci * Rebuilding paths will break for renames because 27328c2ecf20Sopenharmony_ci * d_move mangles the src name. 27338c2ecf20Sopenharmony_ci */ 27348c2ecf20Sopenharmony_ci msg = req->r_request; 27358c2ecf20Sopenharmony_ci rhead = msg->front.iov_base; 27368c2ecf20Sopenharmony_ci 27378c2ecf20Sopenharmony_ci flags = le32_to_cpu(rhead->flags); 27388c2ecf20Sopenharmony_ci flags |= CEPH_MDS_FLAG_REPLAY; 27398c2ecf20Sopenharmony_ci rhead->flags = cpu_to_le32(flags); 27408c2ecf20Sopenharmony_ci 27418c2ecf20Sopenharmony_ci if (req->r_target_inode) 27428c2ecf20Sopenharmony_ci rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode)); 27438c2ecf20Sopenharmony_ci 27448c2ecf20Sopenharmony_ci rhead->num_retry = req->r_attempts - 1; 27458c2ecf20Sopenharmony_ci 27468c2ecf20Sopenharmony_ci /* remove cap/dentry releases from message */ 27478c2ecf20Sopenharmony_ci rhead->num_releases = 0; 27488c2ecf20Sopenharmony_ci 27498c2ecf20Sopenharmony_ci /* time stamp */ 27508c2ecf20Sopenharmony_ci p = msg->front.iov_base + req->r_request_release_offset; 27518c2ecf20Sopenharmony_ci { 27528c2ecf20Sopenharmony_ci struct ceph_timespec ts; 27538c2ecf20Sopenharmony_ci ceph_encode_timespec64(&ts, &req->r_stamp); 27548c2ecf20Sopenharmony_ci ceph_encode_copy(&p, &ts, sizeof(ts)); 27558c2ecf20Sopenharmony_ci } 27568c2ecf20Sopenharmony_ci 27578c2ecf20Sopenharmony_ci msg->front.iov_len = p - msg->front.iov_base; 27588c2ecf20Sopenharmony_ci msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); 27598c2ecf20Sopenharmony_ci return 0; 27608c2ecf20Sopenharmony_ci } 27618c2ecf20Sopenharmony_ci 27628c2ecf20Sopenharmony_ci if (req->r_request) { 27638c2ecf20Sopenharmony_ci ceph_msg_put(req->r_request); 27648c2ecf20Sopenharmony_ci req->r_request = NULL; 27658c2ecf20Sopenharmony_ci } 27668c2ecf20Sopenharmony_ci msg 
= create_request_message(mdsc, req, mds, drop_cap_releases); 27678c2ecf20Sopenharmony_ci if (IS_ERR(msg)) { 27688c2ecf20Sopenharmony_ci req->r_err = PTR_ERR(msg); 27698c2ecf20Sopenharmony_ci return PTR_ERR(msg); 27708c2ecf20Sopenharmony_ci } 27718c2ecf20Sopenharmony_ci req->r_request = msg; 27728c2ecf20Sopenharmony_ci 27738c2ecf20Sopenharmony_ci rhead = msg->front.iov_base; 27748c2ecf20Sopenharmony_ci rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc)); 27758c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) 27768c2ecf20Sopenharmony_ci flags |= CEPH_MDS_FLAG_REPLAY; 27778c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) 27788c2ecf20Sopenharmony_ci flags |= CEPH_MDS_FLAG_ASYNC; 27798c2ecf20Sopenharmony_ci if (req->r_parent) 27808c2ecf20Sopenharmony_ci flags |= CEPH_MDS_FLAG_WANT_DENTRY; 27818c2ecf20Sopenharmony_ci rhead->flags = cpu_to_le32(flags); 27828c2ecf20Sopenharmony_ci rhead->num_fwd = req->r_num_fwd; 27838c2ecf20Sopenharmony_ci rhead->num_retry = req->r_attempts - 1; 27848c2ecf20Sopenharmony_ci 27858c2ecf20Sopenharmony_ci dout(" r_parent = %p\n", req->r_parent); 27868c2ecf20Sopenharmony_ci return 0; 27878c2ecf20Sopenharmony_ci} 27888c2ecf20Sopenharmony_ci 27898c2ecf20Sopenharmony_ci/* 27908c2ecf20Sopenharmony_ci * called under mdsc->mutex 27918c2ecf20Sopenharmony_ci */ 27928c2ecf20Sopenharmony_cistatic int __send_request(struct ceph_mds_client *mdsc, 27938c2ecf20Sopenharmony_ci struct ceph_mds_session *session, 27948c2ecf20Sopenharmony_ci struct ceph_mds_request *req, 27958c2ecf20Sopenharmony_ci bool drop_cap_releases) 27968c2ecf20Sopenharmony_ci{ 27978c2ecf20Sopenharmony_ci int err; 27988c2ecf20Sopenharmony_ci 27998c2ecf20Sopenharmony_ci err = __prepare_send_request(mdsc, req, session->s_mds, 28008c2ecf20Sopenharmony_ci drop_cap_releases); 28018c2ecf20Sopenharmony_ci if (!err) { 28028c2ecf20Sopenharmony_ci ceph_msg_get(req->r_request); 28038c2ecf20Sopenharmony_ci ceph_con_send(&session->s_con, 
req->r_request); 28048c2ecf20Sopenharmony_ci } 28058c2ecf20Sopenharmony_ci 28068c2ecf20Sopenharmony_ci return err; 28078c2ecf20Sopenharmony_ci} 28088c2ecf20Sopenharmony_ci 28098c2ecf20Sopenharmony_ci/* 28108c2ecf20Sopenharmony_ci * send request, or put it on the appropriate wait list. 28118c2ecf20Sopenharmony_ci */ 28128c2ecf20Sopenharmony_cistatic void __do_request(struct ceph_mds_client *mdsc, 28138c2ecf20Sopenharmony_ci struct ceph_mds_request *req) 28148c2ecf20Sopenharmony_ci{ 28158c2ecf20Sopenharmony_ci struct ceph_mds_session *session = NULL; 28168c2ecf20Sopenharmony_ci int mds = -1; 28178c2ecf20Sopenharmony_ci int err = 0; 28188c2ecf20Sopenharmony_ci bool random; 28198c2ecf20Sopenharmony_ci 28208c2ecf20Sopenharmony_ci if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { 28218c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) 28228c2ecf20Sopenharmony_ci __unregister_request(mdsc, req); 28238c2ecf20Sopenharmony_ci return; 28248c2ecf20Sopenharmony_ci } 28258c2ecf20Sopenharmony_ci 28268c2ecf20Sopenharmony_ci if (req->r_timeout && 28278c2ecf20Sopenharmony_ci time_after_eq(jiffies, req->r_started + req->r_timeout)) { 28288c2ecf20Sopenharmony_ci dout("do_request timed out\n"); 28298c2ecf20Sopenharmony_ci err = -ETIMEDOUT; 28308c2ecf20Sopenharmony_ci goto finish; 28318c2ecf20Sopenharmony_ci } 28328c2ecf20Sopenharmony_ci if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { 28338c2ecf20Sopenharmony_ci dout("do_request forced umount\n"); 28348c2ecf20Sopenharmony_ci err = -EIO; 28358c2ecf20Sopenharmony_ci goto finish; 28368c2ecf20Sopenharmony_ci } 28378c2ecf20Sopenharmony_ci if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) { 28388c2ecf20Sopenharmony_ci if (mdsc->mdsmap_err) { 28398c2ecf20Sopenharmony_ci err = mdsc->mdsmap_err; 28408c2ecf20Sopenharmony_ci dout("do_request mdsmap err %d\n", err); 28418c2ecf20Sopenharmony_ci goto finish; 28428c2ecf20Sopenharmony_ci } 28438c2ecf20Sopenharmony_ci if 
(mdsc->mdsmap->m_epoch == 0) { 28448c2ecf20Sopenharmony_ci dout("do_request no mdsmap, waiting for map\n"); 28458c2ecf20Sopenharmony_ci list_add(&req->r_wait, &mdsc->waiting_for_map); 28468c2ecf20Sopenharmony_ci return; 28478c2ecf20Sopenharmony_ci } 28488c2ecf20Sopenharmony_ci if (!(mdsc->fsc->mount_options->flags & 28498c2ecf20Sopenharmony_ci CEPH_MOUNT_OPT_MOUNTWAIT) && 28508c2ecf20Sopenharmony_ci !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { 28518c2ecf20Sopenharmony_ci err = -EHOSTUNREACH; 28528c2ecf20Sopenharmony_ci goto finish; 28538c2ecf20Sopenharmony_ci } 28548c2ecf20Sopenharmony_ci } 28558c2ecf20Sopenharmony_ci 28568c2ecf20Sopenharmony_ci put_request_session(req); 28578c2ecf20Sopenharmony_ci 28588c2ecf20Sopenharmony_ci mds = __choose_mds(mdsc, req, &random); 28598c2ecf20Sopenharmony_ci if (mds < 0 || 28608c2ecf20Sopenharmony_ci ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { 28618c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) { 28628c2ecf20Sopenharmony_ci err = -EJUKEBOX; 28638c2ecf20Sopenharmony_ci goto finish; 28648c2ecf20Sopenharmony_ci } 28658c2ecf20Sopenharmony_ci dout("do_request no mds or not active, waiting for map\n"); 28668c2ecf20Sopenharmony_ci list_add(&req->r_wait, &mdsc->waiting_for_map); 28678c2ecf20Sopenharmony_ci return; 28688c2ecf20Sopenharmony_ci } 28698c2ecf20Sopenharmony_ci 28708c2ecf20Sopenharmony_ci /* get, open session */ 28718c2ecf20Sopenharmony_ci session = __ceph_lookup_mds_session(mdsc, mds); 28728c2ecf20Sopenharmony_ci if (!session) { 28738c2ecf20Sopenharmony_ci session = register_session(mdsc, mds); 28748c2ecf20Sopenharmony_ci if (IS_ERR(session)) { 28758c2ecf20Sopenharmony_ci err = PTR_ERR(session); 28768c2ecf20Sopenharmony_ci goto finish; 28778c2ecf20Sopenharmony_ci } 28788c2ecf20Sopenharmony_ci } 28798c2ecf20Sopenharmony_ci req->r_session = ceph_get_mds_session(session); 28808c2ecf20Sopenharmony_ci 28818c2ecf20Sopenharmony_ci dout("do_request mds%d session %p state %s\n", 
mds, session, 28828c2ecf20Sopenharmony_ci ceph_session_state_name(session->s_state)); 28838c2ecf20Sopenharmony_ci if (session->s_state != CEPH_MDS_SESSION_OPEN && 28848c2ecf20Sopenharmony_ci session->s_state != CEPH_MDS_SESSION_HUNG) { 28858c2ecf20Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_REJECTED) { 28868c2ecf20Sopenharmony_ci err = -EACCES; 28878c2ecf20Sopenharmony_ci goto out_session; 28888c2ecf20Sopenharmony_ci } 28898c2ecf20Sopenharmony_ci /* 28908c2ecf20Sopenharmony_ci * We cannot queue async requests since the caps and delegated 28918c2ecf20Sopenharmony_ci * inodes are bound to the session. Just return -EJUKEBOX and 28928c2ecf20Sopenharmony_ci * let the caller retry a sync request in that case. 28938c2ecf20Sopenharmony_ci */ 28948c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) { 28958c2ecf20Sopenharmony_ci err = -EJUKEBOX; 28968c2ecf20Sopenharmony_ci goto out_session; 28978c2ecf20Sopenharmony_ci } 28988c2ecf20Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_NEW || 28998c2ecf20Sopenharmony_ci session->s_state == CEPH_MDS_SESSION_CLOSING) { 29008c2ecf20Sopenharmony_ci err = __open_session(mdsc, session); 29018c2ecf20Sopenharmony_ci if (err) 29028c2ecf20Sopenharmony_ci goto out_session; 29038c2ecf20Sopenharmony_ci /* retry the same mds later */ 29048c2ecf20Sopenharmony_ci if (random) 29058c2ecf20Sopenharmony_ci req->r_resend_mds = mds; 29068c2ecf20Sopenharmony_ci } 29078c2ecf20Sopenharmony_ci list_add(&req->r_wait, &session->s_waiting); 29088c2ecf20Sopenharmony_ci goto out_session; 29098c2ecf20Sopenharmony_ci } 29108c2ecf20Sopenharmony_ci 29118c2ecf20Sopenharmony_ci /* send request */ 29128c2ecf20Sopenharmony_ci req->r_resend_mds = -1; /* forget any previous mds hint */ 29138c2ecf20Sopenharmony_ci 29148c2ecf20Sopenharmony_ci if (req->r_request_started == 0) /* note request start time */ 29158c2ecf20Sopenharmony_ci req->r_request_started = jiffies; 29168c2ecf20Sopenharmony_ci 29178c2ecf20Sopenharmony_ci err = 
__send_request(mdsc, session, req, false); 29188c2ecf20Sopenharmony_ci 29198c2ecf20Sopenharmony_ciout_session: 29208c2ecf20Sopenharmony_ci ceph_put_mds_session(session); 29218c2ecf20Sopenharmony_cifinish: 29228c2ecf20Sopenharmony_ci if (err) { 29238c2ecf20Sopenharmony_ci dout("__do_request early error %d\n", err); 29248c2ecf20Sopenharmony_ci req->r_err = err; 29258c2ecf20Sopenharmony_ci complete_request(mdsc, req); 29268c2ecf20Sopenharmony_ci __unregister_request(mdsc, req); 29278c2ecf20Sopenharmony_ci } 29288c2ecf20Sopenharmony_ci return; 29298c2ecf20Sopenharmony_ci} 29308c2ecf20Sopenharmony_ci 29318c2ecf20Sopenharmony_ci/* 29328c2ecf20Sopenharmony_ci * called under mdsc->mutex 29338c2ecf20Sopenharmony_ci */ 29348c2ecf20Sopenharmony_cistatic void __wake_requests(struct ceph_mds_client *mdsc, 29358c2ecf20Sopenharmony_ci struct list_head *head) 29368c2ecf20Sopenharmony_ci{ 29378c2ecf20Sopenharmony_ci struct ceph_mds_request *req; 29388c2ecf20Sopenharmony_ci LIST_HEAD(tmp_list); 29398c2ecf20Sopenharmony_ci 29408c2ecf20Sopenharmony_ci list_splice_init(head, &tmp_list); 29418c2ecf20Sopenharmony_ci 29428c2ecf20Sopenharmony_ci while (!list_empty(&tmp_list)) { 29438c2ecf20Sopenharmony_ci req = list_entry(tmp_list.next, 29448c2ecf20Sopenharmony_ci struct ceph_mds_request, r_wait); 29458c2ecf20Sopenharmony_ci list_del_init(&req->r_wait); 29468c2ecf20Sopenharmony_ci dout(" wake request %p tid %llu\n", req, req->r_tid); 29478c2ecf20Sopenharmony_ci __do_request(mdsc, req); 29488c2ecf20Sopenharmony_ci } 29498c2ecf20Sopenharmony_ci} 29508c2ecf20Sopenharmony_ci 29518c2ecf20Sopenharmony_ci/* 29528c2ecf20Sopenharmony_ci * Wake up threads with requests pending for @mds, so that they can 29538c2ecf20Sopenharmony_ci * resubmit their requests to a possibly different mds. 
29548c2ecf20Sopenharmony_ci */ 29558c2ecf20Sopenharmony_cistatic void kick_requests(struct ceph_mds_client *mdsc, int mds) 29568c2ecf20Sopenharmony_ci{ 29578c2ecf20Sopenharmony_ci struct ceph_mds_request *req; 29588c2ecf20Sopenharmony_ci struct rb_node *p = rb_first(&mdsc->request_tree); 29598c2ecf20Sopenharmony_ci 29608c2ecf20Sopenharmony_ci dout("kick_requests mds%d\n", mds); 29618c2ecf20Sopenharmony_ci while (p) { 29628c2ecf20Sopenharmony_ci req = rb_entry(p, struct ceph_mds_request, r_node); 29638c2ecf20Sopenharmony_ci p = rb_next(p); 29648c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) 29658c2ecf20Sopenharmony_ci continue; 29668c2ecf20Sopenharmony_ci if (req->r_attempts > 0) 29678c2ecf20Sopenharmony_ci continue; /* only new requests */ 29688c2ecf20Sopenharmony_ci if (req->r_session && 29698c2ecf20Sopenharmony_ci req->r_session->s_mds == mds) { 29708c2ecf20Sopenharmony_ci dout(" kicking tid %llu\n", req->r_tid); 29718c2ecf20Sopenharmony_ci list_del_init(&req->r_wait); 29728c2ecf20Sopenharmony_ci __do_request(mdsc, req); 29738c2ecf20Sopenharmony_ci } 29748c2ecf20Sopenharmony_ci } 29758c2ecf20Sopenharmony_ci} 29768c2ecf20Sopenharmony_ci 29778c2ecf20Sopenharmony_ciint ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir, 29788c2ecf20Sopenharmony_ci struct ceph_mds_request *req) 29798c2ecf20Sopenharmony_ci{ 29808c2ecf20Sopenharmony_ci int err = 0; 29818c2ecf20Sopenharmony_ci 29828c2ecf20Sopenharmony_ci /* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */ 29838c2ecf20Sopenharmony_ci if (req->r_inode) 29848c2ecf20Sopenharmony_ci ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); 29858c2ecf20Sopenharmony_ci if (req->r_parent) { 29868c2ecf20Sopenharmony_ci struct ceph_inode_info *ci = ceph_inode(req->r_parent); 29878c2ecf20Sopenharmony_ci int fmode = (req->r_op & CEPH_MDS_OP_WRITE) ? 
29888c2ecf20Sopenharmony_ci CEPH_FILE_MODE_WR : CEPH_FILE_MODE_RD; 29898c2ecf20Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 29908c2ecf20Sopenharmony_ci ceph_take_cap_refs(ci, CEPH_CAP_PIN, false); 29918c2ecf20Sopenharmony_ci __ceph_touch_fmode(ci, mdsc, fmode); 29928c2ecf20Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 29938c2ecf20Sopenharmony_ci ihold(req->r_parent); 29948c2ecf20Sopenharmony_ci } 29958c2ecf20Sopenharmony_ci if (req->r_old_dentry_dir) 29968c2ecf20Sopenharmony_ci ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), 29978c2ecf20Sopenharmony_ci CEPH_CAP_PIN); 29988c2ecf20Sopenharmony_ci 29998c2ecf20Sopenharmony_ci if (req->r_inode) { 30008c2ecf20Sopenharmony_ci err = ceph_wait_on_async_create(req->r_inode); 30018c2ecf20Sopenharmony_ci if (err) { 30028c2ecf20Sopenharmony_ci dout("%s: wait for async create returned: %d\n", 30038c2ecf20Sopenharmony_ci __func__, err); 30048c2ecf20Sopenharmony_ci return err; 30058c2ecf20Sopenharmony_ci } 30068c2ecf20Sopenharmony_ci } 30078c2ecf20Sopenharmony_ci 30088c2ecf20Sopenharmony_ci if (!err && req->r_old_inode) { 30098c2ecf20Sopenharmony_ci err = ceph_wait_on_async_create(req->r_old_inode); 30108c2ecf20Sopenharmony_ci if (err) { 30118c2ecf20Sopenharmony_ci dout("%s: wait for async create returned: %d\n", 30128c2ecf20Sopenharmony_ci __func__, err); 30138c2ecf20Sopenharmony_ci return err; 30148c2ecf20Sopenharmony_ci } 30158c2ecf20Sopenharmony_ci } 30168c2ecf20Sopenharmony_ci 30178c2ecf20Sopenharmony_ci dout("submit_request on %p for inode %p\n", req, dir); 30188c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 30198c2ecf20Sopenharmony_ci __register_request(mdsc, req, dir); 30208c2ecf20Sopenharmony_ci __do_request(mdsc, req); 30218c2ecf20Sopenharmony_ci err = req->r_err; 30228c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 30238c2ecf20Sopenharmony_ci return err; 30248c2ecf20Sopenharmony_ci} 30258c2ecf20Sopenharmony_ci 30268c2ecf20Sopenharmony_cistatic int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc, 
30278c2ecf20Sopenharmony_ci struct ceph_mds_request *req) 30288c2ecf20Sopenharmony_ci{ 30298c2ecf20Sopenharmony_ci int err; 30308c2ecf20Sopenharmony_ci 30318c2ecf20Sopenharmony_ci /* wait */ 30328c2ecf20Sopenharmony_ci dout("do_request waiting\n"); 30338c2ecf20Sopenharmony_ci if (!req->r_timeout && req->r_wait_for_completion) { 30348c2ecf20Sopenharmony_ci err = req->r_wait_for_completion(mdsc, req); 30358c2ecf20Sopenharmony_ci } else { 30368c2ecf20Sopenharmony_ci long timeleft = wait_for_completion_killable_timeout( 30378c2ecf20Sopenharmony_ci &req->r_completion, 30388c2ecf20Sopenharmony_ci ceph_timeout_jiffies(req->r_timeout)); 30398c2ecf20Sopenharmony_ci if (timeleft > 0) 30408c2ecf20Sopenharmony_ci err = 0; 30418c2ecf20Sopenharmony_ci else if (!timeleft) 30428c2ecf20Sopenharmony_ci err = -ETIMEDOUT; /* timed out */ 30438c2ecf20Sopenharmony_ci else 30448c2ecf20Sopenharmony_ci err = timeleft; /* killed */ 30458c2ecf20Sopenharmony_ci } 30468c2ecf20Sopenharmony_ci dout("do_request waited, got %d\n", err); 30478c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 30488c2ecf20Sopenharmony_ci 30498c2ecf20Sopenharmony_ci /* only abort if we didn't race with a real reply */ 30508c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) { 30518c2ecf20Sopenharmony_ci err = le32_to_cpu(req->r_reply_info.head->result); 30528c2ecf20Sopenharmony_ci } else if (err < 0) { 30538c2ecf20Sopenharmony_ci dout("aborted request %lld with %d\n", req->r_tid, err); 30548c2ecf20Sopenharmony_ci 30558c2ecf20Sopenharmony_ci /* 30568c2ecf20Sopenharmony_ci * ensure we aren't running concurrently with 30578c2ecf20Sopenharmony_ci * ceph_fill_trace or ceph_readdir_prepopulate, which 30588c2ecf20Sopenharmony_ci * rely on locks (dir mutex) held by our caller. 
30598c2ecf20Sopenharmony_ci */ 30608c2ecf20Sopenharmony_ci mutex_lock(&req->r_fill_mutex); 30618c2ecf20Sopenharmony_ci req->r_err = err; 30628c2ecf20Sopenharmony_ci set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags); 30638c2ecf20Sopenharmony_ci mutex_unlock(&req->r_fill_mutex); 30648c2ecf20Sopenharmony_ci 30658c2ecf20Sopenharmony_ci if (req->r_parent && 30668c2ecf20Sopenharmony_ci (req->r_op & CEPH_MDS_OP_WRITE)) 30678c2ecf20Sopenharmony_ci ceph_invalidate_dir_request(req); 30688c2ecf20Sopenharmony_ci } else { 30698c2ecf20Sopenharmony_ci err = req->r_err; 30708c2ecf20Sopenharmony_ci } 30718c2ecf20Sopenharmony_ci 30728c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 30738c2ecf20Sopenharmony_ci return err; 30748c2ecf20Sopenharmony_ci} 30758c2ecf20Sopenharmony_ci 30768c2ecf20Sopenharmony_ci/* 30778c2ecf20Sopenharmony_ci * Synchrously perform an mds request. Take care of all of the 30788c2ecf20Sopenharmony_ci * session setup, forwarding, retry details. 30798c2ecf20Sopenharmony_ci */ 30808c2ecf20Sopenharmony_ciint ceph_mdsc_do_request(struct ceph_mds_client *mdsc, 30818c2ecf20Sopenharmony_ci struct inode *dir, 30828c2ecf20Sopenharmony_ci struct ceph_mds_request *req) 30838c2ecf20Sopenharmony_ci{ 30848c2ecf20Sopenharmony_ci int err; 30858c2ecf20Sopenharmony_ci 30868c2ecf20Sopenharmony_ci dout("do_request on %p\n", req); 30878c2ecf20Sopenharmony_ci 30888c2ecf20Sopenharmony_ci /* issue */ 30898c2ecf20Sopenharmony_ci err = ceph_mdsc_submit_request(mdsc, dir, req); 30908c2ecf20Sopenharmony_ci if (!err) 30918c2ecf20Sopenharmony_ci err = ceph_mdsc_wait_request(mdsc, req); 30928c2ecf20Sopenharmony_ci dout("do_request %p done, result %d\n", req, err); 30938c2ecf20Sopenharmony_ci return err; 30948c2ecf20Sopenharmony_ci} 30958c2ecf20Sopenharmony_ci 30968c2ecf20Sopenharmony_ci/* 30978c2ecf20Sopenharmony_ci * Invalidate dir's completeness, dentry lease state on an aborted MDS 30988c2ecf20Sopenharmony_ci * namespace request. 
30998c2ecf20Sopenharmony_ci */ 31008c2ecf20Sopenharmony_civoid ceph_invalidate_dir_request(struct ceph_mds_request *req) 31018c2ecf20Sopenharmony_ci{ 31028c2ecf20Sopenharmony_ci struct inode *dir = req->r_parent; 31038c2ecf20Sopenharmony_ci struct inode *old_dir = req->r_old_dentry_dir; 31048c2ecf20Sopenharmony_ci 31058c2ecf20Sopenharmony_ci dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir); 31068c2ecf20Sopenharmony_ci 31078c2ecf20Sopenharmony_ci ceph_dir_clear_complete(dir); 31088c2ecf20Sopenharmony_ci if (old_dir) 31098c2ecf20Sopenharmony_ci ceph_dir_clear_complete(old_dir); 31108c2ecf20Sopenharmony_ci if (req->r_dentry) 31118c2ecf20Sopenharmony_ci ceph_invalidate_dentry_lease(req->r_dentry); 31128c2ecf20Sopenharmony_ci if (req->r_old_dentry) 31138c2ecf20Sopenharmony_ci ceph_invalidate_dentry_lease(req->r_old_dentry); 31148c2ecf20Sopenharmony_ci} 31158c2ecf20Sopenharmony_ci 31168c2ecf20Sopenharmony_ci/* 31178c2ecf20Sopenharmony_ci * Handle mds reply. 31188c2ecf20Sopenharmony_ci * 31198c2ecf20Sopenharmony_ci * We take the session mutex and parse and process the reply immediately. 31208c2ecf20Sopenharmony_ci * This preserves the logical ordering of replies, capabilities, etc., sent 31218c2ecf20Sopenharmony_ci * by the MDS as they are applied to our local cache. 
31228c2ecf20Sopenharmony_ci */ 31238c2ecf20Sopenharmony_cistatic void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) 31248c2ecf20Sopenharmony_ci{ 31258c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = session->s_mdsc; 31268c2ecf20Sopenharmony_ci struct ceph_mds_request *req; 31278c2ecf20Sopenharmony_ci struct ceph_mds_reply_head *head = msg->front.iov_base; 31288c2ecf20Sopenharmony_ci struct ceph_mds_reply_info_parsed *rinfo; /* parsed reply info */ 31298c2ecf20Sopenharmony_ci struct ceph_snap_realm *realm; 31308c2ecf20Sopenharmony_ci u64 tid; 31318c2ecf20Sopenharmony_ci int err, result; 31328c2ecf20Sopenharmony_ci int mds = session->s_mds; 31338c2ecf20Sopenharmony_ci 31348c2ecf20Sopenharmony_ci if (msg->front.iov_len < sizeof(*head)) { 31358c2ecf20Sopenharmony_ci pr_err("mdsc_handle_reply got corrupt (short) reply\n"); 31368c2ecf20Sopenharmony_ci ceph_msg_dump(msg); 31378c2ecf20Sopenharmony_ci return; 31388c2ecf20Sopenharmony_ci } 31398c2ecf20Sopenharmony_ci 31408c2ecf20Sopenharmony_ci /* get request, session */ 31418c2ecf20Sopenharmony_ci tid = le64_to_cpu(msg->hdr.tid); 31428c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 31438c2ecf20Sopenharmony_ci req = lookup_get_request(mdsc, tid); 31448c2ecf20Sopenharmony_ci if (!req) { 31458c2ecf20Sopenharmony_ci dout("handle_reply on unknown tid %llu\n", tid); 31468c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 31478c2ecf20Sopenharmony_ci return; 31488c2ecf20Sopenharmony_ci } 31498c2ecf20Sopenharmony_ci dout("handle_reply %p\n", req); 31508c2ecf20Sopenharmony_ci 31518c2ecf20Sopenharmony_ci /* correct session? */ 31528c2ecf20Sopenharmony_ci if (req->r_session != session) { 31538c2ecf20Sopenharmony_ci pr_err("mdsc_handle_reply got %llu on session mds%d" 31548c2ecf20Sopenharmony_ci " not mds%d\n", tid, session->s_mds, 31558c2ecf20Sopenharmony_ci req->r_session ? 
req->r_session->s_mds : -1); 31568c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 31578c2ecf20Sopenharmony_ci goto out; 31588c2ecf20Sopenharmony_ci } 31598c2ecf20Sopenharmony_ci 31608c2ecf20Sopenharmony_ci /* dup? */ 31618c2ecf20Sopenharmony_ci if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) || 31628c2ecf20Sopenharmony_ci (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) { 31638c2ecf20Sopenharmony_ci pr_warn("got a dup %s reply on %llu from mds%d\n", 31648c2ecf20Sopenharmony_ci head->safe ? "safe" : "unsafe", tid, mds); 31658c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 31668c2ecf20Sopenharmony_ci goto out; 31678c2ecf20Sopenharmony_ci } 31688c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) { 31698c2ecf20Sopenharmony_ci pr_warn("got unsafe after safe on %llu from mds%d\n", 31708c2ecf20Sopenharmony_ci tid, mds); 31718c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 31728c2ecf20Sopenharmony_ci goto out; 31738c2ecf20Sopenharmony_ci } 31748c2ecf20Sopenharmony_ci 31758c2ecf20Sopenharmony_ci result = le32_to_cpu(head->result); 31768c2ecf20Sopenharmony_ci 31778c2ecf20Sopenharmony_ci /* 31788c2ecf20Sopenharmony_ci * Handle an ESTALE 31798c2ecf20Sopenharmony_ci * if we're not talking to the authority, send to them 31808c2ecf20Sopenharmony_ci * if the authority has changed while we weren't looking, 31818c2ecf20Sopenharmony_ci * send to new authority 31828c2ecf20Sopenharmony_ci * Otherwise we just have to return an ESTALE 31838c2ecf20Sopenharmony_ci */ 31848c2ecf20Sopenharmony_ci if (result == -ESTALE) { 31858c2ecf20Sopenharmony_ci dout("got ESTALE on request %llu\n", req->r_tid); 31868c2ecf20Sopenharmony_ci req->r_resend_mds = -1; 31878c2ecf20Sopenharmony_ci if (req->r_direct_mode != USE_AUTH_MDS) { 31888c2ecf20Sopenharmony_ci dout("not using auth, setting for that now\n"); 31898c2ecf20Sopenharmony_ci req->r_direct_mode = USE_AUTH_MDS; 31908c2ecf20Sopenharmony_ci __do_request(mdsc, req); 
31918c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 31928c2ecf20Sopenharmony_ci goto out; 31938c2ecf20Sopenharmony_ci } else { 31948c2ecf20Sopenharmony_ci int mds = __choose_mds(mdsc, req, NULL); 31958c2ecf20Sopenharmony_ci if (mds >= 0 && mds != req->r_session->s_mds) { 31968c2ecf20Sopenharmony_ci dout("but auth changed, so resending\n"); 31978c2ecf20Sopenharmony_ci __do_request(mdsc, req); 31988c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 31998c2ecf20Sopenharmony_ci goto out; 32008c2ecf20Sopenharmony_ci } 32018c2ecf20Sopenharmony_ci } 32028c2ecf20Sopenharmony_ci dout("have to return ESTALE on request %llu\n", req->r_tid); 32038c2ecf20Sopenharmony_ci } 32048c2ecf20Sopenharmony_ci 32058c2ecf20Sopenharmony_ci 32068c2ecf20Sopenharmony_ci if (head->safe) { 32078c2ecf20Sopenharmony_ci set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags); 32088c2ecf20Sopenharmony_ci __unregister_request(mdsc, req); 32098c2ecf20Sopenharmony_ci 32108c2ecf20Sopenharmony_ci /* last request during umount? */ 32118c2ecf20Sopenharmony_ci if (mdsc->stopping && !__get_oldest_req(mdsc)) 32128c2ecf20Sopenharmony_ci complete_all(&mdsc->safe_umount_waiters); 32138c2ecf20Sopenharmony_ci 32148c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { 32158c2ecf20Sopenharmony_ci /* 32168c2ecf20Sopenharmony_ci * We already handled the unsafe response, now do the 32178c2ecf20Sopenharmony_ci * cleanup. No need to examine the response; the MDS 32188c2ecf20Sopenharmony_ci * doesn't include any result info in the safe 32198c2ecf20Sopenharmony_ci * response. And even if it did, there is nothing 32208c2ecf20Sopenharmony_ci * useful we could do with a revised return value. 
32218c2ecf20Sopenharmony_ci */ 32228c2ecf20Sopenharmony_ci dout("got safe reply %llu, mds%d\n", tid, mds); 32238c2ecf20Sopenharmony_ci 32248c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 32258c2ecf20Sopenharmony_ci goto out; 32268c2ecf20Sopenharmony_ci } 32278c2ecf20Sopenharmony_ci } else { 32288c2ecf20Sopenharmony_ci set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags); 32298c2ecf20Sopenharmony_ci list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe); 32308c2ecf20Sopenharmony_ci } 32318c2ecf20Sopenharmony_ci 32328c2ecf20Sopenharmony_ci dout("handle_reply tid %lld result %d\n", tid, result); 32338c2ecf20Sopenharmony_ci rinfo = &req->r_reply_info; 32348c2ecf20Sopenharmony_ci if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features)) 32358c2ecf20Sopenharmony_ci err = parse_reply_info(session, msg, rinfo, (u64)-1); 32368c2ecf20Sopenharmony_ci else 32378c2ecf20Sopenharmony_ci err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features); 32388c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 32398c2ecf20Sopenharmony_ci 32408c2ecf20Sopenharmony_ci mutex_lock(&session->s_mutex); 32418c2ecf20Sopenharmony_ci if (err < 0) { 32428c2ecf20Sopenharmony_ci pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); 32438c2ecf20Sopenharmony_ci ceph_msg_dump(msg); 32448c2ecf20Sopenharmony_ci goto out_err; 32458c2ecf20Sopenharmony_ci } 32468c2ecf20Sopenharmony_ci 32478c2ecf20Sopenharmony_ci /* snap trace */ 32488c2ecf20Sopenharmony_ci realm = NULL; 32498c2ecf20Sopenharmony_ci if (rinfo->snapblob_len) { 32508c2ecf20Sopenharmony_ci down_write(&mdsc->snap_rwsem); 32518c2ecf20Sopenharmony_ci ceph_update_snap_trace(mdsc, rinfo->snapblob, 32528c2ecf20Sopenharmony_ci rinfo->snapblob + rinfo->snapblob_len, 32538c2ecf20Sopenharmony_ci le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP, 32548c2ecf20Sopenharmony_ci &realm); 32558c2ecf20Sopenharmony_ci downgrade_write(&mdsc->snap_rwsem); 32568c2ecf20Sopenharmony_ci } else { 32578c2ecf20Sopenharmony_ci 
down_read(&mdsc->snap_rwsem); 32588c2ecf20Sopenharmony_ci } 32598c2ecf20Sopenharmony_ci 32608c2ecf20Sopenharmony_ci /* insert trace into our cache */ 32618c2ecf20Sopenharmony_ci mutex_lock(&req->r_fill_mutex); 32628c2ecf20Sopenharmony_ci current->journal_info = req; 32638c2ecf20Sopenharmony_ci err = ceph_fill_trace(mdsc->fsc->sb, req); 32648c2ecf20Sopenharmony_ci if (err == 0) { 32658c2ecf20Sopenharmony_ci if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || 32668c2ecf20Sopenharmony_ci req->r_op == CEPH_MDS_OP_LSSNAP)) 32678c2ecf20Sopenharmony_ci ceph_readdir_prepopulate(req, req->r_session); 32688c2ecf20Sopenharmony_ci } 32698c2ecf20Sopenharmony_ci current->journal_info = NULL; 32708c2ecf20Sopenharmony_ci mutex_unlock(&req->r_fill_mutex); 32718c2ecf20Sopenharmony_ci 32728c2ecf20Sopenharmony_ci up_read(&mdsc->snap_rwsem); 32738c2ecf20Sopenharmony_ci if (realm) 32748c2ecf20Sopenharmony_ci ceph_put_snap_realm(mdsc, realm); 32758c2ecf20Sopenharmony_ci 32768c2ecf20Sopenharmony_ci if (err == 0) { 32778c2ecf20Sopenharmony_ci if (req->r_target_inode && 32788c2ecf20Sopenharmony_ci test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { 32798c2ecf20Sopenharmony_ci struct ceph_inode_info *ci = 32808c2ecf20Sopenharmony_ci ceph_inode(req->r_target_inode); 32818c2ecf20Sopenharmony_ci spin_lock(&ci->i_unsafe_lock); 32828c2ecf20Sopenharmony_ci list_add_tail(&req->r_unsafe_target_item, 32838c2ecf20Sopenharmony_ci &ci->i_unsafe_iops); 32848c2ecf20Sopenharmony_ci spin_unlock(&ci->i_unsafe_lock); 32858c2ecf20Sopenharmony_ci } 32868c2ecf20Sopenharmony_ci 32878c2ecf20Sopenharmony_ci ceph_unreserve_caps(mdsc, &req->r_caps_reservation); 32888c2ecf20Sopenharmony_ci } 32898c2ecf20Sopenharmony_ciout_err: 32908c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 32918c2ecf20Sopenharmony_ci if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { 32928c2ecf20Sopenharmony_ci if (err) { 32938c2ecf20Sopenharmony_ci req->r_err = err; 32948c2ecf20Sopenharmony_ci } else { 32958c2ecf20Sopenharmony_ci 
req->r_reply = ceph_msg_get(msg); 32968c2ecf20Sopenharmony_ci set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags); 32978c2ecf20Sopenharmony_ci } 32988c2ecf20Sopenharmony_ci } else { 32998c2ecf20Sopenharmony_ci dout("reply arrived after request %lld was aborted\n", tid); 33008c2ecf20Sopenharmony_ci } 33018c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 33028c2ecf20Sopenharmony_ci 33038c2ecf20Sopenharmony_ci mutex_unlock(&session->s_mutex); 33048c2ecf20Sopenharmony_ci 33058c2ecf20Sopenharmony_ci /* kick calling process */ 33068c2ecf20Sopenharmony_ci complete_request(mdsc, req); 33078c2ecf20Sopenharmony_ci 33088c2ecf20Sopenharmony_ci ceph_update_metadata_latency(&mdsc->metric, req->r_start_latency, 33098c2ecf20Sopenharmony_ci req->r_end_latency, err); 33108c2ecf20Sopenharmony_ciout: 33118c2ecf20Sopenharmony_ci ceph_mdsc_put_request(req); 33128c2ecf20Sopenharmony_ci return; 33138c2ecf20Sopenharmony_ci} 33148c2ecf20Sopenharmony_ci 33158c2ecf20Sopenharmony_ci 33168c2ecf20Sopenharmony_ci 33178c2ecf20Sopenharmony_ci/* 33188c2ecf20Sopenharmony_ci * handle mds notification that our request has been forwarded. 
33198c2ecf20Sopenharmony_ci */ 33208c2ecf20Sopenharmony_cistatic void handle_forward(struct ceph_mds_client *mdsc, 33218c2ecf20Sopenharmony_ci struct ceph_mds_session *session, 33228c2ecf20Sopenharmony_ci struct ceph_msg *msg) 33238c2ecf20Sopenharmony_ci{ 33248c2ecf20Sopenharmony_ci struct ceph_mds_request *req; 33258c2ecf20Sopenharmony_ci u64 tid = le64_to_cpu(msg->hdr.tid); 33268c2ecf20Sopenharmony_ci u32 next_mds; 33278c2ecf20Sopenharmony_ci u32 fwd_seq; 33288c2ecf20Sopenharmony_ci int err = -EINVAL; 33298c2ecf20Sopenharmony_ci void *p = msg->front.iov_base; 33308c2ecf20Sopenharmony_ci void *end = p + msg->front.iov_len; 33318c2ecf20Sopenharmony_ci 33328c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, 2*sizeof(u32), bad); 33338c2ecf20Sopenharmony_ci next_mds = ceph_decode_32(&p); 33348c2ecf20Sopenharmony_ci fwd_seq = ceph_decode_32(&p); 33358c2ecf20Sopenharmony_ci 33368c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 33378c2ecf20Sopenharmony_ci req = lookup_get_request(mdsc, tid); 33388c2ecf20Sopenharmony_ci if (!req) { 33398c2ecf20Sopenharmony_ci dout("forward tid %llu to mds%d - req dne\n", tid, next_mds); 33408c2ecf20Sopenharmony_ci goto out; /* dup reply? */ 33418c2ecf20Sopenharmony_ci } 33428c2ecf20Sopenharmony_ci 33438c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { 33448c2ecf20Sopenharmony_ci dout("forward tid %llu aborted, unregistering\n", tid); 33458c2ecf20Sopenharmony_ci __unregister_request(mdsc, req); 33468c2ecf20Sopenharmony_ci } else if (fwd_seq <= req->r_num_fwd) { 33478c2ecf20Sopenharmony_ci dout("forward tid %llu to mds%d - old seq %d <= %d\n", 33488c2ecf20Sopenharmony_ci tid, next_mds, req->r_num_fwd, fwd_seq); 33498c2ecf20Sopenharmony_ci } else { 33508c2ecf20Sopenharmony_ci /* resend. 
forward race not possible; mds would drop */ 33518c2ecf20Sopenharmony_ci dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds); 33528c2ecf20Sopenharmony_ci BUG_ON(req->r_err); 33538c2ecf20Sopenharmony_ci BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)); 33548c2ecf20Sopenharmony_ci req->r_attempts = 0; 33558c2ecf20Sopenharmony_ci req->r_num_fwd = fwd_seq; 33568c2ecf20Sopenharmony_ci req->r_resend_mds = next_mds; 33578c2ecf20Sopenharmony_ci put_request_session(req); 33588c2ecf20Sopenharmony_ci __do_request(mdsc, req); 33598c2ecf20Sopenharmony_ci } 33608c2ecf20Sopenharmony_ci ceph_mdsc_put_request(req); 33618c2ecf20Sopenharmony_ciout: 33628c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 33638c2ecf20Sopenharmony_ci return; 33648c2ecf20Sopenharmony_ci 33658c2ecf20Sopenharmony_cibad: 33668c2ecf20Sopenharmony_ci pr_err("mdsc_handle_forward decode error err=%d\n", err); 33678c2ecf20Sopenharmony_ci} 33688c2ecf20Sopenharmony_ci 33698c2ecf20Sopenharmony_cistatic int __decode_session_metadata(void **p, void *end, 33708c2ecf20Sopenharmony_ci bool *blocklisted) 33718c2ecf20Sopenharmony_ci{ 33728c2ecf20Sopenharmony_ci /* map<string,string> */ 33738c2ecf20Sopenharmony_ci u32 n; 33748c2ecf20Sopenharmony_ci bool err_str; 33758c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, n, bad); 33768c2ecf20Sopenharmony_ci while (n-- > 0) { 33778c2ecf20Sopenharmony_ci u32 len; 33788c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, len, bad); 33798c2ecf20Sopenharmony_ci ceph_decode_need(p, end, len, bad); 33808c2ecf20Sopenharmony_ci err_str = !strncmp(*p, "error_string", len); 33818c2ecf20Sopenharmony_ci *p += len; 33828c2ecf20Sopenharmony_ci ceph_decode_32_safe(p, end, len, bad); 33838c2ecf20Sopenharmony_ci ceph_decode_need(p, end, len, bad); 33848c2ecf20Sopenharmony_ci /* 33858c2ecf20Sopenharmony_ci * Match "blocklisted (blacklisted)" from newer MDSes, 33868c2ecf20Sopenharmony_ci * or "blacklisted" from older MDSes. 
33878c2ecf20Sopenharmony_ci */ 33888c2ecf20Sopenharmony_ci if (err_str && strnstr(*p, "blacklisted", len)) 33898c2ecf20Sopenharmony_ci *blocklisted = true; 33908c2ecf20Sopenharmony_ci *p += len; 33918c2ecf20Sopenharmony_ci } 33928c2ecf20Sopenharmony_ci return 0; 33938c2ecf20Sopenharmony_cibad: 33948c2ecf20Sopenharmony_ci return -1; 33958c2ecf20Sopenharmony_ci} 33968c2ecf20Sopenharmony_ci 33978c2ecf20Sopenharmony_ci/* 33988c2ecf20Sopenharmony_ci * handle a mds session control message 33998c2ecf20Sopenharmony_ci */ 34008c2ecf20Sopenharmony_cistatic void handle_session(struct ceph_mds_session *session, 34018c2ecf20Sopenharmony_ci struct ceph_msg *msg) 34028c2ecf20Sopenharmony_ci{ 34038c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = session->s_mdsc; 34048c2ecf20Sopenharmony_ci int mds = session->s_mds; 34058c2ecf20Sopenharmony_ci int msg_version = le16_to_cpu(msg->hdr.version); 34068c2ecf20Sopenharmony_ci void *p = msg->front.iov_base; 34078c2ecf20Sopenharmony_ci void *end = p + msg->front.iov_len; 34088c2ecf20Sopenharmony_ci struct ceph_mds_session_head *h; 34098c2ecf20Sopenharmony_ci u32 op; 34108c2ecf20Sopenharmony_ci u64 seq, features = 0; 34118c2ecf20Sopenharmony_ci int wake = 0; 34128c2ecf20Sopenharmony_ci bool blocklisted = false; 34138c2ecf20Sopenharmony_ci 34148c2ecf20Sopenharmony_ci /* decode */ 34158c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, sizeof(*h), bad); 34168c2ecf20Sopenharmony_ci h = p; 34178c2ecf20Sopenharmony_ci p += sizeof(*h); 34188c2ecf20Sopenharmony_ci 34198c2ecf20Sopenharmony_ci op = le32_to_cpu(h->op); 34208c2ecf20Sopenharmony_ci seq = le64_to_cpu(h->seq); 34218c2ecf20Sopenharmony_ci 34228c2ecf20Sopenharmony_ci if (msg_version >= 3) { 34238c2ecf20Sopenharmony_ci u32 len; 34248c2ecf20Sopenharmony_ci /* version >= 2, metadata */ 34258c2ecf20Sopenharmony_ci if (__decode_session_metadata(&p, end, &blocklisted) < 0) 34268c2ecf20Sopenharmony_ci goto bad; 34278c2ecf20Sopenharmony_ci /* version >= 3, feature bits */ 
34288c2ecf20Sopenharmony_ci ceph_decode_32_safe(&p, end, len, bad); 34298c2ecf20Sopenharmony_ci if (len) { 34308c2ecf20Sopenharmony_ci ceph_decode_64_safe(&p, end, features, bad); 34318c2ecf20Sopenharmony_ci p += len - sizeof(features); 34328c2ecf20Sopenharmony_ci } 34338c2ecf20Sopenharmony_ci } 34348c2ecf20Sopenharmony_ci 34358c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 34368c2ecf20Sopenharmony_ci if (op == CEPH_SESSION_CLOSE) { 34378c2ecf20Sopenharmony_ci ceph_get_mds_session(session); 34388c2ecf20Sopenharmony_ci __unregister_session(mdsc, session); 34398c2ecf20Sopenharmony_ci } 34408c2ecf20Sopenharmony_ci /* FIXME: this ttl calculation is generous */ 34418c2ecf20Sopenharmony_ci session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose; 34428c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 34438c2ecf20Sopenharmony_ci 34448c2ecf20Sopenharmony_ci mutex_lock(&session->s_mutex); 34458c2ecf20Sopenharmony_ci 34468c2ecf20Sopenharmony_ci dout("handle_session mds%d %s %p state %s seq %llu\n", 34478c2ecf20Sopenharmony_ci mds, ceph_session_op_name(op), session, 34488c2ecf20Sopenharmony_ci ceph_session_state_name(session->s_state), seq); 34498c2ecf20Sopenharmony_ci 34508c2ecf20Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_HUNG) { 34518c2ecf20Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_OPEN; 34528c2ecf20Sopenharmony_ci pr_info("mds%d came back\n", session->s_mds); 34538c2ecf20Sopenharmony_ci } 34548c2ecf20Sopenharmony_ci 34558c2ecf20Sopenharmony_ci switch (op) { 34568c2ecf20Sopenharmony_ci case CEPH_SESSION_OPEN: 34578c2ecf20Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) 34588c2ecf20Sopenharmony_ci pr_info("mds%d reconnect success\n", session->s_mds); 34598c2ecf20Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_OPEN; 34608c2ecf20Sopenharmony_ci session->s_features = features; 34618c2ecf20Sopenharmony_ci renewed_caps(mdsc, session, 0); 34628c2ecf20Sopenharmony_ci if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, 
&session->s_features)) 34638c2ecf20Sopenharmony_ci metric_schedule_delayed(&mdsc->metric); 34648c2ecf20Sopenharmony_ci wake = 1; 34658c2ecf20Sopenharmony_ci if (mdsc->stopping) 34668c2ecf20Sopenharmony_ci __close_session(mdsc, session); 34678c2ecf20Sopenharmony_ci break; 34688c2ecf20Sopenharmony_ci 34698c2ecf20Sopenharmony_ci case CEPH_SESSION_RENEWCAPS: 34708c2ecf20Sopenharmony_ci if (session->s_renew_seq == seq) 34718c2ecf20Sopenharmony_ci renewed_caps(mdsc, session, 1); 34728c2ecf20Sopenharmony_ci break; 34738c2ecf20Sopenharmony_ci 34748c2ecf20Sopenharmony_ci case CEPH_SESSION_CLOSE: 34758c2ecf20Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_RECONNECTING) 34768c2ecf20Sopenharmony_ci pr_info("mds%d reconnect denied\n", session->s_mds); 34778c2ecf20Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_CLOSED; 34788c2ecf20Sopenharmony_ci cleanup_session_requests(mdsc, session); 34798c2ecf20Sopenharmony_ci remove_session_caps(session); 34808c2ecf20Sopenharmony_ci wake = 2; /* for good measure */ 34818c2ecf20Sopenharmony_ci wake_up_all(&mdsc->session_close_wq); 34828c2ecf20Sopenharmony_ci break; 34838c2ecf20Sopenharmony_ci 34848c2ecf20Sopenharmony_ci case CEPH_SESSION_STALE: 34858c2ecf20Sopenharmony_ci pr_info("mds%d caps went stale, renewing\n", 34868c2ecf20Sopenharmony_ci session->s_mds); 34878c2ecf20Sopenharmony_ci spin_lock(&session->s_gen_ttl_lock); 34888c2ecf20Sopenharmony_ci session->s_cap_gen++; 34898c2ecf20Sopenharmony_ci session->s_cap_ttl = jiffies - 1; 34908c2ecf20Sopenharmony_ci spin_unlock(&session->s_gen_ttl_lock); 34918c2ecf20Sopenharmony_ci send_renew_caps(mdsc, session); 34928c2ecf20Sopenharmony_ci break; 34938c2ecf20Sopenharmony_ci 34948c2ecf20Sopenharmony_ci case CEPH_SESSION_RECALL_STATE: 34958c2ecf20Sopenharmony_ci ceph_trim_caps(mdsc, session, le32_to_cpu(h->max_caps)); 34968c2ecf20Sopenharmony_ci break; 34978c2ecf20Sopenharmony_ci 34988c2ecf20Sopenharmony_ci case CEPH_SESSION_FLUSHMSG: 34998c2ecf20Sopenharmony_ci /* flush cap releases 
*/ 35008c2ecf20Sopenharmony_ci spin_lock(&session->s_cap_lock); 35018c2ecf20Sopenharmony_ci if (session->s_num_cap_releases) 35028c2ecf20Sopenharmony_ci ceph_flush_cap_releases(mdsc, session); 35038c2ecf20Sopenharmony_ci spin_unlock(&session->s_cap_lock); 35048c2ecf20Sopenharmony_ci 35058c2ecf20Sopenharmony_ci send_flushmsg_ack(mdsc, session, seq); 35068c2ecf20Sopenharmony_ci break; 35078c2ecf20Sopenharmony_ci 35088c2ecf20Sopenharmony_ci case CEPH_SESSION_FORCE_RO: 35098c2ecf20Sopenharmony_ci dout("force_session_readonly %p\n", session); 35108c2ecf20Sopenharmony_ci spin_lock(&session->s_cap_lock); 35118c2ecf20Sopenharmony_ci session->s_readonly = true; 35128c2ecf20Sopenharmony_ci spin_unlock(&session->s_cap_lock); 35138c2ecf20Sopenharmony_ci wake_up_session_caps(session, FORCE_RO); 35148c2ecf20Sopenharmony_ci break; 35158c2ecf20Sopenharmony_ci 35168c2ecf20Sopenharmony_ci case CEPH_SESSION_REJECT: 35178c2ecf20Sopenharmony_ci WARN_ON(session->s_state != CEPH_MDS_SESSION_OPENING); 35188c2ecf20Sopenharmony_ci pr_info("mds%d rejected session\n", session->s_mds); 35198c2ecf20Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_REJECTED; 35208c2ecf20Sopenharmony_ci cleanup_session_requests(mdsc, session); 35218c2ecf20Sopenharmony_ci remove_session_caps(session); 35228c2ecf20Sopenharmony_ci if (blocklisted) 35238c2ecf20Sopenharmony_ci mdsc->fsc->blocklisted = true; 35248c2ecf20Sopenharmony_ci wake = 2; /* for good measure */ 35258c2ecf20Sopenharmony_ci break; 35268c2ecf20Sopenharmony_ci 35278c2ecf20Sopenharmony_ci default: 35288c2ecf20Sopenharmony_ci pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds); 35298c2ecf20Sopenharmony_ci WARN_ON(1); 35308c2ecf20Sopenharmony_ci } 35318c2ecf20Sopenharmony_ci 35328c2ecf20Sopenharmony_ci mutex_unlock(&session->s_mutex); 35338c2ecf20Sopenharmony_ci if (wake) { 35348c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 35358c2ecf20Sopenharmony_ci __wake_requests(mdsc, &session->s_waiting); 35368c2ecf20Sopenharmony_ci if (wake == 2) 
35378c2ecf20Sopenharmony_ci kick_requests(mdsc, mds); 35388c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 35398c2ecf20Sopenharmony_ci } 35408c2ecf20Sopenharmony_ci if (op == CEPH_SESSION_CLOSE) 35418c2ecf20Sopenharmony_ci ceph_put_mds_session(session); 35428c2ecf20Sopenharmony_ci return; 35438c2ecf20Sopenharmony_ci 35448c2ecf20Sopenharmony_cibad: 35458c2ecf20Sopenharmony_ci pr_err("mdsc_handle_session corrupt message mds%d len %d\n", mds, 35468c2ecf20Sopenharmony_ci (int)msg->front.iov_len); 35478c2ecf20Sopenharmony_ci ceph_msg_dump(msg); 35488c2ecf20Sopenharmony_ci return; 35498c2ecf20Sopenharmony_ci} 35508c2ecf20Sopenharmony_ci 35518c2ecf20Sopenharmony_civoid ceph_mdsc_release_dir_caps(struct ceph_mds_request *req) 35528c2ecf20Sopenharmony_ci{ 35538c2ecf20Sopenharmony_ci int dcaps; 35548c2ecf20Sopenharmony_ci 35558c2ecf20Sopenharmony_ci dcaps = xchg(&req->r_dir_caps, 0); 35568c2ecf20Sopenharmony_ci if (dcaps) { 35578c2ecf20Sopenharmony_ci dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); 35588c2ecf20Sopenharmony_ci ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps); 35598c2ecf20Sopenharmony_ci } 35608c2ecf20Sopenharmony_ci} 35618c2ecf20Sopenharmony_ci 35628c2ecf20Sopenharmony_civoid ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req) 35638c2ecf20Sopenharmony_ci{ 35648c2ecf20Sopenharmony_ci int dcaps; 35658c2ecf20Sopenharmony_ci 35668c2ecf20Sopenharmony_ci dcaps = xchg(&req->r_dir_caps, 0); 35678c2ecf20Sopenharmony_ci if (dcaps) { 35688c2ecf20Sopenharmony_ci dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); 35698c2ecf20Sopenharmony_ci ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent), 35708c2ecf20Sopenharmony_ci dcaps); 35718c2ecf20Sopenharmony_ci } 35728c2ecf20Sopenharmony_ci} 35738c2ecf20Sopenharmony_ci 35748c2ecf20Sopenharmony_ci/* 35758c2ecf20Sopenharmony_ci * called under session->mutex. 
35768c2ecf20Sopenharmony_ci */ 35778c2ecf20Sopenharmony_cistatic void replay_unsafe_requests(struct ceph_mds_client *mdsc, 35788c2ecf20Sopenharmony_ci struct ceph_mds_session *session) 35798c2ecf20Sopenharmony_ci{ 35808c2ecf20Sopenharmony_ci struct ceph_mds_request *req, *nreq; 35818c2ecf20Sopenharmony_ci struct rb_node *p; 35828c2ecf20Sopenharmony_ci 35838c2ecf20Sopenharmony_ci dout("replay_unsafe_requests mds%d\n", session->s_mds); 35848c2ecf20Sopenharmony_ci 35858c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 35868c2ecf20Sopenharmony_ci list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) 35878c2ecf20Sopenharmony_ci __send_request(mdsc, session, req, true); 35888c2ecf20Sopenharmony_ci 35898c2ecf20Sopenharmony_ci /* 35908c2ecf20Sopenharmony_ci * also re-send old requests when MDS enters reconnect stage. So that MDS 35918c2ecf20Sopenharmony_ci * can process completed request in clientreplay stage. 35928c2ecf20Sopenharmony_ci */ 35938c2ecf20Sopenharmony_ci p = rb_first(&mdsc->request_tree); 35948c2ecf20Sopenharmony_ci while (p) { 35958c2ecf20Sopenharmony_ci req = rb_entry(p, struct ceph_mds_request, r_node); 35968c2ecf20Sopenharmony_ci p = rb_next(p); 35978c2ecf20Sopenharmony_ci if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) 35988c2ecf20Sopenharmony_ci continue; 35998c2ecf20Sopenharmony_ci if (req->r_attempts == 0) 36008c2ecf20Sopenharmony_ci continue; /* only old requests */ 36018c2ecf20Sopenharmony_ci if (!req->r_session) 36028c2ecf20Sopenharmony_ci continue; 36038c2ecf20Sopenharmony_ci if (req->r_session->s_mds != session->s_mds) 36048c2ecf20Sopenharmony_ci continue; 36058c2ecf20Sopenharmony_ci 36068c2ecf20Sopenharmony_ci ceph_mdsc_release_dir_caps_no_check(req); 36078c2ecf20Sopenharmony_ci 36088c2ecf20Sopenharmony_ci __send_request(mdsc, session, req, true); 36098c2ecf20Sopenharmony_ci } 36108c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 36118c2ecf20Sopenharmony_ci} 36128c2ecf20Sopenharmony_ci 36138c2ecf20Sopenharmony_cistatic 
int send_reconnect_partial(struct ceph_reconnect_state *recon_state) 36148c2ecf20Sopenharmony_ci{ 36158c2ecf20Sopenharmony_ci struct ceph_msg *reply; 36168c2ecf20Sopenharmony_ci struct ceph_pagelist *_pagelist; 36178c2ecf20Sopenharmony_ci struct page *page; 36188c2ecf20Sopenharmony_ci __le32 *addr; 36198c2ecf20Sopenharmony_ci int err = -ENOMEM; 36208c2ecf20Sopenharmony_ci 36218c2ecf20Sopenharmony_ci if (!recon_state->allow_multi) 36228c2ecf20Sopenharmony_ci return -ENOSPC; 36238c2ecf20Sopenharmony_ci 36248c2ecf20Sopenharmony_ci /* can't handle message that contains both caps and realm */ 36258c2ecf20Sopenharmony_ci BUG_ON(!recon_state->nr_caps == !recon_state->nr_realms); 36268c2ecf20Sopenharmony_ci 36278c2ecf20Sopenharmony_ci /* pre-allocate new pagelist */ 36288c2ecf20Sopenharmony_ci _pagelist = ceph_pagelist_alloc(GFP_NOFS); 36298c2ecf20Sopenharmony_ci if (!_pagelist) 36308c2ecf20Sopenharmony_ci return -ENOMEM; 36318c2ecf20Sopenharmony_ci 36328c2ecf20Sopenharmony_ci reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false); 36338c2ecf20Sopenharmony_ci if (!reply) 36348c2ecf20Sopenharmony_ci goto fail_msg; 36358c2ecf20Sopenharmony_ci 36368c2ecf20Sopenharmony_ci /* placeholder for nr_caps */ 36378c2ecf20Sopenharmony_ci err = ceph_pagelist_encode_32(_pagelist, 0); 36388c2ecf20Sopenharmony_ci if (err < 0) 36398c2ecf20Sopenharmony_ci goto fail; 36408c2ecf20Sopenharmony_ci 36418c2ecf20Sopenharmony_ci if (recon_state->nr_caps) { 36428c2ecf20Sopenharmony_ci /* currently encoding caps */ 36438c2ecf20Sopenharmony_ci err = ceph_pagelist_encode_32(recon_state->pagelist, 0); 36448c2ecf20Sopenharmony_ci if (err) 36458c2ecf20Sopenharmony_ci goto fail; 36468c2ecf20Sopenharmony_ci } else { 36478c2ecf20Sopenharmony_ci /* placeholder for nr_realms (currently encoding relams) */ 36488c2ecf20Sopenharmony_ci err = ceph_pagelist_encode_32(_pagelist, 0); 36498c2ecf20Sopenharmony_ci if (err < 0) 36508c2ecf20Sopenharmony_ci goto fail; 36518c2ecf20Sopenharmony_ci } 
36528c2ecf20Sopenharmony_ci 36538c2ecf20Sopenharmony_ci err = ceph_pagelist_encode_8(recon_state->pagelist, 1); 36548c2ecf20Sopenharmony_ci if (err) 36558c2ecf20Sopenharmony_ci goto fail; 36568c2ecf20Sopenharmony_ci 36578c2ecf20Sopenharmony_ci page = list_first_entry(&recon_state->pagelist->head, struct page, lru); 36588c2ecf20Sopenharmony_ci addr = kmap_atomic(page); 36598c2ecf20Sopenharmony_ci if (recon_state->nr_caps) { 36608c2ecf20Sopenharmony_ci /* currently encoding caps */ 36618c2ecf20Sopenharmony_ci *addr = cpu_to_le32(recon_state->nr_caps); 36628c2ecf20Sopenharmony_ci } else { 36638c2ecf20Sopenharmony_ci /* currently encoding relams */ 36648c2ecf20Sopenharmony_ci *(addr + 1) = cpu_to_le32(recon_state->nr_realms); 36658c2ecf20Sopenharmony_ci } 36668c2ecf20Sopenharmony_ci kunmap_atomic(addr); 36678c2ecf20Sopenharmony_ci 36688c2ecf20Sopenharmony_ci reply->hdr.version = cpu_to_le16(5); 36698c2ecf20Sopenharmony_ci reply->hdr.compat_version = cpu_to_le16(4); 36708c2ecf20Sopenharmony_ci 36718c2ecf20Sopenharmony_ci reply->hdr.data_len = cpu_to_le32(recon_state->pagelist->length); 36728c2ecf20Sopenharmony_ci ceph_msg_data_add_pagelist(reply, recon_state->pagelist); 36738c2ecf20Sopenharmony_ci 36748c2ecf20Sopenharmony_ci ceph_con_send(&recon_state->session->s_con, reply); 36758c2ecf20Sopenharmony_ci ceph_pagelist_release(recon_state->pagelist); 36768c2ecf20Sopenharmony_ci 36778c2ecf20Sopenharmony_ci recon_state->pagelist = _pagelist; 36788c2ecf20Sopenharmony_ci recon_state->nr_caps = 0; 36798c2ecf20Sopenharmony_ci recon_state->nr_realms = 0; 36808c2ecf20Sopenharmony_ci recon_state->msg_version = 5; 36818c2ecf20Sopenharmony_ci return 0; 36828c2ecf20Sopenharmony_cifail: 36838c2ecf20Sopenharmony_ci ceph_msg_put(reply); 36848c2ecf20Sopenharmony_cifail_msg: 36858c2ecf20Sopenharmony_ci ceph_pagelist_release(_pagelist); 36868c2ecf20Sopenharmony_ci return err; 36878c2ecf20Sopenharmony_ci} 36888c2ecf20Sopenharmony_ci 36898c2ecf20Sopenharmony_cistatic struct dentry* 
d_find_primary(struct inode *inode) 36908c2ecf20Sopenharmony_ci{ 36918c2ecf20Sopenharmony_ci struct dentry *alias, *dn = NULL; 36928c2ecf20Sopenharmony_ci 36938c2ecf20Sopenharmony_ci if (hlist_empty(&inode->i_dentry)) 36948c2ecf20Sopenharmony_ci return NULL; 36958c2ecf20Sopenharmony_ci 36968c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 36978c2ecf20Sopenharmony_ci if (hlist_empty(&inode->i_dentry)) 36988c2ecf20Sopenharmony_ci goto out_unlock; 36998c2ecf20Sopenharmony_ci 37008c2ecf20Sopenharmony_ci if (S_ISDIR(inode->i_mode)) { 37018c2ecf20Sopenharmony_ci alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias); 37028c2ecf20Sopenharmony_ci if (!IS_ROOT(alias)) 37038c2ecf20Sopenharmony_ci dn = dget(alias); 37048c2ecf20Sopenharmony_ci goto out_unlock; 37058c2ecf20Sopenharmony_ci } 37068c2ecf20Sopenharmony_ci 37078c2ecf20Sopenharmony_ci hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) { 37088c2ecf20Sopenharmony_ci spin_lock(&alias->d_lock); 37098c2ecf20Sopenharmony_ci if (!d_unhashed(alias) && 37108c2ecf20Sopenharmony_ci (ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) { 37118c2ecf20Sopenharmony_ci dn = dget_dlock(alias); 37128c2ecf20Sopenharmony_ci } 37138c2ecf20Sopenharmony_ci spin_unlock(&alias->d_lock); 37148c2ecf20Sopenharmony_ci if (dn) 37158c2ecf20Sopenharmony_ci break; 37168c2ecf20Sopenharmony_ci } 37178c2ecf20Sopenharmony_ciout_unlock: 37188c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 37198c2ecf20Sopenharmony_ci return dn; 37208c2ecf20Sopenharmony_ci} 37218c2ecf20Sopenharmony_ci 37228c2ecf20Sopenharmony_ci/* 37238c2ecf20Sopenharmony_ci * Encode information about a cap for a reconnect with the MDS. 
37248c2ecf20Sopenharmony_ci */ 37258c2ecf20Sopenharmony_cistatic int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, 37268c2ecf20Sopenharmony_ci void *arg) 37278c2ecf20Sopenharmony_ci{ 37288c2ecf20Sopenharmony_ci union { 37298c2ecf20Sopenharmony_ci struct ceph_mds_cap_reconnect v2; 37308c2ecf20Sopenharmony_ci struct ceph_mds_cap_reconnect_v1 v1; 37318c2ecf20Sopenharmony_ci } rec; 37328c2ecf20Sopenharmony_ci struct ceph_inode_info *ci = cap->ci; 37338c2ecf20Sopenharmony_ci struct ceph_reconnect_state *recon_state = arg; 37348c2ecf20Sopenharmony_ci struct ceph_pagelist *pagelist = recon_state->pagelist; 37358c2ecf20Sopenharmony_ci struct dentry *dentry; 37368c2ecf20Sopenharmony_ci char *path; 37378c2ecf20Sopenharmony_ci int pathlen = 0, err; 37388c2ecf20Sopenharmony_ci u64 pathbase; 37398c2ecf20Sopenharmony_ci u64 snap_follows; 37408c2ecf20Sopenharmony_ci 37418c2ecf20Sopenharmony_ci dout(" adding %p ino %llx.%llx cap %p %lld %s\n", 37428c2ecf20Sopenharmony_ci inode, ceph_vinop(inode), cap, cap->cap_id, 37438c2ecf20Sopenharmony_ci ceph_cap_string(cap->issued)); 37448c2ecf20Sopenharmony_ci 37458c2ecf20Sopenharmony_ci dentry = d_find_primary(inode); 37468c2ecf20Sopenharmony_ci if (dentry) { 37478c2ecf20Sopenharmony_ci /* set pathbase to parent dir when msg_version >= 2 */ 37488c2ecf20Sopenharmony_ci path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, 37498c2ecf20Sopenharmony_ci recon_state->msg_version >= 2); 37508c2ecf20Sopenharmony_ci dput(dentry); 37518c2ecf20Sopenharmony_ci if (IS_ERR(path)) { 37528c2ecf20Sopenharmony_ci err = PTR_ERR(path); 37538c2ecf20Sopenharmony_ci goto out_err; 37548c2ecf20Sopenharmony_ci } 37558c2ecf20Sopenharmony_ci } else { 37568c2ecf20Sopenharmony_ci path = NULL; 37578c2ecf20Sopenharmony_ci pathbase = 0; 37588c2ecf20Sopenharmony_ci } 37598c2ecf20Sopenharmony_ci 37608c2ecf20Sopenharmony_ci spin_lock(&ci->i_ceph_lock); 37618c2ecf20Sopenharmony_ci cap->seq = 0; /* reset cap seq */ 37628c2ecf20Sopenharmony_ci cap->issue_seq = 
0; /* and issue_seq */ 37638c2ecf20Sopenharmony_ci cap->mseq = 0; /* and migrate_seq */ 37648c2ecf20Sopenharmony_ci cap->cap_gen = cap->session->s_cap_gen; 37658c2ecf20Sopenharmony_ci 37668c2ecf20Sopenharmony_ci /* These are lost when the session goes away */ 37678c2ecf20Sopenharmony_ci if (S_ISDIR(inode->i_mode)) { 37688c2ecf20Sopenharmony_ci if (cap->issued & CEPH_CAP_DIR_CREATE) { 37698c2ecf20Sopenharmony_ci ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns)); 37708c2ecf20Sopenharmony_ci memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout)); 37718c2ecf20Sopenharmony_ci } 37728c2ecf20Sopenharmony_ci cap->issued &= ~CEPH_CAP_ANY_DIR_OPS; 37738c2ecf20Sopenharmony_ci } 37748c2ecf20Sopenharmony_ci 37758c2ecf20Sopenharmony_ci if (recon_state->msg_version >= 2) { 37768c2ecf20Sopenharmony_ci rec.v2.cap_id = cpu_to_le64(cap->cap_id); 37778c2ecf20Sopenharmony_ci rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); 37788c2ecf20Sopenharmony_ci rec.v2.issued = cpu_to_le32(cap->issued); 37798c2ecf20Sopenharmony_ci rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); 37808c2ecf20Sopenharmony_ci rec.v2.pathbase = cpu_to_le64(pathbase); 37818c2ecf20Sopenharmony_ci rec.v2.flock_len = (__force __le32) 37828c2ecf20Sopenharmony_ci ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 
0 : 1); 37838c2ecf20Sopenharmony_ci } else { 37848c2ecf20Sopenharmony_ci rec.v1.cap_id = cpu_to_le64(cap->cap_id); 37858c2ecf20Sopenharmony_ci rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); 37868c2ecf20Sopenharmony_ci rec.v1.issued = cpu_to_le32(cap->issued); 37878c2ecf20Sopenharmony_ci rec.v1.size = cpu_to_le64(inode->i_size); 37888c2ecf20Sopenharmony_ci ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime); 37898c2ecf20Sopenharmony_ci ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime); 37908c2ecf20Sopenharmony_ci rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); 37918c2ecf20Sopenharmony_ci rec.v1.pathbase = cpu_to_le64(pathbase); 37928c2ecf20Sopenharmony_ci } 37938c2ecf20Sopenharmony_ci 37948c2ecf20Sopenharmony_ci if (list_empty(&ci->i_cap_snaps)) { 37958c2ecf20Sopenharmony_ci snap_follows = ci->i_head_snapc ? ci->i_head_snapc->seq : 0; 37968c2ecf20Sopenharmony_ci } else { 37978c2ecf20Sopenharmony_ci struct ceph_cap_snap *capsnap = 37988c2ecf20Sopenharmony_ci list_first_entry(&ci->i_cap_snaps, 37998c2ecf20Sopenharmony_ci struct ceph_cap_snap, ci_item); 38008c2ecf20Sopenharmony_ci snap_follows = capsnap->follows; 38018c2ecf20Sopenharmony_ci } 38028c2ecf20Sopenharmony_ci spin_unlock(&ci->i_ceph_lock); 38038c2ecf20Sopenharmony_ci 38048c2ecf20Sopenharmony_ci if (recon_state->msg_version >= 2) { 38058c2ecf20Sopenharmony_ci int num_fcntl_locks, num_flock_locks; 38068c2ecf20Sopenharmony_ci struct ceph_filelock *flocks = NULL; 38078c2ecf20Sopenharmony_ci size_t struct_len, total_len = sizeof(u64); 38088c2ecf20Sopenharmony_ci u8 struct_v = 0; 38098c2ecf20Sopenharmony_ci 38108c2ecf20Sopenharmony_ciencode_again: 38118c2ecf20Sopenharmony_ci if (rec.v2.flock_len) { 38128c2ecf20Sopenharmony_ci ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); 38138c2ecf20Sopenharmony_ci } else { 38148c2ecf20Sopenharmony_ci num_fcntl_locks = 0; 38158c2ecf20Sopenharmony_ci num_flock_locks = 0; 38168c2ecf20Sopenharmony_ci } 38178c2ecf20Sopenharmony_ci if 
(num_fcntl_locks + num_flock_locks > 0) { 38188c2ecf20Sopenharmony_ci flocks = kmalloc_array(num_fcntl_locks + num_flock_locks, 38198c2ecf20Sopenharmony_ci sizeof(struct ceph_filelock), 38208c2ecf20Sopenharmony_ci GFP_NOFS); 38218c2ecf20Sopenharmony_ci if (!flocks) { 38228c2ecf20Sopenharmony_ci err = -ENOMEM; 38238c2ecf20Sopenharmony_ci goto out_err; 38248c2ecf20Sopenharmony_ci } 38258c2ecf20Sopenharmony_ci err = ceph_encode_locks_to_buffer(inode, flocks, 38268c2ecf20Sopenharmony_ci num_fcntl_locks, 38278c2ecf20Sopenharmony_ci num_flock_locks); 38288c2ecf20Sopenharmony_ci if (err) { 38298c2ecf20Sopenharmony_ci kfree(flocks); 38308c2ecf20Sopenharmony_ci flocks = NULL; 38318c2ecf20Sopenharmony_ci if (err == -ENOSPC) 38328c2ecf20Sopenharmony_ci goto encode_again; 38338c2ecf20Sopenharmony_ci goto out_err; 38348c2ecf20Sopenharmony_ci } 38358c2ecf20Sopenharmony_ci } else { 38368c2ecf20Sopenharmony_ci kfree(flocks); 38378c2ecf20Sopenharmony_ci flocks = NULL; 38388c2ecf20Sopenharmony_ci } 38398c2ecf20Sopenharmony_ci 38408c2ecf20Sopenharmony_ci if (recon_state->msg_version >= 3) { 38418c2ecf20Sopenharmony_ci /* version, compat_version and struct_len */ 38428c2ecf20Sopenharmony_ci total_len += 2 * sizeof(u8) + sizeof(u32); 38438c2ecf20Sopenharmony_ci struct_v = 2; 38448c2ecf20Sopenharmony_ci } 38458c2ecf20Sopenharmony_ci /* 38468c2ecf20Sopenharmony_ci * number of encoded locks is stable, so copy to pagelist 38478c2ecf20Sopenharmony_ci */ 38488c2ecf20Sopenharmony_ci struct_len = 2 * sizeof(u32) + 38498c2ecf20Sopenharmony_ci (num_fcntl_locks + num_flock_locks) * 38508c2ecf20Sopenharmony_ci sizeof(struct ceph_filelock); 38518c2ecf20Sopenharmony_ci rec.v2.flock_len = cpu_to_le32(struct_len); 38528c2ecf20Sopenharmony_ci 38538c2ecf20Sopenharmony_ci struct_len += sizeof(u32) + pathlen + sizeof(rec.v2); 38548c2ecf20Sopenharmony_ci 38558c2ecf20Sopenharmony_ci if (struct_v >= 2) 38568c2ecf20Sopenharmony_ci struct_len += sizeof(u64); /* snap_follows */ 38578c2ecf20Sopenharmony_ci 
38588c2ecf20Sopenharmony_ci total_len += struct_len; 38598c2ecf20Sopenharmony_ci 38608c2ecf20Sopenharmony_ci if (pagelist->length + total_len > RECONNECT_MAX_SIZE) { 38618c2ecf20Sopenharmony_ci err = send_reconnect_partial(recon_state); 38628c2ecf20Sopenharmony_ci if (err) 38638c2ecf20Sopenharmony_ci goto out_freeflocks; 38648c2ecf20Sopenharmony_ci pagelist = recon_state->pagelist; 38658c2ecf20Sopenharmony_ci } 38668c2ecf20Sopenharmony_ci 38678c2ecf20Sopenharmony_ci err = ceph_pagelist_reserve(pagelist, total_len); 38688c2ecf20Sopenharmony_ci if (err) 38698c2ecf20Sopenharmony_ci goto out_freeflocks; 38708c2ecf20Sopenharmony_ci 38718c2ecf20Sopenharmony_ci ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); 38728c2ecf20Sopenharmony_ci if (recon_state->msg_version >= 3) { 38738c2ecf20Sopenharmony_ci ceph_pagelist_encode_8(pagelist, struct_v); 38748c2ecf20Sopenharmony_ci ceph_pagelist_encode_8(pagelist, 1); 38758c2ecf20Sopenharmony_ci ceph_pagelist_encode_32(pagelist, struct_len); 38768c2ecf20Sopenharmony_ci } 38778c2ecf20Sopenharmony_ci ceph_pagelist_encode_string(pagelist, path, pathlen); 38788c2ecf20Sopenharmony_ci ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2)); 38798c2ecf20Sopenharmony_ci ceph_locks_to_pagelist(flocks, pagelist, 38808c2ecf20Sopenharmony_ci num_fcntl_locks, num_flock_locks); 38818c2ecf20Sopenharmony_ci if (struct_v >= 2) 38828c2ecf20Sopenharmony_ci ceph_pagelist_encode_64(pagelist, snap_follows); 38838c2ecf20Sopenharmony_ciout_freeflocks: 38848c2ecf20Sopenharmony_ci kfree(flocks); 38858c2ecf20Sopenharmony_ci } else { 38868c2ecf20Sopenharmony_ci err = ceph_pagelist_reserve(pagelist, 38878c2ecf20Sopenharmony_ci sizeof(u64) + sizeof(u32) + 38888c2ecf20Sopenharmony_ci pathlen + sizeof(rec.v1)); 38898c2ecf20Sopenharmony_ci if (err) 38908c2ecf20Sopenharmony_ci goto out_err; 38918c2ecf20Sopenharmony_ci 38928c2ecf20Sopenharmony_ci ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); 38938c2ecf20Sopenharmony_ci ceph_pagelist_encode_string(pagelist, 
path, pathlen); 38948c2ecf20Sopenharmony_ci ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); 38958c2ecf20Sopenharmony_ci } 38968c2ecf20Sopenharmony_ci 38978c2ecf20Sopenharmony_ciout_err: 38988c2ecf20Sopenharmony_ci ceph_mdsc_free_path(path, pathlen); 38998c2ecf20Sopenharmony_ci if (!err) 39008c2ecf20Sopenharmony_ci recon_state->nr_caps++; 39018c2ecf20Sopenharmony_ci return err; 39028c2ecf20Sopenharmony_ci} 39038c2ecf20Sopenharmony_ci 39048c2ecf20Sopenharmony_cistatic int encode_snap_realms(struct ceph_mds_client *mdsc, 39058c2ecf20Sopenharmony_ci struct ceph_reconnect_state *recon_state) 39068c2ecf20Sopenharmony_ci{ 39078c2ecf20Sopenharmony_ci struct rb_node *p; 39088c2ecf20Sopenharmony_ci struct ceph_pagelist *pagelist = recon_state->pagelist; 39098c2ecf20Sopenharmony_ci int err = 0; 39108c2ecf20Sopenharmony_ci 39118c2ecf20Sopenharmony_ci if (recon_state->msg_version >= 4) { 39128c2ecf20Sopenharmony_ci err = ceph_pagelist_encode_32(pagelist, mdsc->num_snap_realms); 39138c2ecf20Sopenharmony_ci if (err < 0) 39148c2ecf20Sopenharmony_ci goto fail; 39158c2ecf20Sopenharmony_ci } 39168c2ecf20Sopenharmony_ci 39178c2ecf20Sopenharmony_ci /* 39188c2ecf20Sopenharmony_ci * snaprealms. we provide mds with the ino, seq (version), and 39198c2ecf20Sopenharmony_ci * parent for all of our realms. If the mds has any newer info, 39208c2ecf20Sopenharmony_ci * it will tell us. 
39218c2ecf20Sopenharmony_ci */ 39228c2ecf20Sopenharmony_ci for (p = rb_first(&mdsc->snap_realms); p; p = rb_next(p)) { 39238c2ecf20Sopenharmony_ci struct ceph_snap_realm *realm = 39248c2ecf20Sopenharmony_ci rb_entry(p, struct ceph_snap_realm, node); 39258c2ecf20Sopenharmony_ci struct ceph_mds_snaprealm_reconnect sr_rec; 39268c2ecf20Sopenharmony_ci 39278c2ecf20Sopenharmony_ci if (recon_state->msg_version >= 4) { 39288c2ecf20Sopenharmony_ci size_t need = sizeof(u8) * 2 + sizeof(u32) + 39298c2ecf20Sopenharmony_ci sizeof(sr_rec); 39308c2ecf20Sopenharmony_ci 39318c2ecf20Sopenharmony_ci if (pagelist->length + need > RECONNECT_MAX_SIZE) { 39328c2ecf20Sopenharmony_ci err = send_reconnect_partial(recon_state); 39338c2ecf20Sopenharmony_ci if (err) 39348c2ecf20Sopenharmony_ci goto fail; 39358c2ecf20Sopenharmony_ci pagelist = recon_state->pagelist; 39368c2ecf20Sopenharmony_ci } 39378c2ecf20Sopenharmony_ci 39388c2ecf20Sopenharmony_ci err = ceph_pagelist_reserve(pagelist, need); 39398c2ecf20Sopenharmony_ci if (err) 39408c2ecf20Sopenharmony_ci goto fail; 39418c2ecf20Sopenharmony_ci 39428c2ecf20Sopenharmony_ci ceph_pagelist_encode_8(pagelist, 1); 39438c2ecf20Sopenharmony_ci ceph_pagelist_encode_8(pagelist, 1); 39448c2ecf20Sopenharmony_ci ceph_pagelist_encode_32(pagelist, sizeof(sr_rec)); 39458c2ecf20Sopenharmony_ci } 39468c2ecf20Sopenharmony_ci 39478c2ecf20Sopenharmony_ci dout(" adding snap realm %llx seq %lld parent %llx\n", 39488c2ecf20Sopenharmony_ci realm->ino, realm->seq, realm->parent_ino); 39498c2ecf20Sopenharmony_ci sr_rec.ino = cpu_to_le64(realm->ino); 39508c2ecf20Sopenharmony_ci sr_rec.seq = cpu_to_le64(realm->seq); 39518c2ecf20Sopenharmony_ci sr_rec.parent = cpu_to_le64(realm->parent_ino); 39528c2ecf20Sopenharmony_ci 39538c2ecf20Sopenharmony_ci err = ceph_pagelist_append(pagelist, &sr_rec, sizeof(sr_rec)); 39548c2ecf20Sopenharmony_ci if (err) 39558c2ecf20Sopenharmony_ci goto fail; 39568c2ecf20Sopenharmony_ci 39578c2ecf20Sopenharmony_ci recon_state->nr_realms++; 
39588c2ecf20Sopenharmony_ci } 39598c2ecf20Sopenharmony_cifail: 39608c2ecf20Sopenharmony_ci return err; 39618c2ecf20Sopenharmony_ci} 39628c2ecf20Sopenharmony_ci 39638c2ecf20Sopenharmony_ci 39648c2ecf20Sopenharmony_ci/* 39658c2ecf20Sopenharmony_ci * If an MDS fails and recovers, clients need to reconnect in order to 39668c2ecf20Sopenharmony_ci * reestablish shared state. This includes all caps issued through 39678c2ecf20Sopenharmony_ci * this session _and_ the snap_realm hierarchy. Because it's not 39688c2ecf20Sopenharmony_ci * clear which snap realms the mds cares about, we send everything we 39698c2ecf20Sopenharmony_ci * know about.. that ensures we'll then get any new info the 39708c2ecf20Sopenharmony_ci * recovering MDS might have. 39718c2ecf20Sopenharmony_ci * 39728c2ecf20Sopenharmony_ci * This is a relatively heavyweight operation, but it's rare. 39738c2ecf20Sopenharmony_ci */ 39748c2ecf20Sopenharmony_cistatic void send_mds_reconnect(struct ceph_mds_client *mdsc, 39758c2ecf20Sopenharmony_ci struct ceph_mds_session *session) 39768c2ecf20Sopenharmony_ci{ 39778c2ecf20Sopenharmony_ci struct ceph_msg *reply; 39788c2ecf20Sopenharmony_ci int mds = session->s_mds; 39798c2ecf20Sopenharmony_ci int err = -ENOMEM; 39808c2ecf20Sopenharmony_ci struct ceph_reconnect_state recon_state = { 39818c2ecf20Sopenharmony_ci .session = session, 39828c2ecf20Sopenharmony_ci }; 39838c2ecf20Sopenharmony_ci LIST_HEAD(dispose); 39848c2ecf20Sopenharmony_ci 39858c2ecf20Sopenharmony_ci pr_info("mds%d reconnect start\n", mds); 39868c2ecf20Sopenharmony_ci 39878c2ecf20Sopenharmony_ci recon_state.pagelist = ceph_pagelist_alloc(GFP_NOFS); 39888c2ecf20Sopenharmony_ci if (!recon_state.pagelist) 39898c2ecf20Sopenharmony_ci goto fail_nopagelist; 39908c2ecf20Sopenharmony_ci 39918c2ecf20Sopenharmony_ci reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false); 39928c2ecf20Sopenharmony_ci if (!reply) 39938c2ecf20Sopenharmony_ci goto fail_nomsg; 39948c2ecf20Sopenharmony_ci 
39958c2ecf20Sopenharmony_ci xa_destroy(&session->s_delegated_inos); 39968c2ecf20Sopenharmony_ci 39978c2ecf20Sopenharmony_ci mutex_lock(&session->s_mutex); 39988c2ecf20Sopenharmony_ci session->s_state = CEPH_MDS_SESSION_RECONNECTING; 39998c2ecf20Sopenharmony_ci session->s_seq = 0; 40008c2ecf20Sopenharmony_ci 40018c2ecf20Sopenharmony_ci dout("session %p state %s\n", session, 40028c2ecf20Sopenharmony_ci ceph_session_state_name(session->s_state)); 40038c2ecf20Sopenharmony_ci 40048c2ecf20Sopenharmony_ci spin_lock(&session->s_gen_ttl_lock); 40058c2ecf20Sopenharmony_ci session->s_cap_gen++; 40068c2ecf20Sopenharmony_ci spin_unlock(&session->s_gen_ttl_lock); 40078c2ecf20Sopenharmony_ci 40088c2ecf20Sopenharmony_ci spin_lock(&session->s_cap_lock); 40098c2ecf20Sopenharmony_ci /* don't know if session is readonly */ 40108c2ecf20Sopenharmony_ci session->s_readonly = 0; 40118c2ecf20Sopenharmony_ci /* 40128c2ecf20Sopenharmony_ci * notify __ceph_remove_cap() that we are composing cap reconnect. 40138c2ecf20Sopenharmony_ci * If a cap get released before being added to the cap reconnect, 40148c2ecf20Sopenharmony_ci * __ceph_remove_cap() should skip queuing cap release. 
40158c2ecf20Sopenharmony_ci */ 40168c2ecf20Sopenharmony_ci session->s_cap_reconnect = 1; 40178c2ecf20Sopenharmony_ci /* drop old cap expires; we're about to reestablish that state */ 40188c2ecf20Sopenharmony_ci detach_cap_releases(session, &dispose); 40198c2ecf20Sopenharmony_ci spin_unlock(&session->s_cap_lock); 40208c2ecf20Sopenharmony_ci dispose_cap_releases(mdsc, &dispose); 40218c2ecf20Sopenharmony_ci 40228c2ecf20Sopenharmony_ci /* trim unused caps to reduce MDS's cache rejoin time */ 40238c2ecf20Sopenharmony_ci if (mdsc->fsc->sb->s_root) 40248c2ecf20Sopenharmony_ci shrink_dcache_parent(mdsc->fsc->sb->s_root); 40258c2ecf20Sopenharmony_ci 40268c2ecf20Sopenharmony_ci ceph_con_close(&session->s_con); 40278c2ecf20Sopenharmony_ci ceph_con_open(&session->s_con, 40288c2ecf20Sopenharmony_ci CEPH_ENTITY_TYPE_MDS, mds, 40298c2ecf20Sopenharmony_ci ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); 40308c2ecf20Sopenharmony_ci 40318c2ecf20Sopenharmony_ci /* replay unsafe requests */ 40328c2ecf20Sopenharmony_ci replay_unsafe_requests(mdsc, session); 40338c2ecf20Sopenharmony_ci 40348c2ecf20Sopenharmony_ci ceph_early_kick_flushing_caps(mdsc, session); 40358c2ecf20Sopenharmony_ci 40368c2ecf20Sopenharmony_ci down_read(&mdsc->snap_rwsem); 40378c2ecf20Sopenharmony_ci 40388c2ecf20Sopenharmony_ci /* placeholder for nr_caps */ 40398c2ecf20Sopenharmony_ci err = ceph_pagelist_encode_32(recon_state.pagelist, 0); 40408c2ecf20Sopenharmony_ci if (err) 40418c2ecf20Sopenharmony_ci goto fail; 40428c2ecf20Sopenharmony_ci 40438c2ecf20Sopenharmony_ci if (test_bit(CEPHFS_FEATURE_MULTI_RECONNECT, &session->s_features)) { 40448c2ecf20Sopenharmony_ci recon_state.msg_version = 3; 40458c2ecf20Sopenharmony_ci recon_state.allow_multi = true; 40468c2ecf20Sopenharmony_ci } else if (session->s_con.peer_features & CEPH_FEATURE_MDSENC) { 40478c2ecf20Sopenharmony_ci recon_state.msg_version = 3; 40488c2ecf20Sopenharmony_ci } else { 40498c2ecf20Sopenharmony_ci recon_state.msg_version = 2; 40508c2ecf20Sopenharmony_ci } 
40518c2ecf20Sopenharmony_ci /* trsaverse this session's caps */ 40528c2ecf20Sopenharmony_ci err = ceph_iterate_session_caps(session, reconnect_caps_cb, &recon_state); 40538c2ecf20Sopenharmony_ci 40548c2ecf20Sopenharmony_ci spin_lock(&session->s_cap_lock); 40558c2ecf20Sopenharmony_ci session->s_cap_reconnect = 0; 40568c2ecf20Sopenharmony_ci spin_unlock(&session->s_cap_lock); 40578c2ecf20Sopenharmony_ci 40588c2ecf20Sopenharmony_ci if (err < 0) 40598c2ecf20Sopenharmony_ci goto fail; 40608c2ecf20Sopenharmony_ci 40618c2ecf20Sopenharmony_ci /* check if all realms can be encoded into current message */ 40628c2ecf20Sopenharmony_ci if (mdsc->num_snap_realms) { 40638c2ecf20Sopenharmony_ci size_t total_len = 40648c2ecf20Sopenharmony_ci recon_state.pagelist->length + 40658c2ecf20Sopenharmony_ci mdsc->num_snap_realms * 40668c2ecf20Sopenharmony_ci sizeof(struct ceph_mds_snaprealm_reconnect); 40678c2ecf20Sopenharmony_ci if (recon_state.msg_version >= 4) { 40688c2ecf20Sopenharmony_ci /* number of realms */ 40698c2ecf20Sopenharmony_ci total_len += sizeof(u32); 40708c2ecf20Sopenharmony_ci /* version, compat_version and struct_len */ 40718c2ecf20Sopenharmony_ci total_len += mdsc->num_snap_realms * 40728c2ecf20Sopenharmony_ci (2 * sizeof(u8) + sizeof(u32)); 40738c2ecf20Sopenharmony_ci } 40748c2ecf20Sopenharmony_ci if (total_len > RECONNECT_MAX_SIZE) { 40758c2ecf20Sopenharmony_ci if (!recon_state.allow_multi) { 40768c2ecf20Sopenharmony_ci err = -ENOSPC; 40778c2ecf20Sopenharmony_ci goto fail; 40788c2ecf20Sopenharmony_ci } 40798c2ecf20Sopenharmony_ci if (recon_state.nr_caps) { 40808c2ecf20Sopenharmony_ci err = send_reconnect_partial(&recon_state); 40818c2ecf20Sopenharmony_ci if (err) 40828c2ecf20Sopenharmony_ci goto fail; 40838c2ecf20Sopenharmony_ci } 40848c2ecf20Sopenharmony_ci recon_state.msg_version = 5; 40858c2ecf20Sopenharmony_ci } 40868c2ecf20Sopenharmony_ci } 40878c2ecf20Sopenharmony_ci 40888c2ecf20Sopenharmony_ci err = encode_snap_realms(mdsc, &recon_state); 
40898c2ecf20Sopenharmony_ci if (err < 0) 40908c2ecf20Sopenharmony_ci goto fail; 40918c2ecf20Sopenharmony_ci 40928c2ecf20Sopenharmony_ci if (recon_state.msg_version >= 5) { 40938c2ecf20Sopenharmony_ci err = ceph_pagelist_encode_8(recon_state.pagelist, 0); 40948c2ecf20Sopenharmony_ci if (err < 0) 40958c2ecf20Sopenharmony_ci goto fail; 40968c2ecf20Sopenharmony_ci } 40978c2ecf20Sopenharmony_ci 40988c2ecf20Sopenharmony_ci if (recon_state.nr_caps || recon_state.nr_realms) { 40998c2ecf20Sopenharmony_ci struct page *page = 41008c2ecf20Sopenharmony_ci list_first_entry(&recon_state.pagelist->head, 41018c2ecf20Sopenharmony_ci struct page, lru); 41028c2ecf20Sopenharmony_ci __le32 *addr = kmap_atomic(page); 41038c2ecf20Sopenharmony_ci if (recon_state.nr_caps) { 41048c2ecf20Sopenharmony_ci WARN_ON(recon_state.nr_realms != mdsc->num_snap_realms); 41058c2ecf20Sopenharmony_ci *addr = cpu_to_le32(recon_state.nr_caps); 41068c2ecf20Sopenharmony_ci } else if (recon_state.msg_version >= 4) { 41078c2ecf20Sopenharmony_ci *(addr + 1) = cpu_to_le32(recon_state.nr_realms); 41088c2ecf20Sopenharmony_ci } 41098c2ecf20Sopenharmony_ci kunmap_atomic(addr); 41108c2ecf20Sopenharmony_ci } 41118c2ecf20Sopenharmony_ci 41128c2ecf20Sopenharmony_ci reply->hdr.version = cpu_to_le16(recon_state.msg_version); 41138c2ecf20Sopenharmony_ci if (recon_state.msg_version >= 4) 41148c2ecf20Sopenharmony_ci reply->hdr.compat_version = cpu_to_le16(4); 41158c2ecf20Sopenharmony_ci 41168c2ecf20Sopenharmony_ci reply->hdr.data_len = cpu_to_le32(recon_state.pagelist->length); 41178c2ecf20Sopenharmony_ci ceph_msg_data_add_pagelist(reply, recon_state.pagelist); 41188c2ecf20Sopenharmony_ci 41198c2ecf20Sopenharmony_ci ceph_con_send(&session->s_con, reply); 41208c2ecf20Sopenharmony_ci 41218c2ecf20Sopenharmony_ci mutex_unlock(&session->s_mutex); 41228c2ecf20Sopenharmony_ci 41238c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 41248c2ecf20Sopenharmony_ci __wake_requests(mdsc, &session->s_waiting); 41258c2ecf20Sopenharmony_ci 
mutex_unlock(&mdsc->mutex); 41268c2ecf20Sopenharmony_ci 41278c2ecf20Sopenharmony_ci up_read(&mdsc->snap_rwsem); 41288c2ecf20Sopenharmony_ci ceph_pagelist_release(recon_state.pagelist); 41298c2ecf20Sopenharmony_ci return; 41308c2ecf20Sopenharmony_ci 41318c2ecf20Sopenharmony_cifail: 41328c2ecf20Sopenharmony_ci ceph_msg_put(reply); 41338c2ecf20Sopenharmony_ci up_read(&mdsc->snap_rwsem); 41348c2ecf20Sopenharmony_ci mutex_unlock(&session->s_mutex); 41358c2ecf20Sopenharmony_cifail_nomsg: 41368c2ecf20Sopenharmony_ci ceph_pagelist_release(recon_state.pagelist); 41378c2ecf20Sopenharmony_cifail_nopagelist: 41388c2ecf20Sopenharmony_ci pr_err("error %d preparing reconnect for mds%d\n", err, mds); 41398c2ecf20Sopenharmony_ci return; 41408c2ecf20Sopenharmony_ci} 41418c2ecf20Sopenharmony_ci 41428c2ecf20Sopenharmony_ci 41438c2ecf20Sopenharmony_ci/* 41448c2ecf20Sopenharmony_ci * compare old and new mdsmaps, kicking requests 41458c2ecf20Sopenharmony_ci * and closing out old connections as necessary 41468c2ecf20Sopenharmony_ci * 41478c2ecf20Sopenharmony_ci * called under mdsc->mutex. 
41488c2ecf20Sopenharmony_ci */ 41498c2ecf20Sopenharmony_cistatic void check_new_map(struct ceph_mds_client *mdsc, 41508c2ecf20Sopenharmony_ci struct ceph_mdsmap *newmap, 41518c2ecf20Sopenharmony_ci struct ceph_mdsmap *oldmap) 41528c2ecf20Sopenharmony_ci{ 41538c2ecf20Sopenharmony_ci int i; 41548c2ecf20Sopenharmony_ci int oldstate, newstate; 41558c2ecf20Sopenharmony_ci struct ceph_mds_session *s; 41568c2ecf20Sopenharmony_ci 41578c2ecf20Sopenharmony_ci dout("check_new_map new %u old %u\n", 41588c2ecf20Sopenharmony_ci newmap->m_epoch, oldmap->m_epoch); 41598c2ecf20Sopenharmony_ci 41608c2ecf20Sopenharmony_ci for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) { 41618c2ecf20Sopenharmony_ci if (!mdsc->sessions[i]) 41628c2ecf20Sopenharmony_ci continue; 41638c2ecf20Sopenharmony_ci s = mdsc->sessions[i]; 41648c2ecf20Sopenharmony_ci oldstate = ceph_mdsmap_get_state(oldmap, i); 41658c2ecf20Sopenharmony_ci newstate = ceph_mdsmap_get_state(newmap, i); 41668c2ecf20Sopenharmony_ci 41678c2ecf20Sopenharmony_ci dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n", 41688c2ecf20Sopenharmony_ci i, ceph_mds_state_name(oldstate), 41698c2ecf20Sopenharmony_ci ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "", 41708c2ecf20Sopenharmony_ci ceph_mds_state_name(newstate), 41718c2ecf20Sopenharmony_ci ceph_mdsmap_is_laggy(newmap, i) ? 
" (laggy)" : "", 41728c2ecf20Sopenharmony_ci ceph_session_state_name(s->s_state)); 41738c2ecf20Sopenharmony_ci 41748c2ecf20Sopenharmony_ci if (i >= newmap->possible_max_rank) { 41758c2ecf20Sopenharmony_ci /* force close session for stopped mds */ 41768c2ecf20Sopenharmony_ci ceph_get_mds_session(s); 41778c2ecf20Sopenharmony_ci __unregister_session(mdsc, s); 41788c2ecf20Sopenharmony_ci __wake_requests(mdsc, &s->s_waiting); 41798c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 41808c2ecf20Sopenharmony_ci 41818c2ecf20Sopenharmony_ci mutex_lock(&s->s_mutex); 41828c2ecf20Sopenharmony_ci cleanup_session_requests(mdsc, s); 41838c2ecf20Sopenharmony_ci remove_session_caps(s); 41848c2ecf20Sopenharmony_ci mutex_unlock(&s->s_mutex); 41858c2ecf20Sopenharmony_ci 41868c2ecf20Sopenharmony_ci ceph_put_mds_session(s); 41878c2ecf20Sopenharmony_ci 41888c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 41898c2ecf20Sopenharmony_ci kick_requests(mdsc, i); 41908c2ecf20Sopenharmony_ci continue; 41918c2ecf20Sopenharmony_ci } 41928c2ecf20Sopenharmony_ci 41938c2ecf20Sopenharmony_ci if (memcmp(ceph_mdsmap_get_addr(oldmap, i), 41948c2ecf20Sopenharmony_ci ceph_mdsmap_get_addr(newmap, i), 41958c2ecf20Sopenharmony_ci sizeof(struct ceph_entity_addr))) { 41968c2ecf20Sopenharmony_ci /* just close it */ 41978c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 41988c2ecf20Sopenharmony_ci mutex_lock(&s->s_mutex); 41998c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 42008c2ecf20Sopenharmony_ci ceph_con_close(&s->s_con); 42018c2ecf20Sopenharmony_ci mutex_unlock(&s->s_mutex); 42028c2ecf20Sopenharmony_ci s->s_state = CEPH_MDS_SESSION_RESTARTING; 42038c2ecf20Sopenharmony_ci } else if (oldstate == newstate) { 42048c2ecf20Sopenharmony_ci continue; /* nothing new with this mds */ 42058c2ecf20Sopenharmony_ci } 42068c2ecf20Sopenharmony_ci 42078c2ecf20Sopenharmony_ci /* 42088c2ecf20Sopenharmony_ci * send reconnect? 
42098c2ecf20Sopenharmony_ci */ 42108c2ecf20Sopenharmony_ci if (s->s_state == CEPH_MDS_SESSION_RESTARTING && 42118c2ecf20Sopenharmony_ci newstate >= CEPH_MDS_STATE_RECONNECT) { 42128c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 42138c2ecf20Sopenharmony_ci send_mds_reconnect(mdsc, s); 42148c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 42158c2ecf20Sopenharmony_ci } 42168c2ecf20Sopenharmony_ci 42178c2ecf20Sopenharmony_ci /* 42188c2ecf20Sopenharmony_ci * kick request on any mds that has gone active. 42198c2ecf20Sopenharmony_ci */ 42208c2ecf20Sopenharmony_ci if (oldstate < CEPH_MDS_STATE_ACTIVE && 42218c2ecf20Sopenharmony_ci newstate >= CEPH_MDS_STATE_ACTIVE) { 42228c2ecf20Sopenharmony_ci if (oldstate != CEPH_MDS_STATE_CREATING && 42238c2ecf20Sopenharmony_ci oldstate != CEPH_MDS_STATE_STARTING) 42248c2ecf20Sopenharmony_ci pr_info("mds%d recovery completed\n", s->s_mds); 42258c2ecf20Sopenharmony_ci kick_requests(mdsc, i); 42268c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 42278c2ecf20Sopenharmony_ci mutex_lock(&s->s_mutex); 42288c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 42298c2ecf20Sopenharmony_ci ceph_kick_flushing_caps(mdsc, s); 42308c2ecf20Sopenharmony_ci mutex_unlock(&s->s_mutex); 42318c2ecf20Sopenharmony_ci wake_up_session_caps(s, RECONNECT); 42328c2ecf20Sopenharmony_ci } 42338c2ecf20Sopenharmony_ci } 42348c2ecf20Sopenharmony_ci 42358c2ecf20Sopenharmony_ci for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) { 42368c2ecf20Sopenharmony_ci s = mdsc->sessions[i]; 42378c2ecf20Sopenharmony_ci if (!s) 42388c2ecf20Sopenharmony_ci continue; 42398c2ecf20Sopenharmony_ci if (!ceph_mdsmap_is_laggy(newmap, i)) 42408c2ecf20Sopenharmony_ci continue; 42418c2ecf20Sopenharmony_ci if (s->s_state == CEPH_MDS_SESSION_OPEN || 42428c2ecf20Sopenharmony_ci s->s_state == CEPH_MDS_SESSION_HUNG || 42438c2ecf20Sopenharmony_ci s->s_state == CEPH_MDS_SESSION_CLOSING) { 42448c2ecf20Sopenharmony_ci dout(" connecting to export targets of laggy mds%d\n", 
42458c2ecf20Sopenharmony_ci i); 42468c2ecf20Sopenharmony_ci __open_export_target_sessions(mdsc, s); 42478c2ecf20Sopenharmony_ci } 42488c2ecf20Sopenharmony_ci } 42498c2ecf20Sopenharmony_ci} 42508c2ecf20Sopenharmony_ci 42518c2ecf20Sopenharmony_ci 42528c2ecf20Sopenharmony_ci 42538c2ecf20Sopenharmony_ci/* 42548c2ecf20Sopenharmony_ci * leases 42558c2ecf20Sopenharmony_ci */ 42568c2ecf20Sopenharmony_ci 42578c2ecf20Sopenharmony_ci/* 42588c2ecf20Sopenharmony_ci * caller must hold session s_mutex, dentry->d_lock 42598c2ecf20Sopenharmony_ci */ 42608c2ecf20Sopenharmony_civoid __ceph_mdsc_drop_dentry_lease(struct dentry *dentry) 42618c2ecf20Sopenharmony_ci{ 42628c2ecf20Sopenharmony_ci struct ceph_dentry_info *di = ceph_dentry(dentry); 42638c2ecf20Sopenharmony_ci 42648c2ecf20Sopenharmony_ci ceph_put_mds_session(di->lease_session); 42658c2ecf20Sopenharmony_ci di->lease_session = NULL; 42668c2ecf20Sopenharmony_ci} 42678c2ecf20Sopenharmony_ci 42688c2ecf20Sopenharmony_cistatic void handle_lease(struct ceph_mds_client *mdsc, 42698c2ecf20Sopenharmony_ci struct ceph_mds_session *session, 42708c2ecf20Sopenharmony_ci struct ceph_msg *msg) 42718c2ecf20Sopenharmony_ci{ 42728c2ecf20Sopenharmony_ci struct super_block *sb = mdsc->fsc->sb; 42738c2ecf20Sopenharmony_ci struct inode *inode; 42748c2ecf20Sopenharmony_ci struct dentry *parent, *dentry; 42758c2ecf20Sopenharmony_ci struct ceph_dentry_info *di; 42768c2ecf20Sopenharmony_ci int mds = session->s_mds; 42778c2ecf20Sopenharmony_ci struct ceph_mds_lease *h = msg->front.iov_base; 42788c2ecf20Sopenharmony_ci u32 seq; 42798c2ecf20Sopenharmony_ci struct ceph_vino vino; 42808c2ecf20Sopenharmony_ci struct qstr dname; 42818c2ecf20Sopenharmony_ci int release = 0; 42828c2ecf20Sopenharmony_ci 42838c2ecf20Sopenharmony_ci dout("handle_lease from mds%d\n", mds); 42848c2ecf20Sopenharmony_ci 42858c2ecf20Sopenharmony_ci /* decode */ 42868c2ecf20Sopenharmony_ci if (msg->front.iov_len < sizeof(*h) + sizeof(u32)) 42878c2ecf20Sopenharmony_ci goto bad; 
42888c2ecf20Sopenharmony_ci vino.ino = le64_to_cpu(h->ino); 42898c2ecf20Sopenharmony_ci vino.snap = CEPH_NOSNAP; 42908c2ecf20Sopenharmony_ci seq = le32_to_cpu(h->seq); 42918c2ecf20Sopenharmony_ci dname.len = get_unaligned_le32(h + 1); 42928c2ecf20Sopenharmony_ci if (msg->front.iov_len < sizeof(*h) + sizeof(u32) + dname.len) 42938c2ecf20Sopenharmony_ci goto bad; 42948c2ecf20Sopenharmony_ci dname.name = (void *)(h + 1) + sizeof(u32); 42958c2ecf20Sopenharmony_ci 42968c2ecf20Sopenharmony_ci /* lookup inode */ 42978c2ecf20Sopenharmony_ci inode = ceph_find_inode(sb, vino); 42988c2ecf20Sopenharmony_ci dout("handle_lease %s, ino %llx %p %.*s\n", 42998c2ecf20Sopenharmony_ci ceph_lease_op_name(h->action), vino.ino, inode, 43008c2ecf20Sopenharmony_ci dname.len, dname.name); 43018c2ecf20Sopenharmony_ci 43028c2ecf20Sopenharmony_ci mutex_lock(&session->s_mutex); 43038c2ecf20Sopenharmony_ci inc_session_sequence(session); 43048c2ecf20Sopenharmony_ci 43058c2ecf20Sopenharmony_ci if (!inode) { 43068c2ecf20Sopenharmony_ci dout("handle_lease no inode %llx\n", vino.ino); 43078c2ecf20Sopenharmony_ci goto release; 43088c2ecf20Sopenharmony_ci } 43098c2ecf20Sopenharmony_ci 43108c2ecf20Sopenharmony_ci /* dentry */ 43118c2ecf20Sopenharmony_ci parent = d_find_alias(inode); 43128c2ecf20Sopenharmony_ci if (!parent) { 43138c2ecf20Sopenharmony_ci dout("no parent dentry on inode %p\n", inode); 43148c2ecf20Sopenharmony_ci WARN_ON(1); 43158c2ecf20Sopenharmony_ci goto release; /* hrm... 
*/ 43168c2ecf20Sopenharmony_ci } 43178c2ecf20Sopenharmony_ci dname.hash = full_name_hash(parent, dname.name, dname.len); 43188c2ecf20Sopenharmony_ci dentry = d_lookup(parent, &dname); 43198c2ecf20Sopenharmony_ci dput(parent); 43208c2ecf20Sopenharmony_ci if (!dentry) 43218c2ecf20Sopenharmony_ci goto release; 43228c2ecf20Sopenharmony_ci 43238c2ecf20Sopenharmony_ci spin_lock(&dentry->d_lock); 43248c2ecf20Sopenharmony_ci di = ceph_dentry(dentry); 43258c2ecf20Sopenharmony_ci switch (h->action) { 43268c2ecf20Sopenharmony_ci case CEPH_MDS_LEASE_REVOKE: 43278c2ecf20Sopenharmony_ci if (di->lease_session == session) { 43288c2ecf20Sopenharmony_ci if (ceph_seq_cmp(di->lease_seq, seq) > 0) 43298c2ecf20Sopenharmony_ci h->seq = cpu_to_le32(di->lease_seq); 43308c2ecf20Sopenharmony_ci __ceph_mdsc_drop_dentry_lease(dentry); 43318c2ecf20Sopenharmony_ci } 43328c2ecf20Sopenharmony_ci release = 1; 43338c2ecf20Sopenharmony_ci break; 43348c2ecf20Sopenharmony_ci 43358c2ecf20Sopenharmony_ci case CEPH_MDS_LEASE_RENEW: 43368c2ecf20Sopenharmony_ci if (di->lease_session == session && 43378c2ecf20Sopenharmony_ci di->lease_gen == session->s_cap_gen && 43388c2ecf20Sopenharmony_ci di->lease_renew_from && 43398c2ecf20Sopenharmony_ci di->lease_renew_after == 0) { 43408c2ecf20Sopenharmony_ci unsigned long duration = 43418c2ecf20Sopenharmony_ci msecs_to_jiffies(le32_to_cpu(h->duration_ms)); 43428c2ecf20Sopenharmony_ci 43438c2ecf20Sopenharmony_ci di->lease_seq = seq; 43448c2ecf20Sopenharmony_ci di->time = di->lease_renew_from + duration; 43458c2ecf20Sopenharmony_ci di->lease_renew_after = di->lease_renew_from + 43468c2ecf20Sopenharmony_ci (duration >> 1); 43478c2ecf20Sopenharmony_ci di->lease_renew_from = 0; 43488c2ecf20Sopenharmony_ci } 43498c2ecf20Sopenharmony_ci break; 43508c2ecf20Sopenharmony_ci } 43518c2ecf20Sopenharmony_ci spin_unlock(&dentry->d_lock); 43528c2ecf20Sopenharmony_ci dput(dentry); 43538c2ecf20Sopenharmony_ci 43548c2ecf20Sopenharmony_ci if (!release) 43558c2ecf20Sopenharmony_ci goto 
out; 43568c2ecf20Sopenharmony_ci 43578c2ecf20Sopenharmony_cirelease: 43588c2ecf20Sopenharmony_ci /* let's just reuse the same message */ 43598c2ecf20Sopenharmony_ci h->action = CEPH_MDS_LEASE_REVOKE_ACK; 43608c2ecf20Sopenharmony_ci ceph_msg_get(msg); 43618c2ecf20Sopenharmony_ci ceph_con_send(&session->s_con, msg); 43628c2ecf20Sopenharmony_ci 43638c2ecf20Sopenharmony_ciout: 43648c2ecf20Sopenharmony_ci mutex_unlock(&session->s_mutex); 43658c2ecf20Sopenharmony_ci /* avoid calling iput_final() in mds dispatch threads */ 43668c2ecf20Sopenharmony_ci ceph_async_iput(inode); 43678c2ecf20Sopenharmony_ci return; 43688c2ecf20Sopenharmony_ci 43698c2ecf20Sopenharmony_cibad: 43708c2ecf20Sopenharmony_ci pr_err("corrupt lease message\n"); 43718c2ecf20Sopenharmony_ci ceph_msg_dump(msg); 43728c2ecf20Sopenharmony_ci} 43738c2ecf20Sopenharmony_ci 43748c2ecf20Sopenharmony_civoid ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, 43758c2ecf20Sopenharmony_ci struct dentry *dentry, char action, 43768c2ecf20Sopenharmony_ci u32 seq) 43778c2ecf20Sopenharmony_ci{ 43788c2ecf20Sopenharmony_ci struct ceph_msg *msg; 43798c2ecf20Sopenharmony_ci struct ceph_mds_lease *lease; 43808c2ecf20Sopenharmony_ci struct inode *dir; 43818c2ecf20Sopenharmony_ci int len = sizeof(*lease) + sizeof(u32) + NAME_MAX; 43828c2ecf20Sopenharmony_ci 43838c2ecf20Sopenharmony_ci dout("lease_send_msg identry %p %s to mds%d\n", 43848c2ecf20Sopenharmony_ci dentry, ceph_lease_op_name(action), session->s_mds); 43858c2ecf20Sopenharmony_ci 43868c2ecf20Sopenharmony_ci msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false); 43878c2ecf20Sopenharmony_ci if (!msg) 43888c2ecf20Sopenharmony_ci return; 43898c2ecf20Sopenharmony_ci lease = msg->front.iov_base; 43908c2ecf20Sopenharmony_ci lease->action = action; 43918c2ecf20Sopenharmony_ci lease->seq = cpu_to_le32(seq); 43928c2ecf20Sopenharmony_ci 43938c2ecf20Sopenharmony_ci spin_lock(&dentry->d_lock); 43948c2ecf20Sopenharmony_ci dir = d_inode(dentry->d_parent); 
43958c2ecf20Sopenharmony_ci lease->ino = cpu_to_le64(ceph_ino(dir)); 43968c2ecf20Sopenharmony_ci lease->first = lease->last = cpu_to_le64(ceph_snap(dir)); 43978c2ecf20Sopenharmony_ci 43988c2ecf20Sopenharmony_ci put_unaligned_le32(dentry->d_name.len, lease + 1); 43998c2ecf20Sopenharmony_ci memcpy((void *)(lease + 1) + 4, 44008c2ecf20Sopenharmony_ci dentry->d_name.name, dentry->d_name.len); 44018c2ecf20Sopenharmony_ci spin_unlock(&dentry->d_lock); 44028c2ecf20Sopenharmony_ci /* 44038c2ecf20Sopenharmony_ci * if this is a preemptive lease RELEASE, no need to 44048c2ecf20Sopenharmony_ci * flush request stream, since the actual request will 44058c2ecf20Sopenharmony_ci * soon follow. 44068c2ecf20Sopenharmony_ci */ 44078c2ecf20Sopenharmony_ci msg->more_to_follow = (action == CEPH_MDS_LEASE_RELEASE); 44088c2ecf20Sopenharmony_ci 44098c2ecf20Sopenharmony_ci ceph_con_send(&session->s_con, msg); 44108c2ecf20Sopenharmony_ci} 44118c2ecf20Sopenharmony_ci 44128c2ecf20Sopenharmony_ci/* 44138c2ecf20Sopenharmony_ci * lock unlock the session, to wait ongoing session activities 44148c2ecf20Sopenharmony_ci */ 44158c2ecf20Sopenharmony_cistatic void lock_unlock_session(struct ceph_mds_session *s) 44168c2ecf20Sopenharmony_ci{ 44178c2ecf20Sopenharmony_ci mutex_lock(&s->s_mutex); 44188c2ecf20Sopenharmony_ci mutex_unlock(&s->s_mutex); 44198c2ecf20Sopenharmony_ci} 44208c2ecf20Sopenharmony_ci 44218c2ecf20Sopenharmony_cistatic void maybe_recover_session(struct ceph_mds_client *mdsc) 44228c2ecf20Sopenharmony_ci{ 44238c2ecf20Sopenharmony_ci struct ceph_fs_client *fsc = mdsc->fsc; 44248c2ecf20Sopenharmony_ci 44258c2ecf20Sopenharmony_ci if (!ceph_test_mount_opt(fsc, CLEANRECOVER)) 44268c2ecf20Sopenharmony_ci return; 44278c2ecf20Sopenharmony_ci 44288c2ecf20Sopenharmony_ci if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_MOUNTED) 44298c2ecf20Sopenharmony_ci return; 44308c2ecf20Sopenharmony_ci 44318c2ecf20Sopenharmony_ci if (!READ_ONCE(fsc->blocklisted)) 44328c2ecf20Sopenharmony_ci return; 
44338c2ecf20Sopenharmony_ci 44348c2ecf20Sopenharmony_ci if (fsc->last_auto_reconnect && 44358c2ecf20Sopenharmony_ci time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30)) 44368c2ecf20Sopenharmony_ci return; 44378c2ecf20Sopenharmony_ci 44388c2ecf20Sopenharmony_ci pr_info("auto reconnect after blocklisted\n"); 44398c2ecf20Sopenharmony_ci fsc->last_auto_reconnect = jiffies; 44408c2ecf20Sopenharmony_ci ceph_force_reconnect(fsc->sb); 44418c2ecf20Sopenharmony_ci} 44428c2ecf20Sopenharmony_ci 44438c2ecf20Sopenharmony_cibool check_session_state(struct ceph_mds_session *s) 44448c2ecf20Sopenharmony_ci{ 44458c2ecf20Sopenharmony_ci switch (s->s_state) { 44468c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_OPEN: 44478c2ecf20Sopenharmony_ci if (s->s_ttl && time_after(jiffies, s->s_ttl)) { 44488c2ecf20Sopenharmony_ci s->s_state = CEPH_MDS_SESSION_HUNG; 44498c2ecf20Sopenharmony_ci pr_info("mds%d hung\n", s->s_mds); 44508c2ecf20Sopenharmony_ci } 44518c2ecf20Sopenharmony_ci break; 44528c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_CLOSING: 44538c2ecf20Sopenharmony_ci /* Should never reach this when we're unmounting */ 44548c2ecf20Sopenharmony_ci WARN_ON_ONCE(s->s_ttl); 44558c2ecf20Sopenharmony_ci fallthrough; 44568c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_NEW: 44578c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_RESTARTING: 44588c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_CLOSED: 44598c2ecf20Sopenharmony_ci case CEPH_MDS_SESSION_REJECTED: 44608c2ecf20Sopenharmony_ci return false; 44618c2ecf20Sopenharmony_ci } 44628c2ecf20Sopenharmony_ci 44638c2ecf20Sopenharmony_ci return true; 44648c2ecf20Sopenharmony_ci} 44658c2ecf20Sopenharmony_ci 44668c2ecf20Sopenharmony_ci/* 44678c2ecf20Sopenharmony_ci * If the sequence is incremented while we're waiting on a REQUEST_CLOSE reply, 44688c2ecf20Sopenharmony_ci * then we need to retransmit that request. 
44698c2ecf20Sopenharmony_ci */ 44708c2ecf20Sopenharmony_civoid inc_session_sequence(struct ceph_mds_session *s) 44718c2ecf20Sopenharmony_ci{ 44728c2ecf20Sopenharmony_ci lockdep_assert_held(&s->s_mutex); 44738c2ecf20Sopenharmony_ci 44748c2ecf20Sopenharmony_ci s->s_seq++; 44758c2ecf20Sopenharmony_ci 44768c2ecf20Sopenharmony_ci if (s->s_state == CEPH_MDS_SESSION_CLOSING) { 44778c2ecf20Sopenharmony_ci int ret; 44788c2ecf20Sopenharmony_ci 44798c2ecf20Sopenharmony_ci dout("resending session close request for mds%d\n", s->s_mds); 44808c2ecf20Sopenharmony_ci ret = request_close_session(s); 44818c2ecf20Sopenharmony_ci if (ret < 0) 44828c2ecf20Sopenharmony_ci pr_err("unable to close session to mds%d: %d\n", 44838c2ecf20Sopenharmony_ci s->s_mds, ret); 44848c2ecf20Sopenharmony_ci } 44858c2ecf20Sopenharmony_ci} 44868c2ecf20Sopenharmony_ci 44878c2ecf20Sopenharmony_ci/* 44888c2ecf20Sopenharmony_ci * delayed work -- periodically trim expired leases, renew caps with mds. If 44898c2ecf20Sopenharmony_ci * the @delay parameter is set to 0 or if it's more than 5 secs, the default 44908c2ecf20Sopenharmony_ci * workqueue delay value of 5 secs will be used. 
44918c2ecf20Sopenharmony_ci */ 44928c2ecf20Sopenharmony_cistatic void schedule_delayed(struct ceph_mds_client *mdsc, unsigned long delay) 44938c2ecf20Sopenharmony_ci{ 44948c2ecf20Sopenharmony_ci unsigned long max_delay = HZ * 5; 44958c2ecf20Sopenharmony_ci 44968c2ecf20Sopenharmony_ci /* 5 secs default delay */ 44978c2ecf20Sopenharmony_ci if (!delay || (delay > max_delay)) 44988c2ecf20Sopenharmony_ci delay = max_delay; 44998c2ecf20Sopenharmony_ci schedule_delayed_work(&mdsc->delayed_work, 45008c2ecf20Sopenharmony_ci round_jiffies_relative(delay)); 45018c2ecf20Sopenharmony_ci} 45028c2ecf20Sopenharmony_ci 45038c2ecf20Sopenharmony_cistatic void delayed_work(struct work_struct *work) 45048c2ecf20Sopenharmony_ci{ 45058c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = 45068c2ecf20Sopenharmony_ci container_of(work, struct ceph_mds_client, delayed_work.work); 45078c2ecf20Sopenharmony_ci unsigned long delay; 45088c2ecf20Sopenharmony_ci int renew_interval; 45098c2ecf20Sopenharmony_ci int renew_caps; 45108c2ecf20Sopenharmony_ci int i; 45118c2ecf20Sopenharmony_ci 45128c2ecf20Sopenharmony_ci dout("mdsc delayed_work\n"); 45138c2ecf20Sopenharmony_ci 45148c2ecf20Sopenharmony_ci if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED) 45158c2ecf20Sopenharmony_ci return; 45168c2ecf20Sopenharmony_ci 45178c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 45188c2ecf20Sopenharmony_ci renew_interval = mdsc->mdsmap->m_session_timeout >> 2; 45198c2ecf20Sopenharmony_ci renew_caps = time_after_eq(jiffies, HZ*renew_interval + 45208c2ecf20Sopenharmony_ci mdsc->last_renew_caps); 45218c2ecf20Sopenharmony_ci if (renew_caps) 45228c2ecf20Sopenharmony_ci mdsc->last_renew_caps = jiffies; 45238c2ecf20Sopenharmony_ci 45248c2ecf20Sopenharmony_ci for (i = 0; i < mdsc->max_sessions; i++) { 45258c2ecf20Sopenharmony_ci struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i); 45268c2ecf20Sopenharmony_ci if (!s) 45278c2ecf20Sopenharmony_ci continue; 45288c2ecf20Sopenharmony_ci 45298c2ecf20Sopenharmony_ci 
if (!check_session_state(s)) { 45308c2ecf20Sopenharmony_ci ceph_put_mds_session(s); 45318c2ecf20Sopenharmony_ci continue; 45328c2ecf20Sopenharmony_ci } 45338c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 45348c2ecf20Sopenharmony_ci 45358c2ecf20Sopenharmony_ci mutex_lock(&s->s_mutex); 45368c2ecf20Sopenharmony_ci if (renew_caps) 45378c2ecf20Sopenharmony_ci send_renew_caps(mdsc, s); 45388c2ecf20Sopenharmony_ci else 45398c2ecf20Sopenharmony_ci ceph_con_keepalive(&s->s_con); 45408c2ecf20Sopenharmony_ci if (s->s_state == CEPH_MDS_SESSION_OPEN || 45418c2ecf20Sopenharmony_ci s->s_state == CEPH_MDS_SESSION_HUNG) 45428c2ecf20Sopenharmony_ci ceph_send_cap_releases(mdsc, s); 45438c2ecf20Sopenharmony_ci mutex_unlock(&s->s_mutex); 45448c2ecf20Sopenharmony_ci ceph_put_mds_session(s); 45458c2ecf20Sopenharmony_ci 45468c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 45478c2ecf20Sopenharmony_ci } 45488c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 45498c2ecf20Sopenharmony_ci 45508c2ecf20Sopenharmony_ci delay = ceph_check_delayed_caps(mdsc); 45518c2ecf20Sopenharmony_ci 45528c2ecf20Sopenharmony_ci ceph_queue_cap_reclaim_work(mdsc); 45538c2ecf20Sopenharmony_ci 45548c2ecf20Sopenharmony_ci ceph_trim_snapid_map(mdsc); 45558c2ecf20Sopenharmony_ci 45568c2ecf20Sopenharmony_ci maybe_recover_session(mdsc); 45578c2ecf20Sopenharmony_ci 45588c2ecf20Sopenharmony_ci schedule_delayed(mdsc, delay); 45598c2ecf20Sopenharmony_ci} 45608c2ecf20Sopenharmony_ci 45618c2ecf20Sopenharmony_ciint ceph_mdsc_init(struct ceph_fs_client *fsc) 45628c2ecf20Sopenharmony_ci 45638c2ecf20Sopenharmony_ci{ 45648c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc; 45658c2ecf20Sopenharmony_ci int err; 45668c2ecf20Sopenharmony_ci 45678c2ecf20Sopenharmony_ci mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS); 45688c2ecf20Sopenharmony_ci if (!mdsc) 45698c2ecf20Sopenharmony_ci return -ENOMEM; 45708c2ecf20Sopenharmony_ci mdsc->fsc = fsc; 45718c2ecf20Sopenharmony_ci mutex_init(&mdsc->mutex); 
45728c2ecf20Sopenharmony_ci mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); 45738c2ecf20Sopenharmony_ci if (!mdsc->mdsmap) { 45748c2ecf20Sopenharmony_ci err = -ENOMEM; 45758c2ecf20Sopenharmony_ci goto err_mdsc; 45768c2ecf20Sopenharmony_ci } 45778c2ecf20Sopenharmony_ci 45788c2ecf20Sopenharmony_ci init_completion(&mdsc->safe_umount_waiters); 45798c2ecf20Sopenharmony_ci init_waitqueue_head(&mdsc->session_close_wq); 45808c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&mdsc->waiting_for_map); 45818c2ecf20Sopenharmony_ci mdsc->sessions = NULL; 45828c2ecf20Sopenharmony_ci atomic_set(&mdsc->num_sessions, 0); 45838c2ecf20Sopenharmony_ci mdsc->max_sessions = 0; 45848c2ecf20Sopenharmony_ci mdsc->stopping = 0; 45858c2ecf20Sopenharmony_ci atomic64_set(&mdsc->quotarealms_count, 0); 45868c2ecf20Sopenharmony_ci mdsc->quotarealms_inodes = RB_ROOT; 45878c2ecf20Sopenharmony_ci mutex_init(&mdsc->quotarealms_inodes_mutex); 45888c2ecf20Sopenharmony_ci mdsc->last_snap_seq = 0; 45898c2ecf20Sopenharmony_ci init_rwsem(&mdsc->snap_rwsem); 45908c2ecf20Sopenharmony_ci mdsc->snap_realms = RB_ROOT; 45918c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&mdsc->snap_empty); 45928c2ecf20Sopenharmony_ci mdsc->num_snap_realms = 0; 45938c2ecf20Sopenharmony_ci spin_lock_init(&mdsc->snap_empty_lock); 45948c2ecf20Sopenharmony_ci mdsc->last_tid = 0; 45958c2ecf20Sopenharmony_ci mdsc->oldest_tid = 0; 45968c2ecf20Sopenharmony_ci mdsc->request_tree = RB_ROOT; 45978c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); 45988c2ecf20Sopenharmony_ci mdsc->last_renew_caps = jiffies; 45998c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&mdsc->cap_delay_list); 46008c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&mdsc->cap_wait_list); 46018c2ecf20Sopenharmony_ci spin_lock_init(&mdsc->cap_delay_lock); 46028c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&mdsc->snap_flush_list); 46038c2ecf20Sopenharmony_ci spin_lock_init(&mdsc->snap_flush_lock); 46048c2ecf20Sopenharmony_ci mdsc->last_cap_flush_tid = 1; 46058c2ecf20Sopenharmony_ci 
INIT_LIST_HEAD(&mdsc->cap_flush_list); 46068c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&mdsc->cap_dirty_migrating); 46078c2ecf20Sopenharmony_ci mdsc->num_cap_flushing = 0; 46088c2ecf20Sopenharmony_ci spin_lock_init(&mdsc->cap_dirty_lock); 46098c2ecf20Sopenharmony_ci init_waitqueue_head(&mdsc->cap_flushing_wq); 46108c2ecf20Sopenharmony_ci INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work); 46118c2ecf20Sopenharmony_ci atomic_set(&mdsc->cap_reclaim_pending, 0); 46128c2ecf20Sopenharmony_ci err = ceph_metric_init(&mdsc->metric); 46138c2ecf20Sopenharmony_ci if (err) 46148c2ecf20Sopenharmony_ci goto err_mdsmap; 46158c2ecf20Sopenharmony_ci 46168c2ecf20Sopenharmony_ci spin_lock_init(&mdsc->dentry_list_lock); 46178c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&mdsc->dentry_leases); 46188c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&mdsc->dentry_dir_leases); 46198c2ecf20Sopenharmony_ci 46208c2ecf20Sopenharmony_ci ceph_caps_init(mdsc); 46218c2ecf20Sopenharmony_ci ceph_adjust_caps_max_min(mdsc, fsc->mount_options); 46228c2ecf20Sopenharmony_ci 46238c2ecf20Sopenharmony_ci spin_lock_init(&mdsc->snapid_map_lock); 46248c2ecf20Sopenharmony_ci mdsc->snapid_map_tree = RB_ROOT; 46258c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&mdsc->snapid_map_lru); 46268c2ecf20Sopenharmony_ci 46278c2ecf20Sopenharmony_ci init_rwsem(&mdsc->pool_perm_rwsem); 46288c2ecf20Sopenharmony_ci mdsc->pool_perm_tree = RB_ROOT; 46298c2ecf20Sopenharmony_ci 46308c2ecf20Sopenharmony_ci strscpy(mdsc->nodename, utsname()->nodename, 46318c2ecf20Sopenharmony_ci sizeof(mdsc->nodename)); 46328c2ecf20Sopenharmony_ci 46338c2ecf20Sopenharmony_ci fsc->mdsc = mdsc; 46348c2ecf20Sopenharmony_ci return 0; 46358c2ecf20Sopenharmony_ci 46368c2ecf20Sopenharmony_cierr_mdsmap: 46378c2ecf20Sopenharmony_ci kfree(mdsc->mdsmap); 46388c2ecf20Sopenharmony_cierr_mdsc: 46398c2ecf20Sopenharmony_ci kfree(mdsc); 46408c2ecf20Sopenharmony_ci return err; 46418c2ecf20Sopenharmony_ci} 46428c2ecf20Sopenharmony_ci 46438c2ecf20Sopenharmony_ci/* 46448c2ecf20Sopenharmony_ci * 
Wait for safe replies on open mds requests. If we time out, drop 46458c2ecf20Sopenharmony_ci * all requests from the tree to avoid dangling dentry refs. 46468c2ecf20Sopenharmony_ci */ 46478c2ecf20Sopenharmony_cistatic void wait_requests(struct ceph_mds_client *mdsc) 46488c2ecf20Sopenharmony_ci{ 46498c2ecf20Sopenharmony_ci struct ceph_options *opts = mdsc->fsc->client->options; 46508c2ecf20Sopenharmony_ci struct ceph_mds_request *req; 46518c2ecf20Sopenharmony_ci 46528c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 46538c2ecf20Sopenharmony_ci if (__get_oldest_req(mdsc)) { 46548c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 46558c2ecf20Sopenharmony_ci 46568c2ecf20Sopenharmony_ci dout("wait_requests waiting for requests\n"); 46578c2ecf20Sopenharmony_ci wait_for_completion_timeout(&mdsc->safe_umount_waiters, 46588c2ecf20Sopenharmony_ci ceph_timeout_jiffies(opts->mount_timeout)); 46598c2ecf20Sopenharmony_ci 46608c2ecf20Sopenharmony_ci /* tear down remaining requests */ 46618c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 46628c2ecf20Sopenharmony_ci while ((req = __get_oldest_req(mdsc))) { 46638c2ecf20Sopenharmony_ci dout("wait_requests timed out on tid %llu\n", 46648c2ecf20Sopenharmony_ci req->r_tid); 46658c2ecf20Sopenharmony_ci list_del_init(&req->r_wait); 46668c2ecf20Sopenharmony_ci __unregister_request(mdsc, req); 46678c2ecf20Sopenharmony_ci } 46688c2ecf20Sopenharmony_ci } 46698c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 46708c2ecf20Sopenharmony_ci dout("wait_requests done\n"); 46718c2ecf20Sopenharmony_ci} 46728c2ecf20Sopenharmony_ci 46738c2ecf20Sopenharmony_civoid send_flush_mdlog(struct ceph_mds_session *s) 46748c2ecf20Sopenharmony_ci{ 46758c2ecf20Sopenharmony_ci struct ceph_msg *msg; 46768c2ecf20Sopenharmony_ci 46778c2ecf20Sopenharmony_ci /* 46788c2ecf20Sopenharmony_ci * Pre-luminous MDS crashes when it sees an unknown session request 46798c2ecf20Sopenharmony_ci */ 46808c2ecf20Sopenharmony_ci if (!CEPH_HAVE_FEATURE(s->s_con.peer_features, 
SERVER_LUMINOUS)) 46818c2ecf20Sopenharmony_ci return; 46828c2ecf20Sopenharmony_ci 46838c2ecf20Sopenharmony_ci mutex_lock(&s->s_mutex); 46848c2ecf20Sopenharmony_ci dout("request mdlog flush to mds%d (%s)s seq %lld\n", s->s_mds, 46858c2ecf20Sopenharmony_ci ceph_session_state_name(s->s_state), s->s_seq); 46868c2ecf20Sopenharmony_ci msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_FLUSH_MDLOG, 46878c2ecf20Sopenharmony_ci s->s_seq); 46888c2ecf20Sopenharmony_ci if (!msg) { 46898c2ecf20Sopenharmony_ci pr_err("failed to request mdlog flush to mds%d (%s) seq %lld\n", 46908c2ecf20Sopenharmony_ci s->s_mds, ceph_session_state_name(s->s_state), s->s_seq); 46918c2ecf20Sopenharmony_ci } else { 46928c2ecf20Sopenharmony_ci ceph_con_send(&s->s_con, msg); 46938c2ecf20Sopenharmony_ci } 46948c2ecf20Sopenharmony_ci mutex_unlock(&s->s_mutex); 46958c2ecf20Sopenharmony_ci} 46968c2ecf20Sopenharmony_ci 46978c2ecf20Sopenharmony_ci/* 46988c2ecf20Sopenharmony_ci * called before mount is ro, and before dentries are torn down. 46998c2ecf20Sopenharmony_ci * (hmm, does this still race with new lookups?) 
47008c2ecf20Sopenharmony_ci */ 47018c2ecf20Sopenharmony_civoid ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc) 47028c2ecf20Sopenharmony_ci{ 47038c2ecf20Sopenharmony_ci dout("pre_umount\n"); 47048c2ecf20Sopenharmony_ci mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN; 47058c2ecf20Sopenharmony_ci 47068c2ecf20Sopenharmony_ci ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true); 47078c2ecf20Sopenharmony_ci ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false); 47088c2ecf20Sopenharmony_ci ceph_flush_dirty_caps(mdsc); 47098c2ecf20Sopenharmony_ci wait_requests(mdsc); 47108c2ecf20Sopenharmony_ci 47118c2ecf20Sopenharmony_ci /* 47128c2ecf20Sopenharmony_ci * wait for reply handlers to drop their request refs and 47138c2ecf20Sopenharmony_ci * their inode/dcache refs 47148c2ecf20Sopenharmony_ci */ 47158c2ecf20Sopenharmony_ci ceph_msgr_flush(); 47168c2ecf20Sopenharmony_ci 47178c2ecf20Sopenharmony_ci ceph_cleanup_quotarealms_inodes(mdsc); 47188c2ecf20Sopenharmony_ci} 47198c2ecf20Sopenharmony_ci 47208c2ecf20Sopenharmony_ci/* 47218c2ecf20Sopenharmony_ci * wait for all write mds requests to flush. 
47228c2ecf20Sopenharmony_ci */ 47238c2ecf20Sopenharmony_cistatic void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) 47248c2ecf20Sopenharmony_ci{ 47258c2ecf20Sopenharmony_ci struct ceph_mds_request *req = NULL, *nextreq; 47268c2ecf20Sopenharmony_ci struct rb_node *n; 47278c2ecf20Sopenharmony_ci 47288c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 47298c2ecf20Sopenharmony_ci dout("wait_unsafe_requests want %lld\n", want_tid); 47308c2ecf20Sopenharmony_cirestart: 47318c2ecf20Sopenharmony_ci req = __get_oldest_req(mdsc); 47328c2ecf20Sopenharmony_ci while (req && req->r_tid <= want_tid) { 47338c2ecf20Sopenharmony_ci /* find next request */ 47348c2ecf20Sopenharmony_ci n = rb_next(&req->r_node); 47358c2ecf20Sopenharmony_ci if (n) 47368c2ecf20Sopenharmony_ci nextreq = rb_entry(n, struct ceph_mds_request, r_node); 47378c2ecf20Sopenharmony_ci else 47388c2ecf20Sopenharmony_ci nextreq = NULL; 47398c2ecf20Sopenharmony_ci if (req->r_op != CEPH_MDS_OP_SETFILELOCK && 47408c2ecf20Sopenharmony_ci (req->r_op & CEPH_MDS_OP_WRITE)) { 47418c2ecf20Sopenharmony_ci /* write op */ 47428c2ecf20Sopenharmony_ci ceph_mdsc_get_request(req); 47438c2ecf20Sopenharmony_ci if (nextreq) 47448c2ecf20Sopenharmony_ci ceph_mdsc_get_request(nextreq); 47458c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 47468c2ecf20Sopenharmony_ci dout("wait_unsafe_requests wait on %llu (want %llu)\n", 47478c2ecf20Sopenharmony_ci req->r_tid, want_tid); 47488c2ecf20Sopenharmony_ci wait_for_completion(&req->r_safe_completion); 47498c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 47508c2ecf20Sopenharmony_ci ceph_mdsc_put_request(req); 47518c2ecf20Sopenharmony_ci if (!nextreq) 47528c2ecf20Sopenharmony_ci break; /* next dne before, so we're done! 
*/ 47538c2ecf20Sopenharmony_ci if (RB_EMPTY_NODE(&nextreq->r_node)) { 47548c2ecf20Sopenharmony_ci /* next request was removed from tree */ 47558c2ecf20Sopenharmony_ci ceph_mdsc_put_request(nextreq); 47568c2ecf20Sopenharmony_ci goto restart; 47578c2ecf20Sopenharmony_ci } 47588c2ecf20Sopenharmony_ci ceph_mdsc_put_request(nextreq); /* won't go away */ 47598c2ecf20Sopenharmony_ci } 47608c2ecf20Sopenharmony_ci req = nextreq; 47618c2ecf20Sopenharmony_ci } 47628c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 47638c2ecf20Sopenharmony_ci dout("wait_unsafe_requests done\n"); 47648c2ecf20Sopenharmony_ci} 47658c2ecf20Sopenharmony_ci 47668c2ecf20Sopenharmony_civoid ceph_mdsc_sync(struct ceph_mds_client *mdsc) 47678c2ecf20Sopenharmony_ci{ 47688c2ecf20Sopenharmony_ci u64 want_tid, want_flush; 47698c2ecf20Sopenharmony_ci 47708c2ecf20Sopenharmony_ci if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) 47718c2ecf20Sopenharmony_ci return; 47728c2ecf20Sopenharmony_ci 47738c2ecf20Sopenharmony_ci dout("sync\n"); 47748c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 47758c2ecf20Sopenharmony_ci want_tid = mdsc->last_tid; 47768c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 47778c2ecf20Sopenharmony_ci 47788c2ecf20Sopenharmony_ci ceph_flush_dirty_caps(mdsc); 47798c2ecf20Sopenharmony_ci spin_lock(&mdsc->cap_dirty_lock); 47808c2ecf20Sopenharmony_ci want_flush = mdsc->last_cap_flush_tid; 47818c2ecf20Sopenharmony_ci if (!list_empty(&mdsc->cap_flush_list)) { 47828c2ecf20Sopenharmony_ci struct ceph_cap_flush *cf = 47838c2ecf20Sopenharmony_ci list_last_entry(&mdsc->cap_flush_list, 47848c2ecf20Sopenharmony_ci struct ceph_cap_flush, g_list); 47858c2ecf20Sopenharmony_ci cf->wake = true; 47868c2ecf20Sopenharmony_ci } 47878c2ecf20Sopenharmony_ci spin_unlock(&mdsc->cap_dirty_lock); 47888c2ecf20Sopenharmony_ci 47898c2ecf20Sopenharmony_ci dout("sync want tid %lld flush_seq %lld\n", 47908c2ecf20Sopenharmony_ci want_tid, want_flush); 47918c2ecf20Sopenharmony_ci 47928c2ecf20Sopenharmony_ci 
wait_unsafe_requests(mdsc, want_tid); 47938c2ecf20Sopenharmony_ci wait_caps_flush(mdsc, want_flush); 47948c2ecf20Sopenharmony_ci} 47958c2ecf20Sopenharmony_ci 47968c2ecf20Sopenharmony_ci/* 47978c2ecf20Sopenharmony_ci * true if all sessions are closed, or we force unmount 47988c2ecf20Sopenharmony_ci */ 47998c2ecf20Sopenharmony_cistatic bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped) 48008c2ecf20Sopenharmony_ci{ 48018c2ecf20Sopenharmony_ci if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) 48028c2ecf20Sopenharmony_ci return true; 48038c2ecf20Sopenharmony_ci return atomic_read(&mdsc->num_sessions) <= skipped; 48048c2ecf20Sopenharmony_ci} 48058c2ecf20Sopenharmony_ci 48068c2ecf20Sopenharmony_ci/* 48078c2ecf20Sopenharmony_ci * called after sb is ro. 48088c2ecf20Sopenharmony_ci */ 48098c2ecf20Sopenharmony_civoid ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) 48108c2ecf20Sopenharmony_ci{ 48118c2ecf20Sopenharmony_ci struct ceph_options *opts = mdsc->fsc->client->options; 48128c2ecf20Sopenharmony_ci struct ceph_mds_session *session; 48138c2ecf20Sopenharmony_ci int i; 48148c2ecf20Sopenharmony_ci int skipped = 0; 48158c2ecf20Sopenharmony_ci 48168c2ecf20Sopenharmony_ci dout("close_sessions\n"); 48178c2ecf20Sopenharmony_ci 48188c2ecf20Sopenharmony_ci /* close sessions */ 48198c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 48208c2ecf20Sopenharmony_ci for (i = 0; i < mdsc->max_sessions; i++) { 48218c2ecf20Sopenharmony_ci session = __ceph_lookup_mds_session(mdsc, i); 48228c2ecf20Sopenharmony_ci if (!session) 48238c2ecf20Sopenharmony_ci continue; 48248c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 48258c2ecf20Sopenharmony_ci mutex_lock(&session->s_mutex); 48268c2ecf20Sopenharmony_ci if (__close_session(mdsc, session) <= 0) 48278c2ecf20Sopenharmony_ci skipped++; 48288c2ecf20Sopenharmony_ci mutex_unlock(&session->s_mutex); 48298c2ecf20Sopenharmony_ci ceph_put_mds_session(session); 48308c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 
48318c2ecf20Sopenharmony_ci } 48328c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 48338c2ecf20Sopenharmony_ci 48348c2ecf20Sopenharmony_ci dout("waiting for sessions to close\n"); 48358c2ecf20Sopenharmony_ci wait_event_timeout(mdsc->session_close_wq, 48368c2ecf20Sopenharmony_ci done_closing_sessions(mdsc, skipped), 48378c2ecf20Sopenharmony_ci ceph_timeout_jiffies(opts->mount_timeout)); 48388c2ecf20Sopenharmony_ci 48398c2ecf20Sopenharmony_ci /* tear down remaining sessions */ 48408c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 48418c2ecf20Sopenharmony_ci for (i = 0; i < mdsc->max_sessions; i++) { 48428c2ecf20Sopenharmony_ci if (mdsc->sessions[i]) { 48438c2ecf20Sopenharmony_ci session = ceph_get_mds_session(mdsc->sessions[i]); 48448c2ecf20Sopenharmony_ci __unregister_session(mdsc, session); 48458c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 48468c2ecf20Sopenharmony_ci mutex_lock(&session->s_mutex); 48478c2ecf20Sopenharmony_ci remove_session_caps(session); 48488c2ecf20Sopenharmony_ci mutex_unlock(&session->s_mutex); 48498c2ecf20Sopenharmony_ci ceph_put_mds_session(session); 48508c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 48518c2ecf20Sopenharmony_ci } 48528c2ecf20Sopenharmony_ci } 48538c2ecf20Sopenharmony_ci WARN_ON(!list_empty(&mdsc->cap_delay_list)); 48548c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 48558c2ecf20Sopenharmony_ci 48568c2ecf20Sopenharmony_ci ceph_cleanup_snapid_map(mdsc); 48578c2ecf20Sopenharmony_ci ceph_cleanup_empty_realms(mdsc); 48588c2ecf20Sopenharmony_ci 48598c2ecf20Sopenharmony_ci cancel_work_sync(&mdsc->cap_reclaim_work); 48608c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ 48618c2ecf20Sopenharmony_ci 48628c2ecf20Sopenharmony_ci dout("stopped\n"); 48638c2ecf20Sopenharmony_ci} 48648c2ecf20Sopenharmony_ci 48658c2ecf20Sopenharmony_civoid ceph_mdsc_force_umount(struct ceph_mds_client *mdsc) 48668c2ecf20Sopenharmony_ci{ 48678c2ecf20Sopenharmony_ci struct ceph_mds_session *session; 
48688c2ecf20Sopenharmony_ci int mds; 48698c2ecf20Sopenharmony_ci 48708c2ecf20Sopenharmony_ci dout("force umount\n"); 48718c2ecf20Sopenharmony_ci 48728c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 48738c2ecf20Sopenharmony_ci for (mds = 0; mds < mdsc->max_sessions; mds++) { 48748c2ecf20Sopenharmony_ci session = __ceph_lookup_mds_session(mdsc, mds); 48758c2ecf20Sopenharmony_ci if (!session) 48768c2ecf20Sopenharmony_ci continue; 48778c2ecf20Sopenharmony_ci 48788c2ecf20Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_REJECTED) 48798c2ecf20Sopenharmony_ci __unregister_session(mdsc, session); 48808c2ecf20Sopenharmony_ci __wake_requests(mdsc, &session->s_waiting); 48818c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 48828c2ecf20Sopenharmony_ci 48838c2ecf20Sopenharmony_ci mutex_lock(&session->s_mutex); 48848c2ecf20Sopenharmony_ci __close_session(mdsc, session); 48858c2ecf20Sopenharmony_ci if (session->s_state == CEPH_MDS_SESSION_CLOSING) { 48868c2ecf20Sopenharmony_ci cleanup_session_requests(mdsc, session); 48878c2ecf20Sopenharmony_ci remove_session_caps(session); 48888c2ecf20Sopenharmony_ci } 48898c2ecf20Sopenharmony_ci mutex_unlock(&session->s_mutex); 48908c2ecf20Sopenharmony_ci ceph_put_mds_session(session); 48918c2ecf20Sopenharmony_ci 48928c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 48938c2ecf20Sopenharmony_ci kick_requests(mdsc, mds); 48948c2ecf20Sopenharmony_ci } 48958c2ecf20Sopenharmony_ci __wake_requests(mdsc, &mdsc->waiting_for_map); 48968c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 48978c2ecf20Sopenharmony_ci} 48988c2ecf20Sopenharmony_ci 48998c2ecf20Sopenharmony_cistatic void ceph_mdsc_stop(struct ceph_mds_client *mdsc) 49008c2ecf20Sopenharmony_ci{ 49018c2ecf20Sopenharmony_ci dout("stop\n"); 49028c2ecf20Sopenharmony_ci /* 49038c2ecf20Sopenharmony_ci * Make sure the delayed work stopped before releasing 49048c2ecf20Sopenharmony_ci * the resources. 
49058c2ecf20Sopenharmony_ci * 49068c2ecf20Sopenharmony_ci * Because the cancel_delayed_work_sync() will only 49078c2ecf20Sopenharmony_ci * guarantee that the work finishes executing. But the 49088c2ecf20Sopenharmony_ci * delayed work will re-arm itself again after that. 49098c2ecf20Sopenharmony_ci */ 49108c2ecf20Sopenharmony_ci flush_delayed_work(&mdsc->delayed_work); 49118c2ecf20Sopenharmony_ci 49128c2ecf20Sopenharmony_ci if (mdsc->mdsmap) 49138c2ecf20Sopenharmony_ci ceph_mdsmap_destroy(mdsc->mdsmap); 49148c2ecf20Sopenharmony_ci kfree(mdsc->sessions); 49158c2ecf20Sopenharmony_ci ceph_caps_finalize(mdsc); 49168c2ecf20Sopenharmony_ci ceph_pool_perm_destroy(mdsc); 49178c2ecf20Sopenharmony_ci} 49188c2ecf20Sopenharmony_ci 49198c2ecf20Sopenharmony_civoid ceph_mdsc_destroy(struct ceph_fs_client *fsc) 49208c2ecf20Sopenharmony_ci{ 49218c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = fsc->mdsc; 49228c2ecf20Sopenharmony_ci dout("mdsc_destroy %p\n", mdsc); 49238c2ecf20Sopenharmony_ci 49248c2ecf20Sopenharmony_ci if (!mdsc) 49258c2ecf20Sopenharmony_ci return; 49268c2ecf20Sopenharmony_ci 49278c2ecf20Sopenharmony_ci /* flush out any connection work with references to us */ 49288c2ecf20Sopenharmony_ci ceph_msgr_flush(); 49298c2ecf20Sopenharmony_ci 49308c2ecf20Sopenharmony_ci ceph_mdsc_stop(mdsc); 49318c2ecf20Sopenharmony_ci 49328c2ecf20Sopenharmony_ci ceph_metric_destroy(&mdsc->metric); 49338c2ecf20Sopenharmony_ci 49348c2ecf20Sopenharmony_ci fsc->mdsc = NULL; 49358c2ecf20Sopenharmony_ci kfree(mdsc); 49368c2ecf20Sopenharmony_ci dout("mdsc_destroy %p done\n", mdsc); 49378c2ecf20Sopenharmony_ci} 49388c2ecf20Sopenharmony_ci 49398c2ecf20Sopenharmony_civoid ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) 49408c2ecf20Sopenharmony_ci{ 49418c2ecf20Sopenharmony_ci struct ceph_fs_client *fsc = mdsc->fsc; 49428c2ecf20Sopenharmony_ci const char *mds_namespace = fsc->mount_options->mds_namespace; 49438c2ecf20Sopenharmony_ci void *p = msg->front.iov_base; 
49448c2ecf20Sopenharmony_ci void *end = p + msg->front.iov_len; 49458c2ecf20Sopenharmony_ci u32 epoch; 49468c2ecf20Sopenharmony_ci u32 map_len; 49478c2ecf20Sopenharmony_ci u32 num_fs; 49488c2ecf20Sopenharmony_ci u32 mount_fscid = (u32)-1; 49498c2ecf20Sopenharmony_ci u8 struct_v, struct_cv; 49508c2ecf20Sopenharmony_ci int err = -EINVAL; 49518c2ecf20Sopenharmony_ci 49528c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, sizeof(u32), bad); 49538c2ecf20Sopenharmony_ci epoch = ceph_decode_32(&p); 49548c2ecf20Sopenharmony_ci 49558c2ecf20Sopenharmony_ci dout("handle_fsmap epoch %u\n", epoch); 49568c2ecf20Sopenharmony_ci 49578c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, 2 + sizeof(u32), bad); 49588c2ecf20Sopenharmony_ci struct_v = ceph_decode_8(&p); 49598c2ecf20Sopenharmony_ci struct_cv = ceph_decode_8(&p); 49608c2ecf20Sopenharmony_ci map_len = ceph_decode_32(&p); 49618c2ecf20Sopenharmony_ci 49628c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, sizeof(u32) * 3, bad); 49638c2ecf20Sopenharmony_ci p += sizeof(u32) * 2; /* skip epoch and legacy_client_fscid */ 49648c2ecf20Sopenharmony_ci 49658c2ecf20Sopenharmony_ci num_fs = ceph_decode_32(&p); 49668c2ecf20Sopenharmony_ci while (num_fs-- > 0) { 49678c2ecf20Sopenharmony_ci void *info_p, *info_end; 49688c2ecf20Sopenharmony_ci u32 info_len; 49698c2ecf20Sopenharmony_ci u8 info_v, info_cv; 49708c2ecf20Sopenharmony_ci u32 fscid, namelen; 49718c2ecf20Sopenharmony_ci 49728c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, 2 + sizeof(u32), bad); 49738c2ecf20Sopenharmony_ci info_v = ceph_decode_8(&p); 49748c2ecf20Sopenharmony_ci info_cv = ceph_decode_8(&p); 49758c2ecf20Sopenharmony_ci info_len = ceph_decode_32(&p); 49768c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, info_len, bad); 49778c2ecf20Sopenharmony_ci info_p = p; 49788c2ecf20Sopenharmony_ci info_end = p + info_len; 49798c2ecf20Sopenharmony_ci p = info_end; 49808c2ecf20Sopenharmony_ci 49818c2ecf20Sopenharmony_ci ceph_decode_need(&info_p, info_end, sizeof(u32) * 2, bad); 
49828c2ecf20Sopenharmony_ci fscid = ceph_decode_32(&info_p); 49838c2ecf20Sopenharmony_ci namelen = ceph_decode_32(&info_p); 49848c2ecf20Sopenharmony_ci ceph_decode_need(&info_p, info_end, namelen, bad); 49858c2ecf20Sopenharmony_ci 49868c2ecf20Sopenharmony_ci if (mds_namespace && 49878c2ecf20Sopenharmony_ci strlen(mds_namespace) == namelen && 49888c2ecf20Sopenharmony_ci !strncmp(mds_namespace, (char *)info_p, namelen)) { 49898c2ecf20Sopenharmony_ci mount_fscid = fscid; 49908c2ecf20Sopenharmony_ci break; 49918c2ecf20Sopenharmony_ci } 49928c2ecf20Sopenharmony_ci } 49938c2ecf20Sopenharmony_ci 49948c2ecf20Sopenharmony_ci ceph_monc_got_map(&fsc->client->monc, CEPH_SUB_FSMAP, epoch); 49958c2ecf20Sopenharmony_ci if (mount_fscid != (u32)-1) { 49968c2ecf20Sopenharmony_ci fsc->client->monc.fs_cluster_id = mount_fscid; 49978c2ecf20Sopenharmony_ci ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 49988c2ecf20Sopenharmony_ci 0, true); 49998c2ecf20Sopenharmony_ci ceph_monc_renew_subs(&fsc->client->monc); 50008c2ecf20Sopenharmony_ci } else { 50018c2ecf20Sopenharmony_ci err = -ENOENT; 50028c2ecf20Sopenharmony_ci goto err_out; 50038c2ecf20Sopenharmony_ci } 50048c2ecf20Sopenharmony_ci return; 50058c2ecf20Sopenharmony_ci 50068c2ecf20Sopenharmony_cibad: 50078c2ecf20Sopenharmony_ci pr_err("error decoding fsmap\n"); 50088c2ecf20Sopenharmony_cierr_out: 50098c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 50108c2ecf20Sopenharmony_ci mdsc->mdsmap_err = err; 50118c2ecf20Sopenharmony_ci __wake_requests(mdsc, &mdsc->waiting_for_map); 50128c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 50138c2ecf20Sopenharmony_ci} 50148c2ecf20Sopenharmony_ci 50158c2ecf20Sopenharmony_ci/* 50168c2ecf20Sopenharmony_ci * handle mds map update. 
50178c2ecf20Sopenharmony_ci */ 50188c2ecf20Sopenharmony_civoid ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg) 50198c2ecf20Sopenharmony_ci{ 50208c2ecf20Sopenharmony_ci u32 epoch; 50218c2ecf20Sopenharmony_ci u32 maplen; 50228c2ecf20Sopenharmony_ci void *p = msg->front.iov_base; 50238c2ecf20Sopenharmony_ci void *end = p + msg->front.iov_len; 50248c2ecf20Sopenharmony_ci struct ceph_mdsmap *newmap, *oldmap; 50258c2ecf20Sopenharmony_ci struct ceph_fsid fsid; 50268c2ecf20Sopenharmony_ci int err = -EINVAL; 50278c2ecf20Sopenharmony_ci 50288c2ecf20Sopenharmony_ci ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); 50298c2ecf20Sopenharmony_ci ceph_decode_copy(&p, &fsid, sizeof(fsid)); 50308c2ecf20Sopenharmony_ci if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0) 50318c2ecf20Sopenharmony_ci return; 50328c2ecf20Sopenharmony_ci epoch = ceph_decode_32(&p); 50338c2ecf20Sopenharmony_ci maplen = ceph_decode_32(&p); 50348c2ecf20Sopenharmony_ci dout("handle_map epoch %u len %d\n", epoch, (int)maplen); 50358c2ecf20Sopenharmony_ci 50368c2ecf20Sopenharmony_ci /* do we need it? 
*/ 50378c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 50388c2ecf20Sopenharmony_ci if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { 50398c2ecf20Sopenharmony_ci dout("handle_map epoch %u <= our %u\n", 50408c2ecf20Sopenharmony_ci epoch, mdsc->mdsmap->m_epoch); 50418c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 50428c2ecf20Sopenharmony_ci return; 50438c2ecf20Sopenharmony_ci } 50448c2ecf20Sopenharmony_ci 50458c2ecf20Sopenharmony_ci newmap = ceph_mdsmap_decode(&p, end); 50468c2ecf20Sopenharmony_ci if (IS_ERR(newmap)) { 50478c2ecf20Sopenharmony_ci err = PTR_ERR(newmap); 50488c2ecf20Sopenharmony_ci goto bad_unlock; 50498c2ecf20Sopenharmony_ci } 50508c2ecf20Sopenharmony_ci 50518c2ecf20Sopenharmony_ci /* swap into place */ 50528c2ecf20Sopenharmony_ci if (mdsc->mdsmap) { 50538c2ecf20Sopenharmony_ci oldmap = mdsc->mdsmap; 50548c2ecf20Sopenharmony_ci mdsc->mdsmap = newmap; 50558c2ecf20Sopenharmony_ci check_new_map(mdsc, newmap, oldmap); 50568c2ecf20Sopenharmony_ci ceph_mdsmap_destroy(oldmap); 50578c2ecf20Sopenharmony_ci } else { 50588c2ecf20Sopenharmony_ci mdsc->mdsmap = newmap; /* first mds map */ 50598c2ecf20Sopenharmony_ci } 50608c2ecf20Sopenharmony_ci mdsc->fsc->max_file_size = min((loff_t)mdsc->mdsmap->m_max_file_size, 50618c2ecf20Sopenharmony_ci MAX_LFS_FILESIZE); 50628c2ecf20Sopenharmony_ci 50638c2ecf20Sopenharmony_ci __wake_requests(mdsc, &mdsc->waiting_for_map); 50648c2ecf20Sopenharmony_ci ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP, 50658c2ecf20Sopenharmony_ci mdsc->mdsmap->m_epoch); 50668c2ecf20Sopenharmony_ci 50678c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 50688c2ecf20Sopenharmony_ci schedule_delayed(mdsc, 0); 50698c2ecf20Sopenharmony_ci return; 50708c2ecf20Sopenharmony_ci 50718c2ecf20Sopenharmony_cibad_unlock: 50728c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 50738c2ecf20Sopenharmony_cibad: 50748c2ecf20Sopenharmony_ci pr_err("error decoding mdsmap %d\n", err); 50758c2ecf20Sopenharmony_ci return; 
50768c2ecf20Sopenharmony_ci} 50778c2ecf20Sopenharmony_ci 50788c2ecf20Sopenharmony_cistatic struct ceph_connection *con_get(struct ceph_connection *con) 50798c2ecf20Sopenharmony_ci{ 50808c2ecf20Sopenharmony_ci struct ceph_mds_session *s = con->private; 50818c2ecf20Sopenharmony_ci 50828c2ecf20Sopenharmony_ci if (ceph_get_mds_session(s)) 50838c2ecf20Sopenharmony_ci return con; 50848c2ecf20Sopenharmony_ci return NULL; 50858c2ecf20Sopenharmony_ci} 50868c2ecf20Sopenharmony_ci 50878c2ecf20Sopenharmony_cistatic void con_put(struct ceph_connection *con) 50888c2ecf20Sopenharmony_ci{ 50898c2ecf20Sopenharmony_ci struct ceph_mds_session *s = con->private; 50908c2ecf20Sopenharmony_ci 50918c2ecf20Sopenharmony_ci ceph_put_mds_session(s); 50928c2ecf20Sopenharmony_ci} 50938c2ecf20Sopenharmony_ci 50948c2ecf20Sopenharmony_ci/* 50958c2ecf20Sopenharmony_ci * if the client is unresponsive for long enough, the mds will kill 50968c2ecf20Sopenharmony_ci * the session entirely. 50978c2ecf20Sopenharmony_ci */ 50988c2ecf20Sopenharmony_cistatic void peer_reset(struct ceph_connection *con) 50998c2ecf20Sopenharmony_ci{ 51008c2ecf20Sopenharmony_ci struct ceph_mds_session *s = con->private; 51018c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 51028c2ecf20Sopenharmony_ci 51038c2ecf20Sopenharmony_ci pr_warn("mds%d closed our session\n", s->s_mds); 51048c2ecf20Sopenharmony_ci send_mds_reconnect(mdsc, s); 51058c2ecf20Sopenharmony_ci} 51068c2ecf20Sopenharmony_ci 51078c2ecf20Sopenharmony_cistatic void dispatch(struct ceph_connection *con, struct ceph_msg *msg) 51088c2ecf20Sopenharmony_ci{ 51098c2ecf20Sopenharmony_ci struct ceph_mds_session *s = con->private; 51108c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 51118c2ecf20Sopenharmony_ci int type = le16_to_cpu(msg->hdr.type); 51128c2ecf20Sopenharmony_ci 51138c2ecf20Sopenharmony_ci mutex_lock(&mdsc->mutex); 51148c2ecf20Sopenharmony_ci if (__verify_registered_session(mdsc, s) < 0) { 51158c2ecf20Sopenharmony_ci 
mutex_unlock(&mdsc->mutex); 51168c2ecf20Sopenharmony_ci goto out; 51178c2ecf20Sopenharmony_ci } 51188c2ecf20Sopenharmony_ci mutex_unlock(&mdsc->mutex); 51198c2ecf20Sopenharmony_ci 51208c2ecf20Sopenharmony_ci switch (type) { 51218c2ecf20Sopenharmony_ci case CEPH_MSG_MDS_MAP: 51228c2ecf20Sopenharmony_ci ceph_mdsc_handle_mdsmap(mdsc, msg); 51238c2ecf20Sopenharmony_ci break; 51248c2ecf20Sopenharmony_ci case CEPH_MSG_FS_MAP_USER: 51258c2ecf20Sopenharmony_ci ceph_mdsc_handle_fsmap(mdsc, msg); 51268c2ecf20Sopenharmony_ci break; 51278c2ecf20Sopenharmony_ci case CEPH_MSG_CLIENT_SESSION: 51288c2ecf20Sopenharmony_ci handle_session(s, msg); 51298c2ecf20Sopenharmony_ci break; 51308c2ecf20Sopenharmony_ci case CEPH_MSG_CLIENT_REPLY: 51318c2ecf20Sopenharmony_ci handle_reply(s, msg); 51328c2ecf20Sopenharmony_ci break; 51338c2ecf20Sopenharmony_ci case CEPH_MSG_CLIENT_REQUEST_FORWARD: 51348c2ecf20Sopenharmony_ci handle_forward(mdsc, s, msg); 51358c2ecf20Sopenharmony_ci break; 51368c2ecf20Sopenharmony_ci case CEPH_MSG_CLIENT_CAPS: 51378c2ecf20Sopenharmony_ci ceph_handle_caps(s, msg); 51388c2ecf20Sopenharmony_ci break; 51398c2ecf20Sopenharmony_ci case CEPH_MSG_CLIENT_SNAP: 51408c2ecf20Sopenharmony_ci ceph_handle_snap(mdsc, s, msg); 51418c2ecf20Sopenharmony_ci break; 51428c2ecf20Sopenharmony_ci case CEPH_MSG_CLIENT_LEASE: 51438c2ecf20Sopenharmony_ci handle_lease(mdsc, s, msg); 51448c2ecf20Sopenharmony_ci break; 51458c2ecf20Sopenharmony_ci case CEPH_MSG_CLIENT_QUOTA: 51468c2ecf20Sopenharmony_ci ceph_handle_quota(mdsc, s, msg); 51478c2ecf20Sopenharmony_ci break; 51488c2ecf20Sopenharmony_ci 51498c2ecf20Sopenharmony_ci default: 51508c2ecf20Sopenharmony_ci pr_err("received unknown message type %d %s\n", type, 51518c2ecf20Sopenharmony_ci ceph_msg_type_name(type)); 51528c2ecf20Sopenharmony_ci } 51538c2ecf20Sopenharmony_ciout: 51548c2ecf20Sopenharmony_ci ceph_msg_put(msg); 51558c2ecf20Sopenharmony_ci} 51568c2ecf20Sopenharmony_ci 51578c2ecf20Sopenharmony_ci/* 51588c2ecf20Sopenharmony_ci * 
authentication 51598c2ecf20Sopenharmony_ci */ 51608c2ecf20Sopenharmony_ci 51618c2ecf20Sopenharmony_ci/* 51628c2ecf20Sopenharmony_ci * Note: returned pointer is the address of a structure that's 51638c2ecf20Sopenharmony_ci * managed separately. Caller must *not* attempt to free it. 51648c2ecf20Sopenharmony_ci */ 51658c2ecf20Sopenharmony_cistatic struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con, 51668c2ecf20Sopenharmony_ci int *proto, int force_new) 51678c2ecf20Sopenharmony_ci{ 51688c2ecf20Sopenharmony_ci struct ceph_mds_session *s = con->private; 51698c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 51708c2ecf20Sopenharmony_ci struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; 51718c2ecf20Sopenharmony_ci struct ceph_auth_handshake *auth = &s->s_auth; 51728c2ecf20Sopenharmony_ci 51738c2ecf20Sopenharmony_ci if (force_new && auth->authorizer) { 51748c2ecf20Sopenharmony_ci ceph_auth_destroy_authorizer(auth->authorizer); 51758c2ecf20Sopenharmony_ci auth->authorizer = NULL; 51768c2ecf20Sopenharmony_ci } 51778c2ecf20Sopenharmony_ci if (!auth->authorizer) { 51788c2ecf20Sopenharmony_ci int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS, 51798c2ecf20Sopenharmony_ci auth); 51808c2ecf20Sopenharmony_ci if (ret) 51818c2ecf20Sopenharmony_ci return ERR_PTR(ret); 51828c2ecf20Sopenharmony_ci } else { 51838c2ecf20Sopenharmony_ci int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS, 51848c2ecf20Sopenharmony_ci auth); 51858c2ecf20Sopenharmony_ci if (ret) 51868c2ecf20Sopenharmony_ci return ERR_PTR(ret); 51878c2ecf20Sopenharmony_ci } 51888c2ecf20Sopenharmony_ci *proto = ac->protocol; 51898c2ecf20Sopenharmony_ci 51908c2ecf20Sopenharmony_ci return auth; 51918c2ecf20Sopenharmony_ci} 51928c2ecf20Sopenharmony_ci 51938c2ecf20Sopenharmony_cistatic int add_authorizer_challenge(struct ceph_connection *con, 51948c2ecf20Sopenharmony_ci void *challenge_buf, int challenge_buf_len) 51958c2ecf20Sopenharmony_ci{ 
51968c2ecf20Sopenharmony_ci struct ceph_mds_session *s = con->private; 51978c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 51988c2ecf20Sopenharmony_ci struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; 51998c2ecf20Sopenharmony_ci 52008c2ecf20Sopenharmony_ci return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer, 52018c2ecf20Sopenharmony_ci challenge_buf, challenge_buf_len); 52028c2ecf20Sopenharmony_ci} 52038c2ecf20Sopenharmony_ci 52048c2ecf20Sopenharmony_cistatic int verify_authorizer_reply(struct ceph_connection *con) 52058c2ecf20Sopenharmony_ci{ 52068c2ecf20Sopenharmony_ci struct ceph_mds_session *s = con->private; 52078c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 52088c2ecf20Sopenharmony_ci struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; 52098c2ecf20Sopenharmony_ci 52108c2ecf20Sopenharmony_ci return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer); 52118c2ecf20Sopenharmony_ci} 52128c2ecf20Sopenharmony_ci 52138c2ecf20Sopenharmony_cistatic int invalidate_authorizer(struct ceph_connection *con) 52148c2ecf20Sopenharmony_ci{ 52158c2ecf20Sopenharmony_ci struct ceph_mds_session *s = con->private; 52168c2ecf20Sopenharmony_ci struct ceph_mds_client *mdsc = s->s_mdsc; 52178c2ecf20Sopenharmony_ci struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; 52188c2ecf20Sopenharmony_ci 52198c2ecf20Sopenharmony_ci ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); 52208c2ecf20Sopenharmony_ci 52218c2ecf20Sopenharmony_ci return ceph_monc_validate_auth(&mdsc->fsc->client->monc); 52228c2ecf20Sopenharmony_ci} 52238c2ecf20Sopenharmony_ci 52248c2ecf20Sopenharmony_cistatic struct ceph_msg *mds_alloc_msg(struct ceph_connection *con, 52258c2ecf20Sopenharmony_ci struct ceph_msg_header *hdr, int *skip) 52268c2ecf20Sopenharmony_ci{ 52278c2ecf20Sopenharmony_ci struct ceph_msg *msg; 52288c2ecf20Sopenharmony_ci int type = (int) le16_to_cpu(hdr->type); 52298c2ecf20Sopenharmony_ci int front_len = (int) 
le32_to_cpu(hdr->front_len); 52308c2ecf20Sopenharmony_ci 52318c2ecf20Sopenharmony_ci if (con->in_msg) 52328c2ecf20Sopenharmony_ci return con->in_msg; 52338c2ecf20Sopenharmony_ci 52348c2ecf20Sopenharmony_ci *skip = 0; 52358c2ecf20Sopenharmony_ci msg = ceph_msg_new(type, front_len, GFP_NOFS, false); 52368c2ecf20Sopenharmony_ci if (!msg) { 52378c2ecf20Sopenharmony_ci pr_err("unable to allocate msg type %d len %d\n", 52388c2ecf20Sopenharmony_ci type, front_len); 52398c2ecf20Sopenharmony_ci return NULL; 52408c2ecf20Sopenharmony_ci } 52418c2ecf20Sopenharmony_ci 52428c2ecf20Sopenharmony_ci return msg; 52438c2ecf20Sopenharmony_ci} 52448c2ecf20Sopenharmony_ci 52458c2ecf20Sopenharmony_cistatic int mds_sign_message(struct ceph_msg *msg) 52468c2ecf20Sopenharmony_ci{ 52478c2ecf20Sopenharmony_ci struct ceph_mds_session *s = msg->con->private; 52488c2ecf20Sopenharmony_ci struct ceph_auth_handshake *auth = &s->s_auth; 52498c2ecf20Sopenharmony_ci 52508c2ecf20Sopenharmony_ci return ceph_auth_sign_message(auth, msg); 52518c2ecf20Sopenharmony_ci} 52528c2ecf20Sopenharmony_ci 52538c2ecf20Sopenharmony_cistatic int mds_check_message_signature(struct ceph_msg *msg) 52548c2ecf20Sopenharmony_ci{ 52558c2ecf20Sopenharmony_ci struct ceph_mds_session *s = msg->con->private; 52568c2ecf20Sopenharmony_ci struct ceph_auth_handshake *auth = &s->s_auth; 52578c2ecf20Sopenharmony_ci 52588c2ecf20Sopenharmony_ci return ceph_auth_check_message_signature(auth, msg); 52598c2ecf20Sopenharmony_ci} 52608c2ecf20Sopenharmony_ci 52618c2ecf20Sopenharmony_cistatic const struct ceph_connection_operations mds_con_ops = { 52628c2ecf20Sopenharmony_ci .get = con_get, 52638c2ecf20Sopenharmony_ci .put = con_put, 52648c2ecf20Sopenharmony_ci .dispatch = dispatch, 52658c2ecf20Sopenharmony_ci .get_authorizer = get_authorizer, 52668c2ecf20Sopenharmony_ci .add_authorizer_challenge = add_authorizer_challenge, 52678c2ecf20Sopenharmony_ci .verify_authorizer_reply = verify_authorizer_reply, 52688c2ecf20Sopenharmony_ci 
.invalidate_authorizer = invalidate_authorizer, 52698c2ecf20Sopenharmony_ci .peer_reset = peer_reset, 52708c2ecf20Sopenharmony_ci .alloc_msg = mds_alloc_msg, 52718c2ecf20Sopenharmony_ci .sign_message = mds_sign_message, 52728c2ecf20Sopenharmony_ci .check_message_signature = mds_check_message_signature, 52738c2ecf20Sopenharmony_ci}; 52748c2ecf20Sopenharmony_ci 52758c2ecf20Sopenharmony_ci/* eof */ 5276