xref: /kernel/linux/linux-5.10/fs/ceph/export.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/ceph/ceph_debug.h>
3
4#include <linux/exportfs.h>
5#include <linux/slab.h>
6#include <asm/unaligned.h>
7
8#include "super.h"
9#include "mds_client.h"
10
11/*
12 * Basic fh
13 */
14struct ceph_nfs_fh {
15	u64 ino;
16} __attribute__ ((packed));
17
18/*
19 * Larger fh that includes parent ino.
20 */
21struct ceph_nfs_confh {
22	u64 ino, parent_ino;
23} __attribute__ ((packed));
24
25/*
26 * fh for snapped inode
27 */
28struct ceph_nfs_snapfh {
29	u64 ino;
30	u64 snapid;
31	u64 parent_ino;
32	u32 hash;
33} __attribute__ ((packed));
34
35static int ceph_encode_snapfh(struct inode *inode, u32 *rawfh, int *max_len,
36			      struct inode *parent_inode)
37{
38	static const int snap_handle_length =
39		sizeof(struct ceph_nfs_snapfh) >> 2;
40	struct ceph_nfs_snapfh *sfh = (void *)rawfh;
41	u64 snapid = ceph_snap(inode);
42	int ret;
43	bool no_parent = true;
44
45	if (*max_len < snap_handle_length) {
46		*max_len = snap_handle_length;
47		ret = FILEID_INVALID;
48		goto out;
49	}
50
51	ret =  -EINVAL;
52	if (snapid != CEPH_SNAPDIR) {
53		struct inode *dir;
54		struct dentry *dentry = d_find_alias(inode);
55		if (!dentry)
56			goto out;
57
58		rcu_read_lock();
59		dir = d_inode_rcu(dentry->d_parent);
60		if (ceph_snap(dir) != CEPH_SNAPDIR) {
61			sfh->parent_ino = ceph_ino(dir);
62			sfh->hash = ceph_dentry_hash(dir, dentry);
63			no_parent = false;
64		}
65		rcu_read_unlock();
66		dput(dentry);
67	}
68
69	if (no_parent) {
70		if (!S_ISDIR(inode->i_mode))
71			goto out;
72		sfh->parent_ino = sfh->ino;
73		sfh->hash = 0;
74	}
75	sfh->ino = ceph_ino(inode);
76	sfh->snapid = snapid;
77
78	*max_len = snap_handle_length;
79	ret = FILEID_BTRFS_WITH_PARENT;
80out:
81	dout("encode_snapfh %llx.%llx ret=%d\n", ceph_vinop(inode), ret);
82	return ret;
83}
84
85static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len,
86			  struct inode *parent_inode)
87{
88	static const int handle_length =
89		sizeof(struct ceph_nfs_fh) >> 2;
90	static const int connected_handle_length =
91		sizeof(struct ceph_nfs_confh) >> 2;
92	int type;
93
94	if (ceph_snap(inode) != CEPH_NOSNAP)
95		return ceph_encode_snapfh(inode, rawfh, max_len, parent_inode);
96
97	if (parent_inode && (*max_len < connected_handle_length)) {
98		*max_len = connected_handle_length;
99		return FILEID_INVALID;
100	} else if (*max_len < handle_length) {
101		*max_len = handle_length;
102		return FILEID_INVALID;
103	}
104
105	if (parent_inode) {
106		struct ceph_nfs_confh *cfh = (void *)rawfh;
107		dout("encode_fh %llx with parent %llx\n",
108		     ceph_ino(inode), ceph_ino(parent_inode));
109		cfh->ino = ceph_ino(inode);
110		cfh->parent_ino = ceph_ino(parent_inode);
111		*max_len = connected_handle_length;
112		type = FILEID_INO32_GEN_PARENT;
113	} else {
114		struct ceph_nfs_fh *fh = (void *)rawfh;
115		dout("encode_fh %llx\n", ceph_ino(inode));
116		fh->ino = ceph_ino(inode);
117		*max_len = handle_length;
118		type = FILEID_INO32_GEN;
119	}
120	return type;
121}
122
123static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
124{
125	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
126	struct inode *inode;
127	struct ceph_vino vino;
128	int err;
129
130	vino.ino = ino;
131	vino.snap = CEPH_NOSNAP;
132
133	if (ceph_vino_is_reserved(vino))
134		return ERR_PTR(-ESTALE);
135
136	inode = ceph_find_inode(sb, vino);
137	if (!inode) {
138		struct ceph_mds_request *req;
139		int mask;
140
141		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
142					       USE_ANY_MDS);
143		if (IS_ERR(req))
144			return ERR_CAST(req);
145
146		mask = CEPH_STAT_CAP_INODE;
147		if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
148			mask |= CEPH_CAP_XATTR_SHARED;
149		req->r_args.lookupino.mask = cpu_to_le32(mask);
150
151		req->r_ino1 = vino;
152		req->r_num_caps = 1;
153		err = ceph_mdsc_do_request(mdsc, NULL, req);
154		inode = req->r_target_inode;
155		if (inode)
156			ihold(inode);
157		ceph_mdsc_put_request(req);
158		if (!inode)
159			return err < 0 ? ERR_PTR(err) : ERR_PTR(-ESTALE);
160	}
161	return inode;
162}
163
164struct inode *ceph_lookup_inode(struct super_block *sb, u64 ino)
165{
166	struct inode *inode = __lookup_inode(sb, ino);
167	if (IS_ERR(inode))
168		return inode;
169	if (inode->i_nlink == 0) {
170		iput(inode);
171		return ERR_PTR(-ESTALE);
172	}
173	return inode;
174}
175
176static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
177{
178	struct inode *inode = __lookup_inode(sb, ino);
179	int err;
180
181	if (IS_ERR(inode))
182		return ERR_CAST(inode);
183	/* We need LINK caps to reliably check i_nlink */
184	err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
185	if (err) {
186		iput(inode);
187		return ERR_PTR(err);
188	}
189	/* -ESTALE if inode as been unlinked and no file is open */
190	if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) {
191		iput(inode);
192		return ERR_PTR(-ESTALE);
193	}
194	return d_obtain_alias(inode);
195}
196
197static struct dentry *__snapfh_to_dentry(struct super_block *sb,
198					  struct ceph_nfs_snapfh *sfh,
199					  bool want_parent)
200{
201	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
202	struct ceph_mds_request *req;
203	struct inode *inode;
204	struct ceph_vino vino;
205	int mask;
206	int err;
207	bool unlinked = false;
208
209	if (want_parent) {
210		vino.ino = sfh->parent_ino;
211		if (sfh->snapid == CEPH_SNAPDIR)
212			vino.snap = CEPH_NOSNAP;
213		else if (sfh->ino == sfh->parent_ino)
214			vino.snap = CEPH_SNAPDIR;
215		else
216			vino.snap = sfh->snapid;
217	} else {
218		vino.ino = sfh->ino;
219		vino.snap = sfh->snapid;
220	}
221
222	if (ceph_vino_is_reserved(vino))
223		return ERR_PTR(-ESTALE);
224
225	inode = ceph_find_inode(sb, vino);
226	if (inode)
227		return d_obtain_alias(inode);
228
229	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
230				       USE_ANY_MDS);
231	if (IS_ERR(req))
232		return ERR_CAST(req);
233
234	mask = CEPH_STAT_CAP_INODE;
235	if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
236		mask |= CEPH_CAP_XATTR_SHARED;
237	req->r_args.lookupino.mask = cpu_to_le32(mask);
238	if (vino.snap < CEPH_NOSNAP) {
239		req->r_args.lookupino.snapid = cpu_to_le64(vino.snap);
240		if (!want_parent && sfh->ino != sfh->parent_ino) {
241			req->r_args.lookupino.parent =
242					cpu_to_le64(sfh->parent_ino);
243			req->r_args.lookupino.hash =
244					cpu_to_le32(sfh->hash);
245		}
246	}
247
248	req->r_ino1 = vino;
249	req->r_num_caps = 1;
250	err = ceph_mdsc_do_request(mdsc, NULL, req);
251	inode = req->r_target_inode;
252	if (inode) {
253		if (vino.snap == CEPH_SNAPDIR) {
254			if (inode->i_nlink == 0)
255				unlinked = true;
256			inode = ceph_get_snapdir(inode);
257		} else if (ceph_snap(inode) == vino.snap) {
258			ihold(inode);
259		} else {
260			/* mds does not support lookup snapped inode */
261			err = -EOPNOTSUPP;
262			inode = NULL;
263		}
264	}
265	ceph_mdsc_put_request(req);
266
267	if (want_parent) {
268		dout("snapfh_to_parent %llx.%llx\n err=%d\n",
269		     vino.ino, vino.snap, err);
270	} else {
271		dout("snapfh_to_dentry %llx.%llx parent %llx hash %x err=%d",
272		      vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err);
273	}
274	if (!inode)
275		return ERR_PTR(-ESTALE);
276	/* see comments in ceph_get_parent() */
277	return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode);
278}
279
280/*
281 * convert regular fh to dentry
282 */
283static struct dentry *ceph_fh_to_dentry(struct super_block *sb,
284					struct fid *fid,
285					int fh_len, int fh_type)
286{
287	struct ceph_nfs_fh *fh = (void *)fid->raw;
288
289	if (fh_type == FILEID_BTRFS_WITH_PARENT) {
290		struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
291		return __snapfh_to_dentry(sb, sfh, false);
292	}
293
294	if (fh_type != FILEID_INO32_GEN  &&
295	    fh_type != FILEID_INO32_GEN_PARENT)
296		return NULL;
297	if (fh_len < sizeof(*fh) / 4)
298		return NULL;
299
300	dout("fh_to_dentry %llx\n", fh->ino);
301	return __fh_to_dentry(sb, fh->ino);
302}
303
304static struct dentry *__get_parent(struct super_block *sb,
305				   struct dentry *child, u64 ino)
306{
307	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
308	struct ceph_mds_request *req;
309	struct inode *inode;
310	int mask;
311	int err;
312
313	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
314				       USE_ANY_MDS);
315	if (IS_ERR(req))
316		return ERR_CAST(req);
317
318	if (child) {
319		req->r_inode = d_inode(child);
320		ihold(d_inode(child));
321	} else {
322		req->r_ino1 = (struct ceph_vino) {
323			.ino = ino,
324			.snap = CEPH_NOSNAP,
325		};
326	}
327
328	mask = CEPH_STAT_CAP_INODE;
329	if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
330		mask |= CEPH_CAP_XATTR_SHARED;
331	req->r_args.getattr.mask = cpu_to_le32(mask);
332
333	req->r_num_caps = 1;
334	err = ceph_mdsc_do_request(mdsc, NULL, req);
335	if (err) {
336		ceph_mdsc_put_request(req);
337		return ERR_PTR(err);
338	}
339
340	inode = req->r_target_inode;
341	if (inode)
342		ihold(inode);
343	ceph_mdsc_put_request(req);
344	if (!inode)
345		return ERR_PTR(-ENOENT);
346
347	return d_obtain_alias(inode);
348}
349
350static struct dentry *ceph_get_parent(struct dentry *child)
351{
352	struct inode *inode = d_inode(child);
353	struct dentry *dn;
354
355	if (ceph_snap(inode) != CEPH_NOSNAP) {
356		struct inode* dir;
357		bool unlinked = false;
358		/* do not support non-directory */
359		if (!d_is_dir(child)) {
360			dn = ERR_PTR(-EINVAL);
361			goto out;
362		}
363		dir = __lookup_inode(inode->i_sb, ceph_ino(inode));
364		if (IS_ERR(dir)) {
365			dn = ERR_CAST(dir);
366			goto out;
367		}
368		/* There can be multiple paths to access snapped inode.
369		 * For simplicity, treat snapdir of head inode as parent */
370		if (ceph_snap(inode) != CEPH_SNAPDIR) {
371			struct inode *snapdir = ceph_get_snapdir(dir);
372			if (dir->i_nlink == 0)
373				unlinked = true;
374			iput(dir);
375			if (IS_ERR(snapdir)) {
376				dn = ERR_CAST(snapdir);
377				goto out;
378			}
379			dir = snapdir;
380		}
381		/* If directory has already been deleted, futher get_parent
382		 * will fail. Do not mark snapdir dentry as disconnected,
383		 * this prevent exportfs from doing futher get_parent. */
384		if (unlinked)
385			dn = d_obtain_root(dir);
386		else
387			dn = d_obtain_alias(dir);
388	} else {
389		dn = __get_parent(child->d_sb, child, 0);
390	}
391out:
392	dout("get_parent %p ino %llx.%llx err=%ld\n",
393	     child, ceph_vinop(inode), (long)PTR_ERR_OR_ZERO(dn));
394	return dn;
395}
396
397/*
398 * convert regular fh to parent
399 */
400static struct dentry *ceph_fh_to_parent(struct super_block *sb,
401					struct fid *fid,
402					int fh_len, int fh_type)
403{
404	struct ceph_nfs_confh *cfh = (void *)fid->raw;
405	struct dentry *dentry;
406
407	if (fh_type == FILEID_BTRFS_WITH_PARENT) {
408		struct ceph_nfs_snapfh *sfh = (void *)fid->raw;
409		return __snapfh_to_dentry(sb, sfh, true);
410	}
411
412	if (fh_type != FILEID_INO32_GEN_PARENT)
413		return NULL;
414	if (fh_len < sizeof(*cfh) / 4)
415		return NULL;
416
417	dout("fh_to_parent %llx\n", cfh->parent_ino);
418	dentry = __get_parent(sb, NULL, cfh->ino);
419	if (unlikely(dentry == ERR_PTR(-ENOENT)))
420		dentry = __fh_to_dentry(sb, cfh->parent_ino);
421	return dentry;
422}
423
/*
 * Find the name of snapped @child within @parent and copy it,
 * NUL-terminated, into @name.
 *
 * Two cases are handled, and both require @child and @parent to share
 * the same inode number:
 *  - @child is the snapdir and @parent is the head inode: the name is the
 *    snapdir_name mount option.
 *  - @parent is the snapdir: the snapshots are listed with
 *    CEPH_MDS_OP_LSSNAP, one batch per loop iteration, until an entry
 *    whose snapid matches @child is found.
 *
 * Returns 0 on success, -EINVAL for unsupported parent/child
 * combinations, -ENOENT when the listing ends without a match, -ENOMEM
 * or a request error otherwise.
 *
 * NOTE(review): neither copy into @name is bounded here; presumably the
 * caller supplies a buffer large enough for any name -- confirm.
 */
static int __get_snap_name(struct dentry *parent, char *name,
			   struct dentry *child)
{
	struct inode *inode = d_inode(child);
	struct inode *dir = d_inode(parent);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	struct ceph_mds_request *req = NULL;
	char *last_name = NULL;
	/* presumably skips the "." and ".." slots -- TODO confirm */
	unsigned next_offset = 2;
	int err = -EINVAL;

	if (ceph_ino(inode) != ceph_ino(dir))
		goto out;
	if (ceph_snap(inode) == CEPH_SNAPDIR) {
		/* the snapdir is only named relative to its head inode */
		if (ceph_snap(dir) == CEPH_NOSNAP) {
			strcpy(name, fsc->mount_options->snapdir_name);
			err = 0;
		}
		goto out;
	}
	if (ceph_snap(dir) != CEPH_SNAPDIR)
		goto out;

	/* one LSSNAP request per iteration, resuming after the last entry */
	while (1) {
		struct ceph_mds_reply_info_parsed *rinfo;
		struct ceph_mds_reply_dir_entry *rde;
		int i;

		req = ceph_mdsc_create_request(fsc->mdsc, CEPH_MDS_OP_LSSNAP,
					       USE_AUTH_MDS);
		if (IS_ERR(req)) {
			err = PTR_ERR(req);
			/* nothing to put at out: */
			req = NULL;
			goto out;
		}
		err = ceph_alloc_readdir_reply_buffer(req, inode);
		if (err)
			goto out;

		req->r_direct_mode = USE_AUTH_MDS;
		req->r_readdir_offset = next_offset;
		req->r_args.readdir.flags =
				cpu_to_le16(CEPH_READDIR_REPLY_BITFLAGS);
		if (last_name) {
			/*
			 * Hand the resume name to the request and clear the
			 * local pointer so the kfree() at out: skips it.
			 * NOTE(review): presumably the request frees r_path2
			 * when it is released -- confirm.
			 */
			req->r_path2 = last_name;
			last_name = NULL;
		}

		/* references taken here are stored in the request */
		req->r_inode = dir;
		ihold(dir);
		req->r_dentry = dget(parent);

		inode_lock(dir);
		err = ceph_mdsc_do_request(fsc->mdsc, NULL, req);
		inode_unlock(dir);

		if (err < 0)
			goto out;

		/* scan this batch for the snapshot id of the child */
		rinfo = &req->r_reply_info;
		for (i = 0; i < rinfo->dir_nr; i++) {
			rde = rinfo->dir_entries + i;
			BUG_ON(!rde->inode.in);
			if (ceph_snap(inode) ==
			    le64_to_cpu(rde->inode.in->snapid)) {
				memcpy(name, rde->name, rde->name_len);
				name[rde->name_len] = '\0';
				err = 0;
				goto out;
			}
		}

		/* no more batches: fall through to -ENOENT */
		if (rinfo->dir_end)
			break;

		/* remember where to resume: last entry name and new offset */
		BUG_ON(rinfo->dir_nr <= 0);
		rde = rinfo->dir_entries + (rinfo->dir_nr - 1);
		next_offset += rinfo->dir_nr;
		last_name = kstrndup(rde->name, rde->name_len, GFP_KERNEL);
		if (!last_name) {
			err = -ENOMEM;
			goto out;
		}

		ceph_mdsc_put_request(req);
		req = NULL;
	}
	err = -ENOENT;
out:
	/* req is non-NULL here only when a request is still outstanding */
	if (req)
		ceph_mdsc_put_request(req);
	kfree(last_name);
	dout("get_snap_name %p ino %llx.%llx err=%d\n",
	     child, ceph_vinop(inode), err);
	return err;
}
520
521static int ceph_get_name(struct dentry *parent, char *name,
522			 struct dentry *child)
523{
524	struct ceph_mds_client *mdsc;
525	struct ceph_mds_request *req;
526	struct inode *inode = d_inode(child);
527	int err;
528
529	if (ceph_snap(inode) != CEPH_NOSNAP)
530		return __get_snap_name(parent, name, child);
531
532	mdsc = ceph_inode_to_client(inode)->mdsc;
533	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME,
534				       USE_ANY_MDS);
535	if (IS_ERR(req))
536		return PTR_ERR(req);
537
538	inode_lock(d_inode(parent));
539
540	req->r_inode = inode;
541	ihold(inode);
542	req->r_ino2 = ceph_vino(d_inode(parent));
543	req->r_parent = d_inode(parent);
544	set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
545	req->r_num_caps = 2;
546	err = ceph_mdsc_do_request(mdsc, NULL, req);
547
548	inode_unlock(d_inode(parent));
549
550	if (!err) {
551		struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
552		memcpy(name, rinfo->dname, rinfo->dname_len);
553		name[rinfo->dname_len] = 0;
554		dout("get_name %p ino %llx.%llx name %s\n",
555		     child, ceph_vinop(inode), name);
556	} else {
557		dout("get_name %p ino %llx.%llx err %d\n",
558		     child, ceph_vinop(inode), err);
559	}
560
561	ceph_mdsc_put_request(req);
562	return err;
563}
564
565const struct export_operations ceph_export_ops = {
566	.encode_fh = ceph_encode_fh,
567	.fh_to_dentry = ceph_fh_to_dentry,
568	.fh_to_parent = ceph_fh_to_parent,
569	.get_parent = ceph_get_parent,
570	.get_name = ceph_get_name,
571};
572