xref: /kernel/linux/linux-5.10/fs/overlayfs/namei.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2011 Novell Inc.
4 * Copyright (C) 2016 Red Hat, Inc.
5 */
6
7#include <linux/fs.h>
8#include <linux/cred.h>
9#include <linux/ctype.h>
10#include <linux/namei.h>
11#include <linux/xattr.h>
12#include <linux/ratelimit.h>
13#include <linux/mount.h>
14#include <linux/exportfs.h>
15#include "overlayfs.h"
16
17struct ovl_lookup_data {
18	struct super_block *sb;
19	struct qstr name;
20	bool is_dir;
21	bool opaque;
22	bool stop;
23	bool last;
24	char *redirect;
25	bool metacopy;
26};
27
28static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d,
29			      size_t prelen, const char *post)
30{
31	int res;
32	char *buf;
33	struct ovl_fs *ofs = OVL_FS(d->sb);
34
35	buf = ovl_get_redirect_xattr(ofs, dentry, prelen + strlen(post));
36	if (IS_ERR_OR_NULL(buf))
37		return PTR_ERR(buf);
38
39	if (buf[0] == '/') {
40		/*
41		 * One of the ancestor path elements in an absolute path
42		 * lookup in ovl_lookup_layer() could have been opaque and
43		 * that will stop further lookup in lower layers (d->stop=true)
44		 * But we have found an absolute redirect in decendant path
45		 * element and that should force continue lookup in lower
46		 * layers (reset d->stop).
47		 */
48		d->stop = false;
49	} else {
50		res = strlen(buf) + 1;
51		memmove(buf + prelen, buf, res);
52		memcpy(buf, d->name.name, prelen);
53	}
54
55	strcat(buf, post);
56	kfree(d->redirect);
57	d->redirect = buf;
58	d->name.name = d->redirect;
59	d->name.len = strlen(d->redirect);
60
61	return 0;
62}
63
64static int ovl_acceptable(void *ctx, struct dentry *dentry)
65{
66	/*
67	 * A non-dir origin may be disconnected, which is fine, because
68	 * we only need it for its unique inode number.
69	 */
70	if (!d_is_dir(dentry))
71		return 1;
72
73	/* Don't decode a deleted empty directory */
74	if (d_unhashed(dentry))
75		return 0;
76
77	/* Check if directory belongs to the layer we are decoding from */
78	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
79}
80
81/*
82 * Check validity of an overlay file handle buffer.
83 *
84 * Return 0 for a valid file handle.
85 * Return -ENODATA for "origin unknown".
86 * Return <0 for an invalid file handle.
87 */
88int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
89{
90	if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
91		return -EINVAL;
92
93	if (fb->magic != OVL_FH_MAGIC)
94		return -EINVAL;
95
96	/* Treat larger version and unknown flags as "origin unknown" */
97	if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
98		return -ENODATA;
99
100	/* Treat endianness mismatch as "origin unknown" */
101	if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
102	    (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
103		return -ENODATA;
104
105	return 0;
106}
107
108static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *dentry,
109				 enum ovl_xattr ox)
110{
111	int res, err;
112	struct ovl_fh *fh = NULL;
113
114	res = ovl_do_getxattr(ofs, dentry, ox, NULL, 0);
115	if (res < 0) {
116		if (res == -ENODATA || res == -EOPNOTSUPP)
117			return NULL;
118		goto fail;
119	}
120	/* Zero size value means "copied up but origin unknown" */
121	if (res == 0)
122		return NULL;
123
124	fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
125	if (!fh)
126		return ERR_PTR(-ENOMEM);
127
128	res = ovl_do_getxattr(ofs, dentry, ox, fh->buf, res);
129	if (res < 0)
130		goto fail;
131
132	err = ovl_check_fb_len(&fh->fb, res);
133	if (err < 0) {
134		if (err == -ENODATA)
135			goto out;
136		goto invalid;
137	}
138
139	return fh;
140
141out:
142	kfree(fh);
143	return NULL;
144
145fail:
146	pr_warn_ratelimited("failed to get origin (%i)\n", res);
147	goto out;
148invalid:
149	pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
150	goto out;
151}
152
153struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
154				  bool connected)
155{
156	struct dentry *real;
157	int bytes;
158
159	/*
160	 * Make sure that the stored uuid matches the uuid of the lower
161	 * layer where file handle will be decoded.
162	 */
163	if (!uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid))
164		return NULL;
165
166	bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
167	real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
168				  bytes >> 2, (int)fh->fb.type,
169				  connected ? ovl_acceptable : NULL, mnt);
170	if (IS_ERR(real)) {
171		/*
172		 * Treat stale file handle to lower file as "origin unknown".
173		 * upper file handle could become stale when upper file is
174		 * unlinked and this information is needed to handle stale
175		 * index entries correctly.
176		 */
177		if (real == ERR_PTR(-ESTALE) &&
178		    !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
179			real = NULL;
180		return real;
181	}
182
183	if (ovl_dentry_weird(real)) {
184		dput(real);
185		return NULL;
186	}
187
188	return real;
189}
190
191static bool ovl_is_opaquedir(struct super_block *sb, struct dentry *dentry)
192{
193	return ovl_check_dir_xattr(sb, dentry, OVL_XATTR_OPAQUE);
194}
195
196static struct dentry *ovl_lookup_positive_unlocked(const char *name,
197						   struct dentry *base, int len,
198						   bool drop_negative)
199{
200	struct dentry *ret = lookup_one_len_unlocked(name, base, len);
201
202	if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
203		if (drop_negative && ret->d_lockref.count == 1) {
204			spin_lock(&ret->d_lock);
205			/* Recheck condition under lock */
206			if (d_is_negative(ret) && ret->d_lockref.count == 1)
207				__d_drop(ret);
208			spin_unlock(&ret->d_lock);
209		}
210		dput(ret);
211		ret = ERR_PTR(-ENOENT);
212	}
213	return ret;
214}
215
216static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
217			     const char *name, unsigned int namelen,
218			     size_t prelen, const char *post,
219			     struct dentry **ret, bool drop_negative)
220{
221	struct dentry *this;
222	int err;
223	bool last_element = !post[0];
224
225	this = ovl_lookup_positive_unlocked(name, base, namelen, drop_negative);
226	if (IS_ERR(this)) {
227		err = PTR_ERR(this);
228		this = NULL;
229		if (err == -ENOENT || err == -ENAMETOOLONG)
230			goto out;
231		goto out_err;
232	}
233
234	if (ovl_dentry_weird(this)) {
235		/* Don't support traversing automounts and other weirdness */
236		err = -EREMOTE;
237		goto out_err;
238	}
239	if (ovl_is_whiteout(this)) {
240		d->stop = d->opaque = true;
241		goto put_and_out;
242	}
243	/*
244	 * This dentry should be a regular file if previous layer lookup
245	 * found a metacopy dentry.
246	 */
247	if (last_element && d->metacopy && !d_is_reg(this)) {
248		d->stop = true;
249		goto put_and_out;
250	}
251	if (!d_can_lookup(this)) {
252		if (d->is_dir || !last_element) {
253			d->stop = true;
254			goto put_and_out;
255		}
256		err = ovl_check_metacopy_xattr(OVL_FS(d->sb), this);
257		if (err < 0)
258			goto out_err;
259
260		d->metacopy = err;
261		d->stop = !d->metacopy;
262		if (!d->metacopy || d->last)
263			goto out;
264	} else {
265		if (ovl_lookup_trap_inode(d->sb, this)) {
266			/* Caught in a trap of overlapping layers */
267			err = -ELOOP;
268			goto out_err;
269		}
270
271		if (last_element)
272			d->is_dir = true;
273		if (d->last)
274			goto out;
275
276		if (ovl_is_opaquedir(d->sb, this)) {
277			d->stop = true;
278			if (last_element)
279				d->opaque = true;
280			goto out;
281		}
282	}
283	err = ovl_check_redirect(this, d, prelen, post);
284	if (err)
285		goto out_err;
286out:
287	*ret = this;
288	return 0;
289
290put_and_out:
291	dput(this);
292	this = NULL;
293	goto out;
294
295out_err:
296	dput(this);
297	return err;
298}
299
300static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
301			    struct dentry **ret, bool drop_negative)
302{
303	/* Counting down from the end, since the prefix can change */
304	size_t rem = d->name.len - 1;
305	struct dentry *dentry = NULL;
306	int err;
307
308	if (d->name.name[0] != '/')
309		return ovl_lookup_single(base, d, d->name.name, d->name.len,
310					 0, "", ret, drop_negative);
311
312	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
313		const char *s = d->name.name + d->name.len - rem;
314		const char *next = strchrnul(s, '/');
315		size_t thislen = next - s;
316		bool end = !next[0];
317
318		/* Verify we did not go off the rails */
319		if (WARN_ON(s[-1] != '/'))
320			return -EIO;
321
322		err = ovl_lookup_single(base, d, s, thislen,
323					d->name.len - rem, next, &base,
324					drop_negative);
325		dput(dentry);
326		if (err)
327			return err;
328		dentry = base;
329		if (end)
330			break;
331
332		rem -= thislen + 1;
333
334		if (WARN_ON(rem >= d->name.len))
335			return -EIO;
336	}
337	*ret = dentry;
338	return 0;
339}
340
341
342int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
343			struct dentry *upperdentry, struct ovl_path **stackp)
344{
345	struct dentry *origin = NULL;
346	int i;
347
348	for (i = 1; i < ofs->numlayer; i++) {
349		/*
350		 * If lower fs uuid is not unique among lower fs we cannot match
351		 * fh->uuid to layer.
352		 */
353		if (ofs->layers[i].fsid &&
354		    ofs->layers[i].fs->bad_uuid)
355			continue;
356
357		origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt,
358					    connected);
359		if (origin)
360			break;
361	}
362
363	if (!origin)
364		return -ESTALE;
365	else if (IS_ERR(origin))
366		return PTR_ERR(origin);
367
368	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
369	    inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
370		goto invalid;
371
372	if (!*stackp)
373		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
374	if (!*stackp) {
375		dput(origin);
376		return -ENOMEM;
377	}
378	**stackp = (struct ovl_path){
379		.dentry = origin,
380		.layer = &ofs->layers[i]
381	};
382
383	return 0;
384
385invalid:
386	pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
387			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
388			    d_inode(origin)->i_mode & S_IFMT);
389	dput(origin);
390	return -EIO;
391}
392
393static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
394			    struct ovl_path **stackp)
395{
396	struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
397	int err;
398
399	if (IS_ERR_OR_NULL(fh))
400		return PTR_ERR(fh);
401
402	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
403	kfree(fh);
404
405	if (err) {
406		if (err == -ESTALE)
407			return 0;
408		return err;
409	}
410
411	return 0;
412}
413
414/*
415 * Verify that @fh matches the file handle stored in xattr @name.
416 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
417 */
418static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
419			 enum ovl_xattr ox, const struct ovl_fh *fh)
420{
421	struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
422	int err = 0;
423
424	if (!ofh)
425		return -ENODATA;
426
427	if (IS_ERR(ofh))
428		return PTR_ERR(ofh);
429
430	if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
431		err = -ESTALE;
432
433	kfree(ofh);
434	return err;
435}
436
437/*
438 * Verify that @real dentry matches the file handle stored in xattr @name.
439 *
440 * If @set is true and there is no stored file handle, encode @real and store
441 * file handle in xattr @name.
442 *
443 * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
444 */
445int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
446		      enum ovl_xattr ox, struct dentry *real, bool is_upper,
447		      bool set)
448{
449	struct inode *inode;
450	struct ovl_fh *fh;
451	int err;
452
453	fh = ovl_encode_real_fh(real, is_upper);
454	err = PTR_ERR(fh);
455	if (IS_ERR(fh)) {
456		fh = NULL;
457		goto fail;
458	}
459
460	err = ovl_verify_fh(ofs, dentry, ox, fh);
461	if (set && err == -ENODATA)
462		err = ovl_do_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
463	if (err)
464		goto fail;
465
466out:
467	kfree(fh);
468	return err;
469
470fail:
471	inode = d_inode(real);
472	pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
473			    is_upper ? "upper" : "origin", real,
474			    inode ? inode->i_ino : 0, err);
475	goto out;
476}
477
478/* Get upper dentry from index */
479struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
480{
481	struct ovl_fh *fh;
482	struct dentry *upper;
483
484	if (!d_is_dir(index))
485		return dget(index);
486
487	fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
488	if (IS_ERR_OR_NULL(fh))
489		return ERR_CAST(fh);
490
491	upper = ovl_decode_real_fh(fh, ovl_upper_mnt(ofs), true);
492	kfree(fh);
493
494	if (IS_ERR_OR_NULL(upper))
495		return upper ?: ERR_PTR(-ESTALE);
496
497	if (!d_is_dir(upper)) {
498		pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
499				    index, upper);
500		dput(upper);
501		return ERR_PTR(-EIO);
502	}
503
504	return upper;
505}
506
507/*
508 * Verify that an index entry name matches the origin file handle stored in
509 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
510 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
511 */
512int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
513{
514	struct ovl_fh *fh = NULL;
515	size_t len;
516	struct ovl_path origin = { };
517	struct ovl_path *stack = &origin;
518	struct dentry *upper = NULL;
519	int err;
520
521	if (!d_inode(index))
522		return 0;
523
524	err = -EINVAL;
525	if (index->d_name.len < sizeof(struct ovl_fb)*2)
526		goto fail;
527
528	err = -ENOMEM;
529	len = index->d_name.len / 2;
530	fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
531	if (!fh)
532		goto fail;
533
534	err = -EINVAL;
535	if (hex2bin(fh->buf, index->d_name.name, len))
536		goto fail;
537
538	err = ovl_check_fb_len(&fh->fb, len);
539	if (err)
540		goto fail;
541
542	/*
543	 * Whiteout index entries are used as an indication that an exported
544	 * overlay file handle should be treated as stale (i.e. after unlink
545	 * of the overlay inode). These entries contain no origin xattr.
546	 */
547	if (ovl_is_whiteout(index))
548		goto out;
549
550	/*
551	 * Verifying directory index entries are not stale is expensive, so
552	 * only verify stale dir index if NFS export is enabled.
553	 */
554	if (d_is_dir(index) && !ofs->config.nfs_export)
555		goto out;
556
557	/*
558	 * Directory index entries should have 'upper' xattr pointing to the
559	 * real upper dir. Non-dir index entries are hardlinks to the upper
560	 * real inode. For non-dir index, we can read the copy up origin xattr
561	 * directly from the index dentry, but for dir index we first need to
562	 * decode the upper directory.
563	 */
564	upper = ovl_index_upper(ofs, index);
565	if (IS_ERR_OR_NULL(upper)) {
566		err = PTR_ERR(upper);
567		/*
568		 * Directory index entries with no 'upper' xattr need to be
569		 * removed. When dir index entry has a stale 'upper' xattr,
570		 * we assume that upper dir was removed and we treat the dir
571		 * index as orphan entry that needs to be whited out.
572		 */
573		if (err == -ESTALE)
574			goto orphan;
575		else if (!err)
576			err = -ESTALE;
577		goto fail;
578	}
579
580	err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
581	dput(upper);
582	if (err)
583		goto fail;
584
585	/* Check if non-dir index is orphan and don't warn before cleaning it */
586	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
587		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
588		if (err)
589			goto fail;
590
591		if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
592			goto orphan;
593	}
594
595out:
596	dput(origin.dentry);
597	kfree(fh);
598	return err;
599
600fail:
601	pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
602			    index, d_inode(index)->i_mode & S_IFMT, err);
603	goto out;
604
605orphan:
606	pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
607			    index, d_inode(index)->i_mode & S_IFMT,
608			    d_inode(index)->i_nlink);
609	err = -ENOENT;
610	goto out;
611}
612
613static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
614{
615	char *n, *s;
616
617	n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
618	if (!n)
619		return -ENOMEM;
620
621	s  = bin2hex(n, fh->buf, fh->fb.len);
622	*name = (struct qstr) QSTR_INIT(n, s - n);
623
624	return 0;
625
626}
627
628/*
629 * Lookup in indexdir for the index entry of a lower real inode or a copy up
630 * origin inode. The index entry name is the hex representation of the lower
631 * inode file handle.
632 *
633 * If the index dentry in negative, then either no lower aliases have been
634 * copied up yet, or aliases have been copied up in older kernels and are
635 * not indexed.
636 *
637 * If the index dentry for a copy up origin inode is positive, but points
638 * to an inode different than the upper inode, then either the upper inode
639 * has been copied up and not indexed or it was indexed, but since then
640 * index dir was cleared. Either way, that index cannot be used to indentify
641 * the overlay inode.
642 */
643int ovl_get_index_name(struct dentry *origin, struct qstr *name)
644{
645	struct ovl_fh *fh;
646	int err;
647
648	fh = ovl_encode_real_fh(origin, false);
649	if (IS_ERR(fh))
650		return PTR_ERR(fh);
651
652	err = ovl_get_index_name_fh(fh, name);
653
654	kfree(fh);
655	return err;
656}
657
658/* Lookup index by file handle for NFS export */
659struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
660{
661	struct dentry *index;
662	struct qstr name;
663	int err;
664
665	err = ovl_get_index_name_fh(fh, &name);
666	if (err)
667		return ERR_PTR(err);
668
669	index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
670	kfree(name.name);
671	if (IS_ERR(index)) {
672		if (PTR_ERR(index) == -ENOENT)
673			index = NULL;
674		return index;
675	}
676
677	if (ovl_is_whiteout(index))
678		err = -ESTALE;
679	else if (ovl_dentry_weird(index))
680		err = -EIO;
681	else
682		return index;
683
684	dput(index);
685	return ERR_PTR(err);
686}
687
688struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
689				struct dentry *origin, bool verify)
690{
691	struct dentry *index;
692	struct inode *inode;
693	struct qstr name;
694	bool is_dir = d_is_dir(origin);
695	int err;
696
697	err = ovl_get_index_name(origin, &name);
698	if (err)
699		return ERR_PTR(err);
700
701	index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
702	if (IS_ERR(index)) {
703		err = PTR_ERR(index);
704		if (err == -ENOENT) {
705			index = NULL;
706			goto out;
707		}
708		pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
709				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
710				    d_inode(origin)->i_ino, name.len, name.name,
711				    err);
712		goto out;
713	}
714
715	inode = d_inode(index);
716	if (ovl_is_whiteout(index) && !verify) {
717		/*
718		 * When index lookup is called with !verify for decoding an
719		 * overlay file handle, a whiteout index implies that decode
720		 * should treat file handle as stale and no need to print a
721		 * warning about it.
722		 */
723		dput(index);
724		index = ERR_PTR(-ESTALE);
725		goto out;
726	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
727		   inode_wrong_type(inode, d_inode(origin)->i_mode)) {
728		/*
729		 * Index should always be of the same file type as origin
730		 * except for the case of a whiteout index. A whiteout
731		 * index should only exist if all lower aliases have been
732		 * unlinked, which means that finding a lower origin on lookup
733		 * whose index is a whiteout should be treated as an error.
734		 */
735		pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
736				    index, d_inode(index)->i_mode & S_IFMT,
737				    d_inode(origin)->i_mode & S_IFMT);
738		goto fail;
739	} else if (is_dir && verify) {
740		if (!upper) {
741			pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
742					    origin, index);
743			goto fail;
744		}
745
746		/* Verify that dir index 'upper' xattr points to upper dir */
747		err = ovl_verify_upper(ofs, index, upper, false);
748		if (err) {
749			if (err == -ESTALE) {
750				pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
751						    upper, origin, index);
752			}
753			goto fail;
754		}
755	} else if (upper && d_inode(upper) != inode) {
756		goto out_dput;
757	}
758out:
759	kfree(name.name);
760	return index;
761
762out_dput:
763	dput(index);
764	index = NULL;
765	goto out;
766
767fail:
768	dput(index);
769	index = ERR_PTR(-EIO);
770	goto out;
771}
772
773/*
774 * Returns next layer in stack starting from top.
775 * Returns -1 if this is the last layer.
776 */
777int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
778{
779	struct ovl_entry *oe = dentry->d_fsdata;
780
781	BUG_ON(idx < 0);
782	if (idx == 0) {
783		ovl_path_upper(dentry, path);
784		if (path->dentry)
785			return oe->numlower ? 1 : -1;
786		idx++;
787	}
788	BUG_ON(idx > oe->numlower);
789	path->dentry = oe->lowerstack[idx - 1].dentry;
790	path->mnt = oe->lowerstack[idx - 1].layer->mnt;
791
792	return (idx < oe->numlower) ? idx + 1 : -1;
793}
794
795/* Fix missing 'origin' xattr */
796static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
797			  struct dentry *lower, struct dentry *upper)
798{
799	int err;
800
801	if (ovl_check_origin_xattr(ofs, upper))
802		return 0;
803
804	err = ovl_want_write(dentry);
805	if (err)
806		return err;
807
808	err = ovl_set_origin(dentry, lower, upper);
809	if (!err)
810		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
811
812	ovl_drop_write(dentry);
813	return err;
814}
815
/*
 * Look up @dentry in the overlay.
 *
 * The name is first looked up in the parent's upper directory (if the
 * parent has one) and then in the parent's lower stack, top to bottom,
 * until the lookup state says to stop.  The lower dentries found are
 * collected into a new ovl_entry, an index entry is looked up when
 * applicable, and the resulting overlay inode (if any) is spliced into
 * @dentry.
 *
 * All real filesystem accesses are done between ovl_override_creds() and
 * revert_creds().  @dir and @flags are not used by this function.
 *
 * Returns the result of d_splice_alias() on success and an ERR_PTR on
 * failure.
 */
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
			  unsigned int flags)
{
	struct ovl_entry *oe;
	const struct cred *old_cred;
	struct ovl_fs *ofs = dentry->d_sb->s_fs_info;
	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
	struct ovl_entry *roe = dentry->d_sb->s_root->d_fsdata;
	struct ovl_path *stack = NULL, *origin_path = NULL;
	struct dentry *upperdir, *upperdentry = NULL;
	struct dentry *origin = NULL;
	struct dentry *index = NULL;
	unsigned int ctr = 0;
	struct inode *inode = NULL;
	bool upperopaque = false;
	char *upperredirect = NULL;
	struct dentry *this;
	unsigned int i;
	int err;
	bool uppermetacopy = false;
	struct ovl_lookup_data d = {
		.sb = dentry->d_sb,
		.name = dentry->d_name,
		.is_dir = false,
		.opaque = false,
		.stop = false,
		.last = ofs->config.redirect_follow ? false : !poe->numlower,
		.redirect = NULL,
		.metacopy = false,
	};

	if (dentry->d_name.len > ofs->namelen)
		return ERR_PTR(-ENAMETOOLONG);

	old_cred = ovl_override_creds(dentry->d_sb);
	upperdir = ovl_dentry_upper(dentry->d_parent);
	if (upperdir) {
		err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
		if (err)
			goto out;

		if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
			dput(upperdentry);
			err = -EREMOTE;
			goto out;
		}
		if (upperdentry && !d.is_dir) {
			/*
			 * Lookup copy up origin by decoding origin file handle.
			 * We may get a disconnected dentry, which is fine,
			 * because we only need to hold the origin inode in
			 * cache and use its inode number.  We may even get a
			 * connected dentry, that is not under any of the lower
			 * layers root.  That is also fine for using its inode
			 * number - it's the same as if we held a reference
			 * to a dentry in lower layer that was moved under us.
			 */
			err = ovl_check_origin(ofs, upperdentry, &origin_path);
			if (err)
				goto out_put_upper;

			if (d.metacopy)
				uppermetacopy = true;
		}

		if (d.redirect) {
			/* Keep a private copy: d.redirect may change below */
			err = -ENOMEM;
			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
			if (!upperredirect)
				goto out_put_upper;
			/* An absolute redirect is resolved from the root */
			if (d.redirect[0] == '/')
				poe = roe;
		}
		upperopaque = d.opaque;
	}

	/* Room for one lower path per lower layer */
	if (!d.stop && poe->numlower) {
		err = -ENOMEM;
		stack = kcalloc(ofs->numlayer - 1, sizeof(struct ovl_path),
				GFP_KERNEL);
		if (!stack)
			goto out_put_upper;
	}

	for (i = 0; !d.stop && i < poe->numlower; i++) {
		struct ovl_path lower = poe->lowerstack[i];

		if (!ofs->config.redirect_follow)
			d.last = i == poe->numlower - 1;
		else
			d.last = lower.layer->idx == roe->numlower;

		err = ovl_lookup_layer(lower.dentry, &d, &this, false);
		if (err)
			goto out_put;

		if (!this)
			continue;

		if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
			dput(this);
			err = -EPERM;
			pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
			goto out_put;
		}

		/*
		 * If no origin fh is stored in upper of a merge dir, store fh
		 * of lower dir and set upper parent "impure".
		 */
		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
			err = ovl_fix_origin(ofs, dentry, this, upperdentry);
			if (err) {
				dput(this);
				goto out_put;
			}
		}

		/*
		 * When "verify_lower" feature is enabled, do not merge with a
		 * lower dir that does not match a stored origin xattr. In any
		 * case, only verified origin is used for index lookup.
		 *
		 * For non-dir dentry, if index=on, then ensure origin
		 * matches the dentry found using path based lookup,
		 * otherwise error out.
		 */
		if (upperdentry && !ctr &&
		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
		     (!d.is_dir && ofs->config.index && origin_path))) {
			err = ovl_verify_origin(ofs, upperdentry, this, false);
			if (err) {
				dput(this);
				if (d.is_dir)
					break;
				goto out_put;
			}
			origin = this;
		}

		if (d.metacopy && ctr) {
			/*
			 * Do not store intermediate metacopy dentries in
			 * lower chain, except top most lower metacopy dentry.
			 * Continue the loop so that if there is an absolute
			 * redirect on this dentry, poe can be reset to roe.
			 */
			dput(this);
			this = NULL;
		} else {
			stack[ctr].dentry = this;
			stack[ctr].layer = lower.layer;
			ctr++;
		}

		/*
		 * Following redirects can have security consequences: it's like
		 * a symlink into the lower layer without the permission checks.
		 * This is only a problem if the upper layer is untrusted (e.g
		 * comes from an USB drive).  This can allow a non-readable file
		 * or directory to become readable.
		 *
		 * Only following redirects when redirects are enabled disables
		 * this attack vector when not necessary.
		 */
		err = -EPERM;
		if (d.redirect && !ofs->config.redirect_follow) {
			pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
					    dentry);
			goto out_put;
		}

		if (d.stop)
			break;

		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
			poe = roe;
			/* Find the current layer on the root dentry */
			i = lower.layer->idx - 1;
		}
	}

	/*
	 * For regular non-metacopy upper dentries, there is no lower
	 * path based lookup, hence ctr will be zero. If a dentry is found
	 * using ORIGIN xattr on upper, install it in stack.
	 *
	 * For metacopy dentry, path based lookup will find lower dentries.
	 * Just make sure a corresponding data dentry has been found.
	 */
	if (d.metacopy || (uppermetacopy && !ctr)) {
		pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
				    dentry);
		err = -EIO;
		goto out_put;
	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
		if (WARN_ON(stack != NULL)) {
			err = -EIO;
			goto out_put;
		}
		/* origin_path becomes the stack; ownership moves with it */
		stack = origin_path;
		ctr = 1;
		origin = origin_path->dentry;
		origin_path = NULL;
	}

	/*
	 * Always lookup index if there is no-upperdentry.
	 *
	 * For the case of upperdentry, we have set origin by now if it
	 * needed to be set. There are basically three cases.
	 *
	 * For directories, lookup index by lower inode and verify it matches
	 * upper inode. We only trust dir index if we verified that lower dir
	 * matches origin, otherwise dir index entries may be inconsistent
	 * and we ignore them.
	 *
	 * For regular upper, we already set origin if upper had ORIGIN
	 * xattr. There is no verification though as there is no path
	 * based dentry lookup in lower in this case.
	 *
	 * For metacopy upper, we set a verified origin already if index
	 * is enabled and if upper had an ORIGIN xattr.
	 *
	 */
	if (!upperdentry && ctr)
		origin = stack[0].dentry;

	if (origin && ovl_indexdir(dentry->d_sb) &&
	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
		index = ovl_lookup_index(ofs, upperdentry, origin, true);
		if (IS_ERR(index)) {
			err = PTR_ERR(index);
			index = NULL;
			goto out_put;
		}
	}

	oe = ovl_alloc_entry(ctr);
	err = -ENOMEM;
	if (!oe)
		goto out_put;

	memcpy(oe->lowerstack, stack, sizeof(struct ovl_path) * ctr);
	dentry->d_fsdata = oe;

	if (upperopaque)
		ovl_dentry_set_opaque(dentry);

	if (upperdentry)
		ovl_dentry_set_upper_alias(dentry);
	else if (index) {
		/* No upper alias found by name: use the index as the upper */
		upperdentry = dget(index);
		upperredirect = ovl_get_redirect_xattr(ofs, upperdentry, 0);
		if (IS_ERR(upperredirect)) {
			err = PTR_ERR(upperredirect);
			upperredirect = NULL;
			goto out_free_oe;
		}
		err = ovl_check_metacopy_xattr(ofs, upperdentry);
		if (err < 0)
			goto out_free_oe;
		uppermetacopy = err;
	}

	if (upperdentry || ctr) {
		struct ovl_inode_params oip = {
			.upperdentry = upperdentry,
			.lowerpath = stack,
			.index = index,
			.numlower = ctr,
			.redirect = upperredirect,
			.lowerdata = (ctr > 1 && !d.is_dir) ?
				      stack[ctr - 1].dentry : NULL,
		};

		inode = ovl_get_inode(dentry->d_sb, &oip);
		err = PTR_ERR(inode);
		if (IS_ERR(inode))
			goto out_free_oe;
		if (upperdentry && !uppermetacopy)
			ovl_set_flag(OVL_UPPERDATA, inode);
	}

	ovl_dentry_init_reval(dentry, upperdentry);

	/*
	 * Success: only the temporary containers are released here; the
	 * lower dentries in the stack and upperdentry are not put.
	 */
	revert_creds(old_cred);
	if (origin_path) {
		dput(origin_path->dentry);
		kfree(origin_path);
	}
	dput(index);
	kfree(stack);
	kfree(d.redirect);
	return d_splice_alias(inode, dentry);

	/* Error unwinding: each label releases what was set up before it */
out_free_oe:
	dentry->d_fsdata = NULL;
	kfree(oe);
out_put:
	dput(index);
	for (i = 0; i < ctr; i++)
		dput(stack[i].dentry);
	kfree(stack);
out_put_upper:
	if (origin_path) {
		dput(origin_path->dentry);
		kfree(origin_path);
	}
	dput(upperdentry);
	kfree(upperredirect);
out:
	kfree(d.redirect);
	revert_creds(old_cred);
	return ERR_PTR(err);
}
1132
1133bool ovl_lower_positive(struct dentry *dentry)
1134{
1135	struct ovl_entry *poe = dentry->d_parent->d_fsdata;
1136	const struct qstr *name = &dentry->d_name;
1137	const struct cred *old_cred;
1138	unsigned int i;
1139	bool positive = false;
1140	bool done = false;
1141
1142	/*
1143	 * If dentry is negative, then lower is positive iff this is a
1144	 * whiteout.
1145	 */
1146	if (!dentry->d_inode)
1147		return ovl_dentry_is_opaque(dentry);
1148
1149	/* Negative upper -> positive lower */
1150	if (!ovl_dentry_upper(dentry))
1151		return true;
1152
1153	old_cred = ovl_override_creds(dentry->d_sb);
1154	/* Positive upper -> have to look up lower to see whether it exists */
1155	for (i = 0; !done && !positive && i < poe->numlower; i++) {
1156		struct dentry *this;
1157		struct dentry *lowerdir = poe->lowerstack[i].dentry;
1158
1159		this = lookup_positive_unlocked(name->name, lowerdir,
1160					       name->len);
1161		if (IS_ERR(this)) {
1162			switch (PTR_ERR(this)) {
1163			case -ENOENT:
1164			case -ENAMETOOLONG:
1165				break;
1166
1167			default:
1168				/*
1169				 * Assume something is there, we just couldn't
1170				 * access it.
1171				 */
1172				positive = true;
1173				break;
1174			}
1175		} else {
1176			positive = !ovl_is_whiteout(this);
1177			done = true;
1178			dput(this);
1179		}
1180	}
1181	revert_creds(old_cred);
1182
1183	return positive;
1184}
1185