xref: /kernel/linux/linux-6.6/fs/overlayfs/namei.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2011 Novell Inc.
4 * Copyright (C) 2016 Red Hat, Inc.
5 */
6
7#include <linux/fs.h>
8#include <linux/cred.h>
9#include <linux/ctype.h>
10#include <linux/namei.h>
11#include <linux/xattr.h>
12#include <linux/ratelimit.h>
13#include <linux/mount.h>
14#include <linux/exportfs.h>
15#include "overlayfs.h"
16
17#include "../internal.h"	/* for vfs_path_lookup */
18
/*
 * State carried through a lookup as it descends from one layer to the next.
 * The lookup helpers in this file read and update it in place.
 */
struct ovl_lookup_data {
	/* Overlay super block; OVL_FS(sb) is used to reach the ovl_fs */
	struct super_block *sb;
	/* Mount of the layer currently being searched */
	struct vfsmount *mnt;
	/* Name to look up; replaced by the redirect path once one is found */
	struct qstr name;
	/* Set when the last path element found can be looked up in (a dir) */
	bool is_dir;
	/* Set when a whiteout, or an opaque dir as last element, was found */
	bool opaque;
	/* Set when lookup must not continue into further layers */
	bool stop;
	/* When set, ovl_lookup_single() skips the opaque and redirect checks */
	bool last;
	/* Buffer returned by ovl_get_redirect_xattr(); backs name.name */
	char *redirect;
	/* Value returned by ovl_check_metacopy_xattr(); non-zero for metacopy */
	int metacopy;
	/* Referring to last redirect xattr */
	bool absolute_redirect;
};
32
33static int ovl_check_redirect(const struct path *path, struct ovl_lookup_data *d,
34			      size_t prelen, const char *post)
35{
36	int res;
37	char *buf;
38	struct ovl_fs *ofs = OVL_FS(d->sb);
39
40	d->absolute_redirect = false;
41	buf = ovl_get_redirect_xattr(ofs, path, prelen + strlen(post));
42	if (IS_ERR_OR_NULL(buf))
43		return PTR_ERR(buf);
44
45	if (buf[0] == '/') {
46		d->absolute_redirect = true;
47		/*
48		 * One of the ancestor path elements in an absolute path
49		 * lookup in ovl_lookup_layer() could have been opaque and
50		 * that will stop further lookup in lower layers (d->stop=true)
51		 * But we have found an absolute redirect in descendant path
52		 * element and that should force continue lookup in lower
53		 * layers (reset d->stop).
54		 */
55		d->stop = false;
56	} else {
57		res = strlen(buf) + 1;
58		memmove(buf + prelen, buf, res);
59		memcpy(buf, d->name.name, prelen);
60	}
61
62	strcat(buf, post);
63	kfree(d->redirect);
64	d->redirect = buf;
65	d->name.name = d->redirect;
66	d->name.len = strlen(d->redirect);
67
68	return 0;
69}
70
71static int ovl_acceptable(void *ctx, struct dentry *dentry)
72{
73	/*
74	 * A non-dir origin may be disconnected, which is fine, because
75	 * we only need it for its unique inode number.
76	 */
77	if (!d_is_dir(dentry))
78		return 1;
79
80	/* Don't decode a deleted empty directory */
81	if (d_unhashed(dentry))
82		return 0;
83
84	/* Check if directory belongs to the layer we are decoding from */
85	return is_subdir(dentry, ((struct vfsmount *)ctx)->mnt_root);
86}
87
88/*
89 * Check validity of an overlay file handle buffer.
90 *
91 * Return 0 for a valid file handle.
92 * Return -ENODATA for "origin unknown".
93 * Return <0 for an invalid file handle.
94 */
95int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
96{
97	if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
98		return -EINVAL;
99
100	if (fb->magic != OVL_FH_MAGIC)
101		return -EINVAL;
102
103	/* Treat larger version and unknown flags as "origin unknown" */
104	if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
105		return -ENODATA;
106
107	/* Treat endianness mismatch as "origin unknown" */
108	if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
109	    (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
110		return -ENODATA;
111
112	return 0;
113}
114
115static struct ovl_fh *ovl_get_fh(struct ovl_fs *ofs, struct dentry *upperdentry,
116				 enum ovl_xattr ox)
117{
118	int res, err;
119	struct ovl_fh *fh = NULL;
120
121	res = ovl_getxattr_upper(ofs, upperdentry, ox, NULL, 0);
122	if (res < 0) {
123		if (res == -ENODATA || res == -EOPNOTSUPP)
124			return NULL;
125		goto fail;
126	}
127	/* Zero size value means "copied up but origin unknown" */
128	if (res == 0)
129		return NULL;
130
131	fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
132	if (!fh)
133		return ERR_PTR(-ENOMEM);
134
135	res = ovl_getxattr_upper(ofs, upperdentry, ox, fh->buf, res);
136	if (res < 0)
137		goto fail;
138
139	err = ovl_check_fb_len(&fh->fb, res);
140	if (err < 0) {
141		if (err == -ENODATA)
142			goto out;
143		goto invalid;
144	}
145
146	return fh;
147
148out:
149	kfree(fh);
150	return NULL;
151
152fail:
153	pr_warn_ratelimited("failed to get origin (%i)\n", res);
154	goto out;
155invalid:
156	pr_warn_ratelimited("invalid origin (%*phN)\n", res, fh);
157	goto out;
158}
159
160struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
161				  struct vfsmount *mnt, bool connected)
162{
163	struct dentry *real;
164	int bytes;
165
166	if (!capable(CAP_DAC_READ_SEARCH))
167		return NULL;
168
169	/*
170	 * Make sure that the stored uuid matches the uuid of the lower
171	 * layer where file handle will be decoded.
172	 * In case of uuid=off option just make sure that stored uuid is null.
173	 */
174	if (ovl_origin_uuid(ofs) ?
175	    !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
176	    !uuid_is_null(&fh->fb.uuid))
177		return NULL;
178
179	bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
180	real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
181				  bytes >> 2, (int)fh->fb.type,
182				  connected ? ovl_acceptable : NULL, mnt);
183	if (IS_ERR(real)) {
184		/*
185		 * Treat stale file handle to lower file as "origin unknown".
186		 * upper file handle could become stale when upper file is
187		 * unlinked and this information is needed to handle stale
188		 * index entries correctly.
189		 */
190		if (real == ERR_PTR(-ESTALE) &&
191		    !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
192			real = NULL;
193		return real;
194	}
195
196	if (ovl_dentry_weird(real)) {
197		dput(real);
198		return NULL;
199	}
200
201	return real;
202}
203
204static bool ovl_is_opaquedir(struct ovl_fs *ofs, const struct path *path)
205{
206	return ovl_path_check_dir_xattr(ofs, path, OVL_XATTR_OPAQUE);
207}
208
209static struct dentry *ovl_lookup_positive_unlocked(struct ovl_lookup_data *d,
210						   const char *name,
211						   struct dentry *base, int len,
212						   bool drop_negative)
213{
214	struct dentry *ret = lookup_one_unlocked(mnt_idmap(d->mnt), name, base, len);
215
216	if (!IS_ERR(ret) && d_flags_negative(smp_load_acquire(&ret->d_flags))) {
217		if (drop_negative && ret->d_lockref.count == 1) {
218			spin_lock(&ret->d_lock);
219			/* Recheck condition under lock */
220			if (d_is_negative(ret) && ret->d_lockref.count == 1)
221				__d_drop(ret);
222			spin_unlock(&ret->d_lock);
223		}
224		dput(ret);
225		ret = ERR_PTR(-ENOENT);
226	}
227	return ret;
228}
229
230static int ovl_lookup_single(struct dentry *base, struct ovl_lookup_data *d,
231			     const char *name, unsigned int namelen,
232			     size_t prelen, const char *post,
233			     struct dentry **ret, bool drop_negative)
234{
235	struct dentry *this;
236	struct path path;
237	int err;
238	bool last_element = !post[0];
239
240	this = ovl_lookup_positive_unlocked(d, name, base, namelen, drop_negative);
241	if (IS_ERR(this)) {
242		err = PTR_ERR(this);
243		this = NULL;
244		if (err == -ENOENT || err == -ENAMETOOLONG)
245			goto out;
246		goto out_err;
247	}
248
249	if (ovl_dentry_weird(this)) {
250		/* Don't support traversing automounts and other weirdness */
251		err = -EREMOTE;
252		goto out_err;
253	}
254	if (ovl_is_whiteout(this)) {
255		d->stop = d->opaque = true;
256		goto put_and_out;
257	}
258	/*
259	 * This dentry should be a regular file if previous layer lookup
260	 * found a metacopy dentry.
261	 */
262	if (last_element && d->metacopy && !d_is_reg(this)) {
263		d->stop = true;
264		goto put_and_out;
265	}
266
267	path.dentry = this;
268	path.mnt = d->mnt;
269	if (!d_can_lookup(this)) {
270		if (d->is_dir || !last_element) {
271			d->stop = true;
272			goto put_and_out;
273		}
274		err = ovl_check_metacopy_xattr(OVL_FS(d->sb), &path, NULL);
275		if (err < 0)
276			goto out_err;
277
278		d->metacopy = err;
279		d->stop = !d->metacopy;
280		if (!d->metacopy || d->last)
281			goto out;
282	} else {
283		if (ovl_lookup_trap_inode(d->sb, this)) {
284			/* Caught in a trap of overlapping layers */
285			err = -ELOOP;
286			goto out_err;
287		}
288
289		if (last_element)
290			d->is_dir = true;
291		if (d->last)
292			goto out;
293
294		if (ovl_is_opaquedir(OVL_FS(d->sb), &path)) {
295			d->stop = true;
296			if (last_element)
297				d->opaque = true;
298			goto out;
299		}
300	}
301	err = ovl_check_redirect(&path, d, prelen, post);
302	if (err)
303		goto out_err;
304out:
305	*ret = this;
306	return 0;
307
308put_and_out:
309	dput(this);
310	this = NULL;
311	goto out;
312
313out_err:
314	dput(this);
315	return err;
316}
317
318static int ovl_lookup_layer(struct dentry *base, struct ovl_lookup_data *d,
319			    struct dentry **ret, bool drop_negative)
320{
321	/* Counting down from the end, since the prefix can change */
322	size_t rem = d->name.len - 1;
323	struct dentry *dentry = NULL;
324	int err;
325
326	if (d->name.name[0] != '/')
327		return ovl_lookup_single(base, d, d->name.name, d->name.len,
328					 0, "", ret, drop_negative);
329
330	while (!IS_ERR_OR_NULL(base) && d_can_lookup(base)) {
331		const char *s = d->name.name + d->name.len - rem;
332		const char *next = strchrnul(s, '/');
333		size_t thislen = next - s;
334		bool end = !next[0];
335
336		/* Verify we did not go off the rails */
337		if (WARN_ON(s[-1] != '/'))
338			return -EIO;
339
340		err = ovl_lookup_single(base, d, s, thislen,
341					d->name.len - rem, next, &base,
342					drop_negative);
343		dput(dentry);
344		if (err)
345			return err;
346		dentry = base;
347		if (end)
348			break;
349
350		rem -= thislen + 1;
351
352		if (WARN_ON(rem >= d->name.len))
353			return -EIO;
354	}
355	*ret = dentry;
356	return 0;
357}
358
359static int ovl_lookup_data_layer(struct dentry *dentry, const char *redirect,
360				 const struct ovl_layer *layer,
361				 struct path *datapath)
362{
363	int err;
364
365	err = vfs_path_lookup(layer->mnt->mnt_root, layer->mnt, redirect,
366			LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS | LOOKUP_NO_XDEV,
367			datapath);
368	pr_debug("lookup lowerdata (%pd2, redirect=\"%s\", layer=%d, err=%i)\n",
369		 dentry, redirect, layer->idx, err);
370
371	if (err)
372		return err;
373
374	err = -EREMOTE;
375	if (ovl_dentry_weird(datapath->dentry))
376		goto out_path_put;
377
378	err = -ENOENT;
379	/* Only regular file is acceptable as lower data */
380	if (!d_is_reg(datapath->dentry))
381		goto out_path_put;
382
383	return 0;
384
385out_path_put:
386	path_put(datapath);
387
388	return err;
389}
390
391/* Lookup in data-only layers by absolute redirect to layer root */
392static int ovl_lookup_data_layers(struct dentry *dentry, const char *redirect,
393				  struct ovl_path *lowerdata)
394{
395	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
396	const struct ovl_layer *layer;
397	struct path datapath;
398	int err = -ENOENT;
399	int i;
400
401	layer = &ofs->layers[ofs->numlayer - ofs->numdatalayer];
402	for (i = 0; i < ofs->numdatalayer; i++, layer++) {
403		err = ovl_lookup_data_layer(dentry, redirect, layer, &datapath);
404		if (!err) {
405			mntput(datapath.mnt);
406			lowerdata->dentry = datapath.dentry;
407			lowerdata->layer = layer;
408			return 0;
409		}
410	}
411
412	return err;
413}
414
415int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
416			struct dentry *upperdentry, struct ovl_path **stackp)
417{
418	struct dentry *origin = NULL;
419	int i;
420
421	for (i = 1; i <= ovl_numlowerlayer(ofs); i++) {
422		/*
423		 * If lower fs uuid is not unique among lower fs we cannot match
424		 * fh->uuid to layer.
425		 */
426		if (ofs->layers[i].fsid &&
427		    ofs->layers[i].fs->bad_uuid)
428			continue;
429
430		origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
431					    connected);
432		if (origin)
433			break;
434	}
435
436	if (!origin)
437		return -ESTALE;
438	else if (IS_ERR(origin))
439		return PTR_ERR(origin);
440
441	if (upperdentry && !ovl_is_whiteout(upperdentry) &&
442	    inode_wrong_type(d_inode(upperdentry), d_inode(origin)->i_mode))
443		goto invalid;
444
445	if (!*stackp)
446		*stackp = kmalloc(sizeof(struct ovl_path), GFP_KERNEL);
447	if (!*stackp) {
448		dput(origin);
449		return -ENOMEM;
450	}
451	**stackp = (struct ovl_path){
452		.dentry = origin,
453		.layer = &ofs->layers[i]
454	};
455
456	return 0;
457
458invalid:
459	pr_warn_ratelimited("invalid origin (%pd2, ftype=%x, origin ftype=%x).\n",
460			    upperdentry, d_inode(upperdentry)->i_mode & S_IFMT,
461			    d_inode(origin)->i_mode & S_IFMT);
462	dput(origin);
463	return -ESTALE;
464}
465
466static int ovl_check_origin(struct ovl_fs *ofs, struct dentry *upperdentry,
467			    struct ovl_path **stackp)
468{
469	struct ovl_fh *fh = ovl_get_fh(ofs, upperdentry, OVL_XATTR_ORIGIN);
470	int err;
471
472	if (IS_ERR_OR_NULL(fh))
473		return PTR_ERR(fh);
474
475	err = ovl_check_origin_fh(ofs, fh, false, upperdentry, stackp);
476	kfree(fh);
477
478	if (err) {
479		if (err == -ESTALE)
480			return 0;
481		return err;
482	}
483
484	return 0;
485}
486
487/*
488 * Verify that @fh matches the file handle stored in xattr @name.
489 * Return 0 on match, -ESTALE on mismatch, < 0 on error.
490 */
491static int ovl_verify_fh(struct ovl_fs *ofs, struct dentry *dentry,
492			 enum ovl_xattr ox, const struct ovl_fh *fh)
493{
494	struct ovl_fh *ofh = ovl_get_fh(ofs, dentry, ox);
495	int err = 0;
496
497	if (!ofh)
498		return -ENODATA;
499
500	if (IS_ERR(ofh))
501		return PTR_ERR(ofh);
502
503	if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
504		err = -ESTALE;
505
506	kfree(ofh);
507	return err;
508}
509
510/*
511 * Verify that @real dentry matches the file handle stored in xattr @name.
512 *
513 * If @set is true and there is no stored file handle, encode @real and store
514 * file handle in xattr @name.
515 *
516 * Return 0 on match, -ESTALE on mismatch, -ENODATA on no xattr, < 0 on error.
517 */
518int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
519		      enum ovl_xattr ox, struct dentry *real, bool is_upper,
520		      bool set)
521{
522	struct inode *inode;
523	struct ovl_fh *fh;
524	int err;
525
526	fh = ovl_encode_real_fh(ofs, real, is_upper);
527	err = PTR_ERR(fh);
528	if (IS_ERR(fh)) {
529		fh = NULL;
530		goto fail;
531	}
532
533	err = ovl_verify_fh(ofs, dentry, ox, fh);
534	if (set && err == -ENODATA)
535		err = ovl_setxattr(ofs, dentry, ox, fh->buf, fh->fb.len);
536	if (err)
537		goto fail;
538
539out:
540	kfree(fh);
541	return err;
542
543fail:
544	inode = d_inode(real);
545	pr_warn_ratelimited("failed to verify %s (%pd2, ino=%lu, err=%i)\n",
546			    is_upper ? "upper" : "origin", real,
547			    inode ? inode->i_ino : 0, err);
548	goto out;
549}
550
551/* Get upper dentry from index */
552struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index,
553			       bool connected)
554{
555	struct ovl_fh *fh;
556	struct dentry *upper;
557
558	if (!d_is_dir(index))
559		return dget(index);
560
561	fh = ovl_get_fh(ofs, index, OVL_XATTR_UPPER);
562	if (IS_ERR_OR_NULL(fh))
563		return ERR_CAST(fh);
564
565	upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), connected);
566	kfree(fh);
567
568	if (IS_ERR_OR_NULL(upper))
569		return upper ?: ERR_PTR(-ESTALE);
570
571	if (!d_is_dir(upper)) {
572		pr_warn_ratelimited("invalid index upper (%pd2, upper=%pd2).\n",
573				    index, upper);
574		dput(upper);
575		return ERR_PTR(-EIO);
576	}
577
578	return upper;
579}
580
581/*
582 * Verify that an index entry name matches the origin file handle stored in
583 * OVL_XATTR_ORIGIN and that origin file handle can be decoded to lower path.
584 * Return 0 on match, -ESTALE on mismatch or stale origin, < 0 on error.
585 */
586int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
587{
588	struct ovl_fh *fh = NULL;
589	size_t len;
590	struct ovl_path origin = { };
591	struct ovl_path *stack = &origin;
592	struct dentry *upper = NULL;
593	int err;
594
595	if (!d_inode(index))
596		return 0;
597
598	err = -EINVAL;
599	if (index->d_name.len < sizeof(struct ovl_fb)*2)
600		goto fail;
601
602	err = -ENOMEM;
603	len = index->d_name.len / 2;
604	fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
605	if (!fh)
606		goto fail;
607
608	err = -EINVAL;
609	if (hex2bin(fh->buf, index->d_name.name, len))
610		goto fail;
611
612	err = ovl_check_fb_len(&fh->fb, len);
613	if (err)
614		goto fail;
615
616	/*
617	 * Whiteout index entries are used as an indication that an exported
618	 * overlay file handle should be treated as stale (i.e. after unlink
619	 * of the overlay inode). These entries contain no origin xattr.
620	 */
621	if (ovl_is_whiteout(index))
622		goto out;
623
624	/*
625	 * Verifying directory index entries are not stale is expensive, so
626	 * only verify stale dir index if NFS export is enabled.
627	 */
628	if (d_is_dir(index) && !ofs->config.nfs_export)
629		goto out;
630
631	/*
632	 * Directory index entries should have 'upper' xattr pointing to the
633	 * real upper dir. Non-dir index entries are hardlinks to the upper
634	 * real inode. For non-dir index, we can read the copy up origin xattr
635	 * directly from the index dentry, but for dir index we first need to
636	 * decode the upper directory.
637	 */
638	upper = ovl_index_upper(ofs, index, false);
639	if (IS_ERR_OR_NULL(upper)) {
640		err = PTR_ERR(upper);
641		/*
642		 * Directory index entries with no 'upper' xattr need to be
643		 * removed. When dir index entry has a stale 'upper' xattr,
644		 * we assume that upper dir was removed and we treat the dir
645		 * index as orphan entry that needs to be whited out.
646		 */
647		if (err == -ESTALE)
648			goto orphan;
649		else if (!err)
650			err = -ESTALE;
651		goto fail;
652	}
653
654	err = ovl_verify_fh(ofs, upper, OVL_XATTR_ORIGIN, fh);
655	dput(upper);
656	if (err)
657		goto fail;
658
659	/* Check if non-dir index is orphan and don't warn before cleaning it */
660	if (!d_is_dir(index) && d_inode(index)->i_nlink == 1) {
661		err = ovl_check_origin_fh(ofs, fh, false, index, &stack);
662		if (err)
663			goto fail;
664
665		if (ovl_get_nlink(ofs, origin.dentry, index, 0) == 0)
666			goto orphan;
667	}
668
669out:
670	dput(origin.dentry);
671	kfree(fh);
672	return err;
673
674fail:
675	pr_warn_ratelimited("failed to verify index (%pd2, ftype=%x, err=%i)\n",
676			    index, d_inode(index)->i_mode & S_IFMT, err);
677	goto out;
678
679orphan:
680	pr_warn_ratelimited("orphan index entry (%pd2, ftype=%x, nlink=%u)\n",
681			    index, d_inode(index)->i_mode & S_IFMT,
682			    d_inode(index)->i_nlink);
683	err = -ENOENT;
684	goto out;
685}
686
687static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
688{
689	char *n, *s;
690
691	n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
692	if (!n)
693		return -ENOMEM;
694
695	s  = bin2hex(n, fh->buf, fh->fb.len);
696	*name = (struct qstr) QSTR_INIT(n, s - n);
697
698	return 0;
699
700}
701
702/*
703 * Lookup in indexdir for the index entry of a lower real inode or a copy up
704 * origin inode. The index entry name is the hex representation of the lower
705 * inode file handle.
706 *
707 * If the index dentry in negative, then either no lower aliases have been
708 * copied up yet, or aliases have been copied up in older kernels and are
709 * not indexed.
710 *
711 * If the index dentry for a copy up origin inode is positive, but points
712 * to an inode different than the upper inode, then either the upper inode
713 * has been copied up and not indexed or it was indexed, but since then
714 * index dir was cleared. Either way, that index cannot be used to identify
715 * the overlay inode.
716 */
717int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
718		       struct qstr *name)
719{
720	struct ovl_fh *fh;
721	int err;
722
723	fh = ovl_encode_real_fh(ofs, origin, false);
724	if (IS_ERR(fh))
725		return PTR_ERR(fh);
726
727	err = ovl_get_index_name_fh(fh, name);
728
729	kfree(fh);
730	return err;
731}
732
733/* Lookup index by file handle for NFS export */
734struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh)
735{
736	struct dentry *index;
737	struct qstr name;
738	int err;
739
740	err = ovl_get_index_name_fh(fh, &name);
741	if (err)
742		return ERR_PTR(err);
743
744	index = lookup_positive_unlocked(name.name, ofs->indexdir, name.len);
745	kfree(name.name);
746	if (IS_ERR(index)) {
747		if (PTR_ERR(index) == -ENOENT)
748			index = NULL;
749		return index;
750	}
751
752	if (ovl_is_whiteout(index))
753		err = -ESTALE;
754	else if (ovl_dentry_weird(index))
755		err = -EIO;
756	else
757		return index;
758
759	dput(index);
760	return ERR_PTR(err);
761}
762
763struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
764				struct dentry *origin, bool verify)
765{
766	struct dentry *index;
767	struct inode *inode;
768	struct qstr name;
769	bool is_dir = d_is_dir(origin);
770	int err;
771
772	err = ovl_get_index_name(ofs, origin, &name);
773	if (err)
774		return ERR_PTR(err);
775
776	index = lookup_one_positive_unlocked(ovl_upper_mnt_idmap(ofs), name.name,
777					     ofs->indexdir, name.len);
778	if (IS_ERR(index)) {
779		err = PTR_ERR(index);
780		if (err == -ENOENT) {
781			index = NULL;
782			goto out;
783		}
784		pr_warn_ratelimited("failed inode index lookup (ino=%lu, key=%.*s, err=%i);\n"
785				    "overlayfs: mount with '-o index=off' to disable inodes index.\n",
786				    d_inode(origin)->i_ino, name.len, name.name,
787				    err);
788		goto out;
789	}
790
791	inode = d_inode(index);
792	if (ovl_is_whiteout(index) && !verify) {
793		/*
794		 * When index lookup is called with !verify for decoding an
795		 * overlay file handle, a whiteout index implies that decode
796		 * should treat file handle as stale and no need to print a
797		 * warning about it.
798		 */
799		dput(index);
800		index = ERR_PTR(-ESTALE);
801		goto out;
802	} else if (ovl_dentry_weird(index) || ovl_is_whiteout(index) ||
803		   inode_wrong_type(inode, d_inode(origin)->i_mode)) {
804		/*
805		 * Index should always be of the same file type as origin
806		 * except for the case of a whiteout index. A whiteout
807		 * index should only exist if all lower aliases have been
808		 * unlinked, which means that finding a lower origin on lookup
809		 * whose index is a whiteout should be treated as an error.
810		 */
811		pr_warn_ratelimited("bad index found (index=%pd2, ftype=%x, origin ftype=%x).\n",
812				    index, d_inode(index)->i_mode & S_IFMT,
813				    d_inode(origin)->i_mode & S_IFMT);
814		goto fail;
815	} else if (is_dir && verify) {
816		if (!upper) {
817			pr_warn_ratelimited("suspected uncovered redirected dir found (origin=%pd2, index=%pd2).\n",
818					    origin, index);
819			goto fail;
820		}
821
822		/* Verify that dir index 'upper' xattr points to upper dir */
823		err = ovl_verify_upper(ofs, index, upper, false);
824		if (err) {
825			if (err == -ESTALE) {
826				pr_warn_ratelimited("suspected multiply redirected dir found (upper=%pd2, origin=%pd2, index=%pd2).\n",
827						    upper, origin, index);
828			}
829			goto fail;
830		}
831	} else if (upper && d_inode(upper) != inode) {
832		goto out_dput;
833	}
834out:
835	kfree(name.name);
836	return index;
837
838out_dput:
839	dput(index);
840	index = NULL;
841	goto out;
842
843fail:
844	dput(index);
845	index = ERR_PTR(-EIO);
846	goto out;
847}
848
849/*
850 * Returns next layer in stack starting from top.
851 * Returns -1 if this is the last layer.
852 */
853int ovl_path_next(int idx, struct dentry *dentry, struct path *path)
854{
855	struct ovl_entry *oe = OVL_E(dentry);
856	struct ovl_path *lowerstack = ovl_lowerstack(oe);
857
858	BUG_ON(idx < 0);
859	if (idx == 0) {
860		ovl_path_upper(dentry, path);
861		if (path->dentry)
862			return ovl_numlower(oe) ? 1 : -1;
863		idx++;
864	}
865	BUG_ON(idx > ovl_numlower(oe));
866	path->dentry = lowerstack[idx - 1].dentry;
867	path->mnt = lowerstack[idx - 1].layer->mnt;
868
869	return (idx < ovl_numlower(oe)) ? idx + 1 : -1;
870}
871
872/* Fix missing 'origin' xattr */
873static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
874			  struct dentry *lower, struct dentry *upper)
875{
876	int err;
877
878	if (ovl_check_origin_xattr(ofs, upper))
879		return 0;
880
881	err = ovl_want_write(dentry);
882	if (err)
883		return err;
884
885	err = ovl_set_origin(ofs, lower, upper);
886	if (!err)
887		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
888
889	ovl_drop_write(dentry);
890	return err;
891}
892
893static int ovl_maybe_validate_verity(struct dentry *dentry)
894{
895	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
896	struct inode *inode = d_inode(dentry);
897	struct path datapath, metapath;
898	int err;
899
900	if (!ofs->config.verity_mode ||
901	    !ovl_is_metacopy_dentry(dentry) ||
902	    ovl_test_flag(OVL_VERIFIED_DIGEST, inode))
903		return 0;
904
905	if (!ovl_test_flag(OVL_HAS_DIGEST, inode)) {
906		if (ofs->config.verity_mode == OVL_VERITY_REQUIRE) {
907			pr_warn_ratelimited("metacopy file '%pd' has no digest specified\n",
908					    dentry);
909			return -EIO;
910		}
911		return 0;
912	}
913
914	ovl_path_lowerdata(dentry, &datapath);
915	if (!datapath.dentry)
916		return -EIO;
917
918	ovl_path_real(dentry, &metapath);
919	if (!metapath.dentry)
920		return -EIO;
921
922	err = ovl_inode_lock_interruptible(inode);
923	if (err)
924		return err;
925
926	if (!ovl_test_flag(OVL_VERIFIED_DIGEST, inode)) {
927		const struct cred *old_cred;
928
929		old_cred = ovl_override_creds(dentry->d_sb);
930
931		err = ovl_validate_verity(ofs, &metapath, &datapath);
932		if (err == 0)
933			ovl_set_flag(OVL_VERIFIED_DIGEST, inode);
934
935		revert_creds(old_cred);
936	}
937
938	ovl_inode_unlock(inode);
939
940	return err;
941}
942
943/* Lazy lookup of lowerdata */
944static int ovl_maybe_lookup_lowerdata(struct dentry *dentry)
945{
946	struct inode *inode = d_inode(dentry);
947	const char *redirect = ovl_lowerdata_redirect(inode);
948	struct ovl_path datapath = {};
949	const struct cred *old_cred;
950	int err;
951
952	if (!redirect || ovl_dentry_lowerdata(dentry))
953		return 0;
954
955	if (redirect[0] != '/')
956		return -EIO;
957
958	err = ovl_inode_lock_interruptible(inode);
959	if (err)
960		return err;
961
962	err = 0;
963	/* Someone got here before us? */
964	if (ovl_dentry_lowerdata(dentry))
965		goto out;
966
967	old_cred = ovl_override_creds(dentry->d_sb);
968	err = ovl_lookup_data_layers(dentry, redirect, &datapath);
969	revert_creds(old_cred);
970	if (err)
971		goto out_err;
972
973	err = ovl_dentry_set_lowerdata(dentry, &datapath);
974	if (err)
975		goto out_err;
976
977out:
978	ovl_inode_unlock(inode);
979	dput(datapath.dentry);
980
981	return err;
982
983out_err:
984	pr_warn_ratelimited("lazy lowerdata lookup failed (%pd2, err=%i)\n",
985			    dentry, err);
986	goto out;
987}
988
/*
 * Make sure the lowerdata of @dentry is looked up and then validate its
 * verity digest where applicable.
 *
 * Returns 0 on success, or the first error from either step.
 */
int ovl_verify_lowerdata(struct dentry *dentry)
{
	int err = ovl_maybe_lookup_lowerdata(dentry);

	return err ? err : ovl_maybe_validate_verity(dentry);
}
999
1000struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
1001			  unsigned int flags)
1002{
1003	struct ovl_entry *oe = NULL;
1004	const struct cred *old_cred;
1005	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
1006	struct ovl_entry *poe = OVL_E(dentry->d_parent);
1007	struct ovl_entry *roe = OVL_E(dentry->d_sb->s_root);
1008	struct ovl_path *stack = NULL, *origin_path = NULL;
1009	struct dentry *upperdir, *upperdentry = NULL;
1010	struct dentry *origin = NULL;
1011	struct dentry *index = NULL;
1012	unsigned int ctr = 0;
1013	struct inode *inode = NULL;
1014	bool upperopaque = false;
1015	char *upperredirect = NULL;
1016	struct dentry *this;
1017	unsigned int i;
1018	int err;
1019	bool uppermetacopy = false;
1020	int metacopy_size = 0;
1021	struct ovl_lookup_data d = {
1022		.sb = dentry->d_sb,
1023		.name = dentry->d_name,
1024		.is_dir = false,
1025		.opaque = false,
1026		.stop = false,
1027		.last = ovl_redirect_follow(ofs) ? false : !ovl_numlower(poe),
1028		.redirect = NULL,
1029		.metacopy = 0,
1030	};
1031
1032	if (dentry->d_name.len > ofs->namelen)
1033		return ERR_PTR(-ENAMETOOLONG);
1034
1035	old_cred = ovl_override_creds(dentry->d_sb);
1036	upperdir = ovl_dentry_upper(dentry->d_parent);
1037	if (upperdir) {
1038		d.mnt = ovl_upper_mnt(ofs);
1039		err = ovl_lookup_layer(upperdir, &d, &upperdentry, true);
1040		if (err)
1041			goto out;
1042
1043		if (upperdentry && upperdentry->d_flags & DCACHE_OP_REAL) {
1044			dput(upperdentry);
1045			err = -EREMOTE;
1046			goto out;
1047		}
1048		if (upperdentry && !d.is_dir) {
1049			/*
1050			 * Lookup copy up origin by decoding origin file handle.
1051			 * We may get a disconnected dentry, which is fine,
1052			 * because we only need to hold the origin inode in
1053			 * cache and use its inode number.  We may even get a
1054			 * connected dentry, that is not under any of the lower
1055			 * layers root.  That is also fine for using it's inode
1056			 * number - it's the same as if we held a reference
1057			 * to a dentry in lower layer that was moved under us.
1058			 */
1059			err = ovl_check_origin(ofs, upperdentry, &origin_path);
1060			if (err)
1061				goto out_put_upper;
1062
1063			if (d.metacopy)
1064				uppermetacopy = true;
1065			metacopy_size = d.metacopy;
1066		}
1067
1068		if (d.redirect) {
1069			err = -ENOMEM;
1070			upperredirect = kstrdup(d.redirect, GFP_KERNEL);
1071			if (!upperredirect)
1072				goto out_put_upper;
1073			if (d.redirect[0] == '/')
1074				poe = roe;
1075		}
1076		upperopaque = d.opaque;
1077	}
1078
1079	if (!d.stop && ovl_numlower(poe)) {
1080		err = -ENOMEM;
1081		stack = ovl_stack_alloc(ofs->numlayer - 1);
1082		if (!stack)
1083			goto out_put_upper;
1084	}
1085
1086	for (i = 0; !d.stop && i < ovl_numlower(poe); i++) {
1087		struct ovl_path lower = ovl_lowerstack(poe)[i];
1088
1089		if (!ovl_redirect_follow(ofs))
1090			d.last = i == ovl_numlower(poe) - 1;
1091		else if (d.is_dir || !ofs->numdatalayer)
1092			d.last = lower.layer->idx == ovl_numlower(roe);
1093
1094		d.mnt = lower.layer->mnt;
1095		err = ovl_lookup_layer(lower.dentry, &d, &this, false);
1096		if (err)
1097			goto out_put;
1098
1099		if (!this)
1100			continue;
1101
1102		if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) {
1103			dput(this);
1104			err = -EPERM;
1105			pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry);
1106			goto out_put;
1107		}
1108
1109		/*
1110		 * If no origin fh is stored in upper of a merge dir, store fh
1111		 * of lower dir and set upper parent "impure".
1112		 */
1113		if (upperdentry && !ctr && !ofs->noxattr && d.is_dir) {
1114			err = ovl_fix_origin(ofs, dentry, this, upperdentry);
1115			if (err) {
1116				dput(this);
1117				goto out_put;
1118			}
1119		}
1120
1121		/*
1122		 * When "verify_lower" feature is enabled, do not merge with a
1123		 * lower dir that does not match a stored origin xattr. In any
1124		 * case, only verified origin is used for index lookup.
1125		 *
1126		 * For non-dir dentry, if index=on, then ensure origin
1127		 * matches the dentry found using path based lookup,
1128		 * otherwise error out.
1129		 */
1130		if (upperdentry && !ctr &&
1131		    ((d.is_dir && ovl_verify_lower(dentry->d_sb)) ||
1132		     (!d.is_dir && ofs->config.index && origin_path))) {
1133			err = ovl_verify_origin(ofs, upperdentry, this, false);
1134			if (err) {
1135				dput(this);
1136				if (d.is_dir)
1137					break;
1138				goto out_put;
1139			}
1140			origin = this;
1141		}
1142
1143		if (!upperdentry && !d.is_dir && !ctr && d.metacopy)
1144			metacopy_size = d.metacopy;
1145
1146		if (d.metacopy && ctr) {
1147			/*
1148			 * Do not store intermediate metacopy dentries in
1149			 * lower chain, except top most lower metacopy dentry.
1150			 * Continue the loop so that if there is an absolute
1151			 * redirect on this dentry, poe can be reset to roe.
1152			 */
1153			dput(this);
1154			this = NULL;
1155		} else {
1156			stack[ctr].dentry = this;
1157			stack[ctr].layer = lower.layer;
1158			ctr++;
1159		}
1160
1161		/*
1162		 * Following redirects can have security consequences: it's like
1163		 * a symlink into the lower layer without the permission checks.
1164		 * This is only a problem if the upper layer is untrusted (e.g
1165		 * comes from an USB drive).  This can allow a non-readable file
1166		 * or directory to become readable.
1167		 *
1168		 * Only following redirects when redirects are enabled disables
1169		 * this attack vector when not necessary.
1170		 */
1171		err = -EPERM;
1172		if (d.redirect && !ovl_redirect_follow(ofs)) {
1173			pr_warn_ratelimited("refusing to follow redirect for (%pd2)\n",
1174					    dentry);
1175			goto out_put;
1176		}
1177
1178		if (d.stop)
1179			break;
1180
1181		if (d.redirect && d.redirect[0] == '/' && poe != roe) {
1182			poe = roe;
1183			/* Find the current layer on the root dentry */
1184			i = lower.layer->idx - 1;
1185		}
1186	}
1187
1188	/* Defer lookup of lowerdata in data-only layers to first access */
1189	if (d.metacopy && ctr && ofs->numdatalayer && d.absolute_redirect) {
1190		d.metacopy = 0;
1191		ctr++;
1192	}
1193
1194	/*
1195	 * For regular non-metacopy upper dentries, there is no lower
1196	 * path based lookup, hence ctr will be zero. If a dentry is found
1197	 * using ORIGIN xattr on upper, install it in stack.
1198	 *
1199	 * For metacopy dentry, path based lookup will find lower dentries.
1200	 * Just make sure a corresponding data dentry has been found.
1201	 */
1202	if (d.metacopy || (uppermetacopy && !ctr)) {
1203		pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
1204				    dentry);
1205		err = -EIO;
1206		goto out_put;
1207	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
1208		if (WARN_ON(stack != NULL)) {
1209			err = -EIO;
1210			goto out_put;
1211		}
1212		stack = origin_path;
1213		ctr = 1;
1214		origin = origin_path->dentry;
1215		origin_path = NULL;
1216	}
1217
1218	/*
1219	 * Always lookup index if there is no-upperdentry.
1220	 *
1221	 * For the case of upperdentry, we have set origin by now if it
1222	 * needed to be set. There are basically three cases.
1223	 *
1224	 * For directories, lookup index by lower inode and verify it matches
1225	 * upper inode. We only trust dir index if we verified that lower dir
1226	 * matches origin, otherwise dir index entries may be inconsistent
1227	 * and we ignore them.
1228	 *
1229	 * For regular upper, we already set origin if upper had ORIGIN
1230	 * xattr. There is no verification though as there is no path
1231	 * based dentry lookup in lower in this case.
1232	 *
1233	 * For metacopy upper, we set a verified origin already if index
1234	 * is enabled and if upper had an ORIGIN xattr.
1235	 *
1236	 */
1237	if (!upperdentry && ctr)
1238		origin = stack[0].dentry;
1239
1240	if (origin && ovl_indexdir(dentry->d_sb) &&
1241	    (!d.is_dir || ovl_index_all(dentry->d_sb))) {
1242		index = ovl_lookup_index(ofs, upperdentry, origin, true);
1243		if (IS_ERR(index)) {
1244			err = PTR_ERR(index);
1245			index = NULL;
1246			goto out_put;
1247		}
1248	}
1249
1250	if (ctr) {
1251		oe = ovl_alloc_entry(ctr);
1252		err = -ENOMEM;
1253		if (!oe)
1254			goto out_put;
1255
1256		ovl_stack_cpy(ovl_lowerstack(oe), stack, ctr);
1257	}
1258
1259	if (upperopaque)
1260		ovl_dentry_set_opaque(dentry);
1261
1262	if (upperdentry)
1263		ovl_dentry_set_upper_alias(dentry);
1264	else if (index) {
1265		struct path upperpath = {
1266			.dentry = upperdentry = dget(index),
1267			.mnt = ovl_upper_mnt(ofs),
1268		};
1269
1270		/*
1271		 * It's safe to assign upperredirect here: the previous
1272		 * assignment of happens only if upperdentry is non-NULL, and
1273		 * this one only if upperdentry is NULL.
1274		 */
1275		upperredirect = ovl_get_redirect_xattr(ofs, &upperpath, 0);
1276		if (IS_ERR(upperredirect)) {
1277			err = PTR_ERR(upperredirect);
1278			upperredirect = NULL;
1279			goto out_free_oe;
1280		}
1281		err = ovl_check_metacopy_xattr(ofs, &upperpath, NULL);
1282		if (err < 0)
1283			goto out_free_oe;
1284		uppermetacopy = err;
1285		metacopy_size = err;
1286	}
1287
1288	if (upperdentry || ctr) {
1289		struct ovl_inode_params oip = {
1290			.upperdentry = upperdentry,
1291			.oe = oe,
1292			.index = index,
1293			.redirect = upperredirect,
1294		};
1295
1296		/* Store lowerdata redirect for lazy lookup */
1297		if (ctr > 1 && !d.is_dir && !stack[ctr - 1].dentry) {
1298			oip.lowerdata_redirect = d.redirect;
1299			d.redirect = NULL;
1300		}
1301		inode = ovl_get_inode(dentry->d_sb, &oip);
1302		err = PTR_ERR(inode);
1303		if (IS_ERR(inode))
1304			goto out_free_oe;
1305		if (upperdentry && !uppermetacopy)
1306			ovl_set_flag(OVL_UPPERDATA, inode);
1307
1308		if (metacopy_size > OVL_METACOPY_MIN_SIZE)
1309			ovl_set_flag(OVL_HAS_DIGEST, inode);
1310	}
1311
1312	ovl_dentry_init_reval(dentry, upperdentry, OVL_I_E(inode));
1313
1314	revert_creds(old_cred);
1315	if (origin_path) {
1316		dput(origin_path->dentry);
1317		kfree(origin_path);
1318	}
1319	dput(index);
1320	ovl_stack_free(stack, ctr);
1321	kfree(d.redirect);
1322	return d_splice_alias(inode, dentry);
1323
1324out_free_oe:
1325	ovl_free_entry(oe);
1326out_put:
1327	dput(index);
1328	ovl_stack_free(stack, ctr);
1329out_put_upper:
1330	if (origin_path) {
1331		dput(origin_path->dentry);
1332		kfree(origin_path);
1333	}
1334	dput(upperdentry);
1335	kfree(upperredirect);
1336out:
1337	kfree(d.redirect);
1338	revert_creds(old_cred);
1339	return ERR_PTR(err);
1340}
1341
1342bool ovl_lower_positive(struct dentry *dentry)
1343{
1344	struct ovl_entry *poe = OVL_E(dentry->d_parent);
1345	const struct qstr *name = &dentry->d_name;
1346	const struct cred *old_cred;
1347	unsigned int i;
1348	bool positive = false;
1349	bool done = false;
1350
1351	/*
1352	 * If dentry is negative, then lower is positive iff this is a
1353	 * whiteout.
1354	 */
1355	if (!dentry->d_inode)
1356		return ovl_dentry_is_opaque(dentry);
1357
1358	/* Negative upper -> positive lower */
1359	if (!ovl_dentry_upper(dentry))
1360		return true;
1361
1362	old_cred = ovl_override_creds(dentry->d_sb);
1363	/* Positive upper -> have to look up lower to see whether it exists */
1364	for (i = 0; !done && !positive && i < ovl_numlower(poe); i++) {
1365		struct dentry *this;
1366		struct ovl_path *parentpath = &ovl_lowerstack(poe)[i];
1367
1368		this = lookup_one_positive_unlocked(
1369				mnt_idmap(parentpath->layer->mnt),
1370				name->name, parentpath->dentry, name->len);
1371		if (IS_ERR(this)) {
1372			switch (PTR_ERR(this)) {
1373			case -ENOENT:
1374			case -ENAMETOOLONG:
1375				break;
1376
1377			default:
1378				/*
1379				 * Assume something is there, we just couldn't
1380				 * access it.
1381				 */
1382				positive = true;
1383				break;
1384			}
1385		} else {
1386			positive = !ovl_is_whiteout(this);
1387			done = true;
1388			dput(this);
1389		}
1390	}
1391	revert_creds(old_cred);
1392
1393	return positive;
1394}
1395