xref: /kernel/linux/linux-6.6/fs/overlayfs/file.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Copyright (C) 2017 Red Hat, Inc.
4 */
5
6#include <linux/cred.h>
7#include <linux/file.h>
8#include <linux/mount.h>
9#include <linux/xattr.h>
10#include <linux/uio.h>
11#include <linux/uaccess.h>
12#include <linux/splice.h>
13#include <linux/security.h>
14#include <linux/mm.h>
15#include <linux/fs.h>
16#include "overlayfs.h"
17
18struct ovl_aio_req {
19	struct kiocb iocb;
20	refcount_t ref;
21	struct kiocb *orig_iocb;
22};
23
24static struct kmem_cache *ovl_aio_request_cachep;
25
/*
 * One-character tag describing @realinode relative to overlay @inode, used
 * in debug output:
 *   'l' - @realinode is not the upper inode of @inode
 *   'u' - it is the upper inode and ovl_has_upperdata() is true
 *   'm' - it is the upper inode and ovl_has_upperdata() is false
 */
static char ovl_whatisit(struct inode *inode, struct inode *realinode)
{
	if (realinode != ovl_inode_upper(inode))
		return 'l';

	return ovl_has_upperdata(inode) ? 'u' : 'm';
}
35
36/* No atime modification on underlying */
37#define OVL_OPEN_FLAGS (O_NOATIME)
38
39static struct file *ovl_open_realfile(const struct file *file,
40				      const struct path *realpath)
41{
42	struct inode *realinode = d_inode(realpath->dentry);
43	struct inode *inode = file_inode(file);
44	struct mnt_idmap *real_idmap;
45	struct file *realfile;
46	const struct cred *old_cred;
47	int flags = file->f_flags | OVL_OPEN_FLAGS;
48	int acc_mode = ACC_MODE(flags);
49	int err;
50
51	if (flags & O_APPEND)
52		acc_mode |= MAY_APPEND;
53
54	old_cred = ovl_override_creds(inode->i_sb);
55	real_idmap = mnt_idmap(realpath->mnt);
56	err = inode_permission(real_idmap, realinode, MAY_OPEN | acc_mode);
57	if (err) {
58		realfile = ERR_PTR(err);
59	} else {
60		if (!inode_owner_or_capable(real_idmap, realinode))
61			flags &= ~O_NOATIME;
62
63		realfile = backing_file_open(&file->f_path, flags, realpath,
64					     current_cred());
65	}
66	revert_creds(old_cred);
67
68	pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
69		 file, file, ovl_whatisit(inode, realinode), file->f_flags,
70		 realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
71
72	return realfile;
73}
74
75#define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
76
77static int ovl_change_flags(struct file *file, unsigned int flags)
78{
79	struct inode *inode = file_inode(file);
80	int err;
81
82	flags &= OVL_SETFL_MASK;
83
84	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
85		return -EPERM;
86
87	if ((flags & O_DIRECT) && !(file->f_mode & FMODE_CAN_ODIRECT))
88		return -EINVAL;
89
90	if (file->f_op->check_flags) {
91		err = file->f_op->check_flags(flags);
92		if (err)
93			return err;
94	}
95
96	spin_lock(&file->f_lock);
97	file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
98	file->f_iocb_flags = iocb_flags(file);
99	spin_unlock(&file->f_lock);
100
101	return 0;
102}
103
104static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
105			       bool allow_meta)
106{
107	struct dentry *dentry = file_dentry(file);
108	struct path realpath;
109	int err;
110
111	real->flags = 0;
112	real->file = file->private_data;
113
114	if (allow_meta) {
115		ovl_path_real(dentry, &realpath);
116	} else {
117		/* lazy lookup and verify of lowerdata */
118		err = ovl_verify_lowerdata(dentry);
119		if (err)
120			return err;
121
122		ovl_path_realdata(dentry, &realpath);
123	}
124	if (!realpath.dentry)
125		return -EIO;
126
127	/* Has it been copied up since we'd opened it? */
128	if (unlikely(file_inode(real->file) != d_inode(realpath.dentry))) {
129		real->flags = FDPUT_FPUT;
130		real->file = ovl_open_realfile(file, &realpath);
131
132		return PTR_ERR_OR_ZERO(real->file);
133	}
134
135	/* Did the flags change since open? */
136	if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
137		return ovl_change_flags(real->file, file->f_flags);
138
139	return 0;
140}
141
142static int ovl_real_fdget(const struct file *file, struct fd *real)
143{
144	if (d_is_dir(file_dentry(file))) {
145		real->flags = 0;
146		real->file = ovl_dir_real_file(file, false);
147
148		return PTR_ERR_OR_ZERO(real->file);
149	}
150
151	return ovl_real_fdget_meta(file, real, false);
152}
153
154static int ovl_open(struct inode *inode, struct file *file)
155{
156	struct dentry *dentry = file_dentry(file);
157	struct file *realfile;
158	struct path realpath;
159	int err;
160
161	/* lazy lookup and verify lowerdata */
162	err = ovl_verify_lowerdata(dentry);
163	if (err)
164		return err;
165
166	err = ovl_maybe_copy_up(dentry, file->f_flags);
167	if (err)
168		return err;
169
170	/* No longer need these flags, so don't pass them on to underlying fs */
171	file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
172
173	ovl_path_realdata(dentry, &realpath);
174	if (!realpath.dentry)
175		return -EIO;
176
177	realfile = ovl_open_realfile(file, &realpath);
178	if (IS_ERR(realfile))
179		return PTR_ERR(realfile);
180
181	file->private_data = realfile;
182
183	return 0;
184}
185
186static int ovl_release(struct inode *inode, struct file *file)
187{
188	fput(file->private_data);
189
190	return 0;
191}
192
193static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
194{
195	struct inode *inode = file_inode(file);
196	struct fd real;
197	const struct cred *old_cred;
198	loff_t ret;
199
200	/*
201	 * The two special cases below do not need to involve real fs,
202	 * so we can optimizing concurrent callers.
203	 */
204	if (offset == 0) {
205		if (whence == SEEK_CUR)
206			return file->f_pos;
207
208		if (whence == SEEK_SET)
209			return vfs_setpos(file, 0, 0);
210	}
211
212	ret = ovl_real_fdget(file, &real);
213	if (ret)
214		return ret;
215
216	/*
217	 * Overlay file f_pos is the master copy that is preserved
218	 * through copy up and modified on read/write, but only real
219	 * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
220	 * limitations that are more strict than ->s_maxbytes for specific
221	 * files, so we use the real file to perform seeks.
222	 */
223	ovl_inode_lock(inode);
224	real.file->f_pos = file->f_pos;
225
226	old_cred = ovl_override_creds(inode->i_sb);
227	ret = vfs_llseek(real.file, offset, whence);
228	revert_creds(old_cred);
229
230	file->f_pos = real.file->f_pos;
231	ovl_inode_unlock(inode);
232
233	fdput(real);
234
235	return ret;
236}
237
238static void ovl_file_accessed(struct file *file)
239{
240	struct inode *inode, *upperinode;
241	struct timespec64 ctime, uctime;
242
243	if (file->f_flags & O_NOATIME)
244		return;
245
246	inode = file_inode(file);
247	upperinode = ovl_inode_upper(inode);
248
249	if (!upperinode)
250		return;
251
252	ctime = inode_get_ctime(inode);
253	uctime = inode_get_ctime(upperinode);
254	if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
255	     !timespec64_equal(&ctime, &uctime))) {
256		inode->i_mtime = upperinode->i_mtime;
257		inode_set_ctime_to_ts(inode, uctime);
258	}
259
260	touch_atime(&file->f_path);
261}
262
263static rwf_t ovl_iocb_to_rwf(int ifl)
264{
265	rwf_t flags = 0;
266
267	if (ifl & IOCB_NOWAIT)
268		flags |= RWF_NOWAIT;
269	if (ifl & IOCB_HIPRI)
270		flags |= RWF_HIPRI;
271	if (ifl & IOCB_DSYNC)
272		flags |= RWF_DSYNC;
273	if (ifl & IOCB_SYNC)
274		flags |= RWF_SYNC;
275
276	return flags;
277}
278
279static inline void ovl_aio_put(struct ovl_aio_req *aio_req)
280{
281	if (refcount_dec_and_test(&aio_req->ref)) {
282		fput(aio_req->iocb.ki_filp);
283		kmem_cache_free(ovl_aio_request_cachep, aio_req);
284	}
285}
286
287static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
288{
289	struct kiocb *iocb = &aio_req->iocb;
290	struct kiocb *orig_iocb = aio_req->orig_iocb;
291
292	if (iocb->ki_flags & IOCB_WRITE) {
293		struct inode *inode = file_inode(orig_iocb->ki_filp);
294
295		kiocb_end_write(iocb);
296		ovl_copyattr(inode);
297	}
298
299	orig_iocb->ki_pos = iocb->ki_pos;
300	ovl_aio_put(aio_req);
301}
302
303static void ovl_aio_rw_complete(struct kiocb *iocb, long res)
304{
305	struct ovl_aio_req *aio_req = container_of(iocb,
306						   struct ovl_aio_req, iocb);
307	struct kiocb *orig_iocb = aio_req->orig_iocb;
308
309	ovl_aio_cleanup_handler(aio_req);
310	orig_iocb->ki_complete(orig_iocb, res);
311}
312
313static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
314{
315	struct file *file = iocb->ki_filp;
316	struct fd real;
317	const struct cred *old_cred;
318	ssize_t ret;
319
320	if (!iov_iter_count(iter))
321		return 0;
322
323	ret = ovl_real_fdget(file, &real);
324	if (ret)
325		return ret;
326
327	ret = -EINVAL;
328	if (iocb->ki_flags & IOCB_DIRECT &&
329	    !(real.file->f_mode & FMODE_CAN_ODIRECT))
330		goto out_fdput;
331
332	old_cred = ovl_override_creds(file_inode(file)->i_sb);
333	if (is_sync_kiocb(iocb)) {
334		ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
335				    ovl_iocb_to_rwf(iocb->ki_flags));
336	} else {
337		struct ovl_aio_req *aio_req;
338
339		ret = -ENOMEM;
340		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
341		if (!aio_req)
342			goto out;
343
344		aio_req->orig_iocb = iocb;
345		kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
346		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
347		refcount_set(&aio_req->ref, 2);
348		ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter);
349		ovl_aio_put(aio_req);
350		if (ret != -EIOCBQUEUED)
351			ovl_aio_cleanup_handler(aio_req);
352	}
353out:
354	revert_creds(old_cred);
355	ovl_file_accessed(file);
356out_fdput:
357	fdput(real);
358
359	return ret;
360}
361
362static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
363{
364	struct file *file = iocb->ki_filp;
365	struct inode *inode = file_inode(file);
366	struct fd real;
367	const struct cred *old_cred;
368	ssize_t ret;
369	int ifl = iocb->ki_flags;
370
371	if (!iov_iter_count(iter))
372		return 0;
373
374	inode_lock(inode);
375	/* Update mode */
376	ovl_copyattr(inode);
377	ret = file_remove_privs(file);
378	if (ret)
379		goto out_unlock;
380
381	ret = ovl_real_fdget(file, &real);
382	if (ret)
383		goto out_unlock;
384
385	ret = -EINVAL;
386	if (iocb->ki_flags & IOCB_DIRECT &&
387	    !(real.file->f_mode & FMODE_CAN_ODIRECT))
388		goto out_fdput;
389
390	if (!ovl_should_sync(OVL_FS(inode->i_sb)))
391		ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
392
393	/*
394	 * Overlayfs doesn't support deferred completions, don't copy
395	 * this property in case it is set by the issuer.
396	 */
397	ifl &= ~IOCB_DIO_CALLER_COMP;
398
399	old_cred = ovl_override_creds(file_inode(file)->i_sb);
400	if (is_sync_kiocb(iocb)) {
401		file_start_write(real.file);
402		ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
403				     ovl_iocb_to_rwf(ifl));
404		file_end_write(real.file);
405		/* Update size */
406		ovl_copyattr(inode);
407	} else {
408		struct ovl_aio_req *aio_req;
409
410		ret = -ENOMEM;
411		aio_req = kmem_cache_zalloc(ovl_aio_request_cachep, GFP_KERNEL);
412		if (!aio_req)
413			goto out;
414
415		aio_req->orig_iocb = iocb;
416		kiocb_clone(&aio_req->iocb, iocb, get_file(real.file));
417		aio_req->iocb.ki_flags = ifl;
418		aio_req->iocb.ki_complete = ovl_aio_rw_complete;
419		refcount_set(&aio_req->ref, 2);
420		kiocb_start_write(&aio_req->iocb);
421		ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
422		ovl_aio_put(aio_req);
423		if (ret != -EIOCBQUEUED)
424			ovl_aio_cleanup_handler(aio_req);
425	}
426out:
427	revert_creds(old_cred);
428out_fdput:
429	fdput(real);
430
431out_unlock:
432	inode_unlock(inode);
433
434	return ret;
435}
436
437static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
438			       struct pipe_inode_info *pipe, size_t len,
439			       unsigned int flags)
440{
441	const struct cred *old_cred;
442	struct fd real;
443	ssize_t ret;
444
445	ret = ovl_real_fdget(in, &real);
446	if (ret)
447		return ret;
448
449	old_cred = ovl_override_creds(file_inode(in)->i_sb);
450	ret = vfs_splice_read(real.file, ppos, pipe, len, flags);
451	revert_creds(old_cred);
452	ovl_file_accessed(in);
453
454	fdput(real);
455	return ret;
456}
457
458/*
459 * Calling iter_file_splice_write() directly from overlay's f_op may deadlock
460 * due to lock order inversion between pipe->mutex in iter_file_splice_write()
461 * and file_start_write(real.file) in ovl_write_iter().
462 *
463 * So do everything ovl_write_iter() does and call iter_file_splice_write() on
464 * the real file.
465 */
466static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
467				loff_t *ppos, size_t len, unsigned int flags)
468{
469	struct fd real;
470	const struct cred *old_cred;
471	struct inode *inode = file_inode(out);
472	ssize_t ret;
473
474	inode_lock(inode);
475	/* Update mode */
476	ovl_copyattr(inode);
477	ret = file_remove_privs(out);
478	if (ret)
479		goto out_unlock;
480
481	ret = ovl_real_fdget(out, &real);
482	if (ret)
483		goto out_unlock;
484
485	old_cred = ovl_override_creds(inode->i_sb);
486	file_start_write(real.file);
487
488	ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
489
490	file_end_write(real.file);
491	/* Update size */
492	ovl_copyattr(inode);
493	revert_creds(old_cred);
494	fdput(real);
495
496out_unlock:
497	inode_unlock(inode);
498
499	return ret;
500}
501
502static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
503{
504	struct fd real;
505	const struct cred *old_cred;
506	int ret;
507
508	ret = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
509	if (ret <= 0)
510		return ret;
511
512	ret = ovl_real_fdget_meta(file, &real, !datasync);
513	if (ret)
514		return ret;
515
516	/* Don't sync lower file for fear of receiving EROFS error */
517	if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
518		old_cred = ovl_override_creds(file_inode(file)->i_sb);
519		ret = vfs_fsync_range(real.file, start, end, datasync);
520		revert_creds(old_cred);
521	}
522
523	fdput(real);
524
525	return ret;
526}
527
528static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
529{
530	struct file *realfile = file->private_data;
531	const struct cred *old_cred;
532	int ret;
533
534	if (!realfile->f_op->mmap)
535		return -ENODEV;
536
537	if (WARN_ON(file != vma->vm_file))
538		return -EIO;
539
540	vma_set_file(vma, realfile);
541
542	old_cred = ovl_override_creds(file_inode(file)->i_sb);
543	ret = call_mmap(vma->vm_file, vma);
544	revert_creds(old_cred);
545	ovl_file_accessed(file);
546
547	return ret;
548}
549
550static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
551{
552	struct inode *inode = file_inode(file);
553	struct fd real;
554	const struct cred *old_cred;
555	int ret;
556
557	inode_lock(inode);
558	/* Update mode */
559	ovl_copyattr(inode);
560	ret = file_remove_privs(file);
561	if (ret)
562		goto out_unlock;
563
564	ret = ovl_real_fdget(file, &real);
565	if (ret)
566		goto out_unlock;
567
568	old_cred = ovl_override_creds(file_inode(file)->i_sb);
569	ret = vfs_fallocate(real.file, mode, offset, len);
570	revert_creds(old_cred);
571
572	/* Update size */
573	ovl_copyattr(inode);
574
575	fdput(real);
576
577out_unlock:
578	inode_unlock(inode);
579
580	return ret;
581}
582
583static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
584{
585	struct fd real;
586	const struct cred *old_cred;
587	int ret;
588
589	ret = ovl_real_fdget(file, &real);
590	if (ret)
591		return ret;
592
593	old_cred = ovl_override_creds(file_inode(file)->i_sb);
594	ret = vfs_fadvise(real.file, offset, len, advice);
595	revert_creds(old_cred);
596
597	fdput(real);
598
599	return ret;
600}
601
602enum ovl_copyop {
603	OVL_COPY,
604	OVL_CLONE,
605	OVL_DEDUPE,
606};
607
608static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
609			    struct file *file_out, loff_t pos_out,
610			    loff_t len, unsigned int flags, enum ovl_copyop op)
611{
612	struct inode *inode_out = file_inode(file_out);
613	struct fd real_in, real_out;
614	const struct cred *old_cred;
615	loff_t ret;
616
617	inode_lock(inode_out);
618	if (op != OVL_DEDUPE) {
619		/* Update mode */
620		ovl_copyattr(inode_out);
621		ret = file_remove_privs(file_out);
622		if (ret)
623			goto out_unlock;
624	}
625
626	ret = ovl_real_fdget(file_out, &real_out);
627	if (ret)
628		goto out_unlock;
629
630	ret = ovl_real_fdget(file_in, &real_in);
631	if (ret) {
632		fdput(real_out);
633		goto out_unlock;
634	}
635
636	old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
637	switch (op) {
638	case OVL_COPY:
639		ret = vfs_copy_file_range(real_in.file, pos_in,
640					  real_out.file, pos_out, len, flags);
641		break;
642
643	case OVL_CLONE:
644		ret = vfs_clone_file_range(real_in.file, pos_in,
645					   real_out.file, pos_out, len, flags);
646		break;
647
648	case OVL_DEDUPE:
649		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
650						real_out.file, pos_out, len,
651						flags);
652		break;
653	}
654	revert_creds(old_cred);
655
656	/* Update size */
657	ovl_copyattr(inode_out);
658
659	fdput(real_in);
660	fdput(real_out);
661
662out_unlock:
663	inode_unlock(inode_out);
664
665	return ret;
666}
667
668static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
669				   struct file *file_out, loff_t pos_out,
670				   size_t len, unsigned int flags)
671{
672	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
673			    OVL_COPY);
674}
675
676static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
677				   struct file *file_out, loff_t pos_out,
678				   loff_t len, unsigned int remap_flags)
679{
680	enum ovl_copyop op;
681
682	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
683		return -EINVAL;
684
685	if (remap_flags & REMAP_FILE_DEDUP)
686		op = OVL_DEDUPE;
687	else
688		op = OVL_CLONE;
689
690	/*
691	 * Don't copy up because of a dedupe request, this wouldn't make sense
692	 * most of the time (data would be duplicated instead of deduplicated).
693	 */
694	if (op == OVL_DEDUPE &&
695	    (!ovl_inode_upper(file_inode(file_in)) ||
696	     !ovl_inode_upper(file_inode(file_out))))
697		return -EPERM;
698
699	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
700			    remap_flags, op);
701}
702
703static int ovl_flush(struct file *file, fl_owner_t id)
704{
705	struct fd real;
706	const struct cred *old_cred;
707	int err;
708
709	err = ovl_real_fdget(file, &real);
710	if (err)
711		return err;
712
713	if (real.file->f_op->flush) {
714		old_cred = ovl_override_creds(file_inode(file)->i_sb);
715		err = real.file->f_op->flush(real.file, id);
716		revert_creds(old_cred);
717	}
718	fdput(real);
719
720	return err;
721}
722
723const struct file_operations ovl_file_operations = {
724	.open		= ovl_open,
725	.release	= ovl_release,
726	.llseek		= ovl_llseek,
727	.read_iter	= ovl_read_iter,
728	.write_iter	= ovl_write_iter,
729	.fsync		= ovl_fsync,
730	.mmap		= ovl_mmap,
731	.fallocate	= ovl_fallocate,
732	.fadvise	= ovl_fadvise,
733	.flush		= ovl_flush,
734	.splice_read    = ovl_splice_read,
735	.splice_write   = ovl_splice_write,
736
737	.copy_file_range	= ovl_copy_file_range,
738	.remap_file_range	= ovl_remap_file_range,
739};
740
741int __init ovl_aio_request_cache_init(void)
742{
743	ovl_aio_request_cachep = kmem_cache_create("ovl_aio_req",
744						   sizeof(struct ovl_aio_req),
745						   0, SLAB_HWCACHE_ALIGN, NULL);
746	if (!ovl_aio_request_cachep)
747		return -ENOMEM;
748
749	return 0;
750}
751
752void ovl_aio_request_cache_destroy(void)
753{
754	kmem_cache_destroy(ovl_aio_request_cachep);
755}
756