18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *  linux/fs/ext4/file.c
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright (C) 1992, 1993, 1994, 1995
68c2ecf20Sopenharmony_ci * Remy Card (card@masi.ibp.fr)
78c2ecf20Sopenharmony_ci * Laboratoire MASI - Institut Blaise Pascal
88c2ecf20Sopenharmony_ci * Universite Pierre et Marie Curie (Paris VI)
98c2ecf20Sopenharmony_ci *
108c2ecf20Sopenharmony_ci *  from
118c2ecf20Sopenharmony_ci *
128c2ecf20Sopenharmony_ci *  linux/fs/minix/file.c
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci *  Copyright (C) 1991, 1992  Linus Torvalds
158c2ecf20Sopenharmony_ci *
168c2ecf20Sopenharmony_ci *  ext4 fs regular file handling primitives
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci *  64-bit file support on 64-bit platforms by Jakub Jelinek
198c2ecf20Sopenharmony_ci *	(jj@sunsite.ms.mff.cuni.cz)
208c2ecf20Sopenharmony_ci */
218c2ecf20Sopenharmony_ci
228c2ecf20Sopenharmony_ci#include <linux/time.h>
238c2ecf20Sopenharmony_ci#include <linux/fs.h>
248c2ecf20Sopenharmony_ci#include <linux/iomap.h>
258c2ecf20Sopenharmony_ci#include <linux/mount.h>
268c2ecf20Sopenharmony_ci#include <linux/path.h>
278c2ecf20Sopenharmony_ci#include <linux/dax.h>
288c2ecf20Sopenharmony_ci#include <linux/quotaops.h>
298c2ecf20Sopenharmony_ci#include <linux/pagevec.h>
308c2ecf20Sopenharmony_ci#include <linux/uio.h>
318c2ecf20Sopenharmony_ci#include <linux/mman.h>
328c2ecf20Sopenharmony_ci#include <linux/backing-dev.h>
338c2ecf20Sopenharmony_ci#include "ext4.h"
348c2ecf20Sopenharmony_ci#include "ext4_jbd2.h"
358c2ecf20Sopenharmony_ci#include "xattr.h"
368c2ecf20Sopenharmony_ci#include "acl.h"
378c2ecf20Sopenharmony_ci#include "truncate.h"
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_cistatic bool ext4_dio_supported(struct inode *inode)
408c2ecf20Sopenharmony_ci{
418c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode))
428c2ecf20Sopenharmony_ci		return false;
438c2ecf20Sopenharmony_ci	if (fsverity_active(inode))
448c2ecf20Sopenharmony_ci		return false;
458c2ecf20Sopenharmony_ci	if (ext4_should_journal_data(inode))
468c2ecf20Sopenharmony_ci		return false;
478c2ecf20Sopenharmony_ci	if (ext4_has_inline_data(inode))
488c2ecf20Sopenharmony_ci		return false;
498c2ecf20Sopenharmony_ci	return true;
508c2ecf20Sopenharmony_ci}
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_cistatic ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
538c2ecf20Sopenharmony_ci{
548c2ecf20Sopenharmony_ci	ssize_t ret;
558c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT) {
588c2ecf20Sopenharmony_ci		if (!inode_trylock_shared(inode))
598c2ecf20Sopenharmony_ci			return -EAGAIN;
608c2ecf20Sopenharmony_ci	} else {
618c2ecf20Sopenharmony_ci		inode_lock_shared(inode);
628c2ecf20Sopenharmony_ci	}
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	if (!ext4_dio_supported(inode)) {
658c2ecf20Sopenharmony_ci		inode_unlock_shared(inode);
668c2ecf20Sopenharmony_ci		/*
678c2ecf20Sopenharmony_ci		 * Fallback to buffered I/O if the operation being performed on
688c2ecf20Sopenharmony_ci		 * the inode is not supported by direct I/O. The IOCB_DIRECT
698c2ecf20Sopenharmony_ci		 * flag needs to be cleared here in order to ensure that the
708c2ecf20Sopenharmony_ci		 * direct I/O path within generic_file_read_iter() is not
718c2ecf20Sopenharmony_ci		 * taken.
728c2ecf20Sopenharmony_ci		 */
738c2ecf20Sopenharmony_ci		iocb->ki_flags &= ~IOCB_DIRECT;
748c2ecf20Sopenharmony_ci		return generic_file_read_iter(iocb, to);
758c2ecf20Sopenharmony_ci	}
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL,
788c2ecf20Sopenharmony_ci			   is_sync_kiocb(iocb));
798c2ecf20Sopenharmony_ci	inode_unlock_shared(inode);
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_ci	file_accessed(iocb->ki_filp);
828c2ecf20Sopenharmony_ci	return ret;
838c2ecf20Sopenharmony_ci}
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_DAX
868c2ecf20Sopenharmony_cistatic ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
878c2ecf20Sopenharmony_ci{
888c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
898c2ecf20Sopenharmony_ci	ssize_t ret;
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT) {
928c2ecf20Sopenharmony_ci		if (!inode_trylock_shared(inode))
938c2ecf20Sopenharmony_ci			return -EAGAIN;
948c2ecf20Sopenharmony_ci	} else {
958c2ecf20Sopenharmony_ci		inode_lock_shared(inode);
968c2ecf20Sopenharmony_ci	}
978c2ecf20Sopenharmony_ci	/*
988c2ecf20Sopenharmony_ci	 * Recheck under inode lock - at this point we are sure it cannot
998c2ecf20Sopenharmony_ci	 * change anymore
1008c2ecf20Sopenharmony_ci	 */
1018c2ecf20Sopenharmony_ci	if (!IS_DAX(inode)) {
1028c2ecf20Sopenharmony_ci		inode_unlock_shared(inode);
1038c2ecf20Sopenharmony_ci		/* Fallback to buffered IO in case we cannot support DAX */
1048c2ecf20Sopenharmony_ci		return generic_file_read_iter(iocb, to);
1058c2ecf20Sopenharmony_ci	}
1068c2ecf20Sopenharmony_ci	ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops);
1078c2ecf20Sopenharmony_ci	inode_unlock_shared(inode);
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci	file_accessed(iocb->ki_filp);
1108c2ecf20Sopenharmony_ci	return ret;
1118c2ecf20Sopenharmony_ci}
1128c2ecf20Sopenharmony_ci#endif
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_cistatic ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
1158c2ecf20Sopenharmony_ci{
1168c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
1198c2ecf20Sopenharmony_ci		return -EIO;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	if (!iov_iter_count(to))
1228c2ecf20Sopenharmony_ci		return 0; /* skip atime */
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_DAX
1258c2ecf20Sopenharmony_ci	if (IS_DAX(inode))
1268c2ecf20Sopenharmony_ci		return ext4_dax_read_iter(iocb, to);
1278c2ecf20Sopenharmony_ci#endif
1288c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_DIRECT)
1298c2ecf20Sopenharmony_ci		return ext4_dio_read_iter(iocb, to);
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci	return generic_file_read_iter(iocb, to);
1328c2ecf20Sopenharmony_ci}
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci/*
1358c2ecf20Sopenharmony_ci * Called when an inode is released. Note that this is different
1368c2ecf20Sopenharmony_ci * from ext4_file_open: open gets called at every open, but release
1378c2ecf20Sopenharmony_ci * gets called only when /all/ the files are closed.
1388c2ecf20Sopenharmony_ci */
1398c2ecf20Sopenharmony_cistatic int ext4_release_file(struct inode *inode, struct file *filp)
1408c2ecf20Sopenharmony_ci{
1418c2ecf20Sopenharmony_ci	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
1428c2ecf20Sopenharmony_ci		ext4_alloc_da_blocks(inode);
1438c2ecf20Sopenharmony_ci		ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
1448c2ecf20Sopenharmony_ci	}
1458c2ecf20Sopenharmony_ci	/* if we are the last writer on the inode, drop the block reservation */
1468c2ecf20Sopenharmony_ci	if ((filp->f_mode & FMODE_WRITE) &&
1478c2ecf20Sopenharmony_ci			(atomic_read(&inode->i_writecount) == 1) &&
1488c2ecf20Sopenharmony_ci			!EXT4_I(inode)->i_reserved_data_blocks) {
1498c2ecf20Sopenharmony_ci		down_write(&EXT4_I(inode)->i_data_sem);
1508c2ecf20Sopenharmony_ci		ext4_discard_preallocations(inode, 0);
1518c2ecf20Sopenharmony_ci		up_write(&EXT4_I(inode)->i_data_sem);
1528c2ecf20Sopenharmony_ci	}
1538c2ecf20Sopenharmony_ci	if (is_dx(inode) && filp->private_data)
1548c2ecf20Sopenharmony_ci		ext4_htree_free_dir_info(filp->private_data);
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci	return 0;
1578c2ecf20Sopenharmony_ci}
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci/*
1608c2ecf20Sopenharmony_ci * This tests whether the IO in question is block-aligned or not.
1618c2ecf20Sopenharmony_ci * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
1628c2ecf20Sopenharmony_ci * are converted to written only after the IO is complete.  Until they are
1638c2ecf20Sopenharmony_ci * mapped, these blocks appear as holes, so dio_zero_block() will assume that
1648c2ecf20Sopenharmony_ci * it needs to zero out portions of the start and/or end block.  If 2 AIO
1658c2ecf20Sopenharmony_ci * threads are at work on the same unwritten block, they must be synchronized
1668c2ecf20Sopenharmony_ci * or one thread will zero the other's data, causing corruption.
1678c2ecf20Sopenharmony_ci */
1688c2ecf20Sopenharmony_cistatic bool
1698c2ecf20Sopenharmony_ciext4_unaligned_io(struct inode *inode, struct iov_iter *from, loff_t pos)
1708c2ecf20Sopenharmony_ci{
1718c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
1728c2ecf20Sopenharmony_ci	unsigned long blockmask = sb->s_blocksize - 1;
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci	if ((pos | iov_iter_alignment(from)) & blockmask)
1758c2ecf20Sopenharmony_ci		return true;
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_ci	return false;
1788c2ecf20Sopenharmony_ci}
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_cistatic bool
1818c2ecf20Sopenharmony_ciext4_extending_io(struct inode *inode, loff_t offset, size_t len)
1828c2ecf20Sopenharmony_ci{
1838c2ecf20Sopenharmony_ci	if (offset + len > i_size_read(inode) ||
1848c2ecf20Sopenharmony_ci	    offset + len > EXT4_I(inode)->i_disksize)
1858c2ecf20Sopenharmony_ci		return true;
1868c2ecf20Sopenharmony_ci	return false;
1878c2ecf20Sopenharmony_ci}
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci/* Is IO overwriting allocated and initialized blocks? */
1908c2ecf20Sopenharmony_cistatic bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
1918c2ecf20Sopenharmony_ci{
1928c2ecf20Sopenharmony_ci	struct ext4_map_blocks map;
1938c2ecf20Sopenharmony_ci	unsigned int blkbits = inode->i_blkbits;
1948c2ecf20Sopenharmony_ci	int err, blklen;
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci	if (pos + len > i_size_read(inode))
1978c2ecf20Sopenharmony_ci		return false;
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci	map.m_lblk = pos >> blkbits;
2008c2ecf20Sopenharmony_ci	map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
2018c2ecf20Sopenharmony_ci	blklen = map.m_len;
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci	err = ext4_map_blocks(NULL, inode, &map, 0);
2048c2ecf20Sopenharmony_ci	/*
2058c2ecf20Sopenharmony_ci	 * 'err==len' means that all of the blocks have been preallocated,
2068c2ecf20Sopenharmony_ci	 * regardless of whether they have been initialized or not. To exclude
2078c2ecf20Sopenharmony_ci	 * unwritten extents, we need to check m_flags.
2088c2ecf20Sopenharmony_ci	 */
2098c2ecf20Sopenharmony_ci	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
2108c2ecf20Sopenharmony_ci}
2118c2ecf20Sopenharmony_ci
2128c2ecf20Sopenharmony_cistatic ssize_t ext4_generic_write_checks(struct kiocb *iocb,
2138c2ecf20Sopenharmony_ci					 struct iov_iter *from)
2148c2ecf20Sopenharmony_ci{
2158c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
2168c2ecf20Sopenharmony_ci	ssize_t ret;
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	if (unlikely(IS_IMMUTABLE(inode)))
2198c2ecf20Sopenharmony_ci		return -EPERM;
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	ret = generic_write_checks(iocb, from);
2228c2ecf20Sopenharmony_ci	if (ret <= 0)
2238c2ecf20Sopenharmony_ci		return ret;
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci	/*
2268c2ecf20Sopenharmony_ci	 * If we have encountered a bitmap-format file, the size limit
2278c2ecf20Sopenharmony_ci	 * is smaller than s_maxbytes, which is for extent-mapped files.
2288c2ecf20Sopenharmony_ci	 */
2298c2ecf20Sopenharmony_ci	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
2308c2ecf20Sopenharmony_ci		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci		if (iocb->ki_pos >= sbi->s_bitmap_maxbytes)
2338c2ecf20Sopenharmony_ci			return -EFBIG;
2348c2ecf20Sopenharmony_ci		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
2358c2ecf20Sopenharmony_ci	}
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_ci	return iov_iter_count(from);
2388c2ecf20Sopenharmony_ci}
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_cistatic ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
2418c2ecf20Sopenharmony_ci{
2428c2ecf20Sopenharmony_ci	ssize_t ret, count;
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci	count = ext4_generic_write_checks(iocb, from);
2458c2ecf20Sopenharmony_ci	if (count <= 0)
2468c2ecf20Sopenharmony_ci		return count;
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci	ret = file_modified(iocb->ki_filp);
2498c2ecf20Sopenharmony_ci	if (ret)
2508c2ecf20Sopenharmony_ci		return ret;
2518c2ecf20Sopenharmony_ci	return count;
2528c2ecf20Sopenharmony_ci}
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_cistatic ssize_t ext4_buffered_write_iter(struct kiocb *iocb,
2558c2ecf20Sopenharmony_ci					struct iov_iter *from)
2568c2ecf20Sopenharmony_ci{
2578c2ecf20Sopenharmony_ci	ssize_t ret;
2588c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT)
2618c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci	inode_lock(inode);
2648c2ecf20Sopenharmony_ci	ret = ext4_write_checks(iocb, from);
2658c2ecf20Sopenharmony_ci	if (ret <= 0)
2668c2ecf20Sopenharmony_ci		goto out;
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_ci	current->backing_dev_info = inode_to_bdi(inode);
2698c2ecf20Sopenharmony_ci	ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos);
2708c2ecf20Sopenharmony_ci	current->backing_dev_info = NULL;
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ciout:
2738c2ecf20Sopenharmony_ci	inode_unlock(inode);
2748c2ecf20Sopenharmony_ci	if (likely(ret > 0)) {
2758c2ecf20Sopenharmony_ci		iocb->ki_pos += ret;
2768c2ecf20Sopenharmony_ci		ret = generic_write_sync(iocb, ret);
2778c2ecf20Sopenharmony_ci	}
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci	return ret;
2808c2ecf20Sopenharmony_ci}
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_cistatic ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset,
2838c2ecf20Sopenharmony_ci					   ssize_t written, size_t count)
2848c2ecf20Sopenharmony_ci{
2858c2ecf20Sopenharmony_ci	handle_t *handle;
2868c2ecf20Sopenharmony_ci	bool truncate = false;
2878c2ecf20Sopenharmony_ci	u8 blkbits = inode->i_blkbits;
2888c2ecf20Sopenharmony_ci	ext4_lblk_t written_blk, end_blk;
2898c2ecf20Sopenharmony_ci	int ret;
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	/*
2928c2ecf20Sopenharmony_ci	 * Note that EXT4_I(inode)->i_disksize can get extended up to
2938c2ecf20Sopenharmony_ci	 * inode->i_size while the I/O was running due to writeback of delalloc
2948c2ecf20Sopenharmony_ci	 * blocks. But, the code in ext4_iomap_alloc() is careful to use
2958c2ecf20Sopenharmony_ci	 * zeroed/unwritten extents if this is possible; thus we won't leave
2968c2ecf20Sopenharmony_ci	 * uninitialized blocks in a file even if we didn't succeed in writing
2978c2ecf20Sopenharmony_ci	 * as much as we intended.
2988c2ecf20Sopenharmony_ci	 */
2998c2ecf20Sopenharmony_ci	WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize);
3008c2ecf20Sopenharmony_ci	if (offset + count <= EXT4_I(inode)->i_disksize) {
3018c2ecf20Sopenharmony_ci		/*
3028c2ecf20Sopenharmony_ci		 * We need to ensure that the inode is removed from the orphan
3038c2ecf20Sopenharmony_ci		 * list if it has been added prematurely, due to writeback of
3048c2ecf20Sopenharmony_ci		 * delalloc blocks.
3058c2ecf20Sopenharmony_ci		 */
3068c2ecf20Sopenharmony_ci		if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) {
3078c2ecf20Sopenharmony_ci			handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_ci			if (IS_ERR(handle)) {
3108c2ecf20Sopenharmony_ci				ext4_orphan_del(NULL, inode);
3118c2ecf20Sopenharmony_ci				return PTR_ERR(handle);
3128c2ecf20Sopenharmony_ci			}
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci			ext4_orphan_del(handle, inode);
3158c2ecf20Sopenharmony_ci			ext4_journal_stop(handle);
3168c2ecf20Sopenharmony_ci		}
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ci		return written;
3198c2ecf20Sopenharmony_ci	}
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	if (written < 0)
3228c2ecf20Sopenharmony_ci		goto truncate;
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
3258c2ecf20Sopenharmony_ci	if (IS_ERR(handle)) {
3268c2ecf20Sopenharmony_ci		written = PTR_ERR(handle);
3278c2ecf20Sopenharmony_ci		goto truncate;
3288c2ecf20Sopenharmony_ci	}
3298c2ecf20Sopenharmony_ci
3308c2ecf20Sopenharmony_ci	if (ext4_update_inode_size(inode, offset + written)) {
3318c2ecf20Sopenharmony_ci		ret = ext4_mark_inode_dirty(handle, inode);
3328c2ecf20Sopenharmony_ci		if (unlikely(ret)) {
3338c2ecf20Sopenharmony_ci			written = ret;
3348c2ecf20Sopenharmony_ci			ext4_journal_stop(handle);
3358c2ecf20Sopenharmony_ci			goto truncate;
3368c2ecf20Sopenharmony_ci		}
3378c2ecf20Sopenharmony_ci	}
3388c2ecf20Sopenharmony_ci
3398c2ecf20Sopenharmony_ci	/*
3408c2ecf20Sopenharmony_ci	 * We may need to truncate allocated but not written blocks beyond EOF.
3418c2ecf20Sopenharmony_ci	 */
3428c2ecf20Sopenharmony_ci	written_blk = ALIGN(offset + written, 1 << blkbits);
3438c2ecf20Sopenharmony_ci	end_blk = ALIGN(offset + count, 1 << blkbits);
3448c2ecf20Sopenharmony_ci	if (written_blk < end_blk && ext4_can_truncate(inode))
3458c2ecf20Sopenharmony_ci		truncate = true;
3468c2ecf20Sopenharmony_ci
3478c2ecf20Sopenharmony_ci	/*
3488c2ecf20Sopenharmony_ci	 * Remove the inode from the orphan list if it has been extended and
3498c2ecf20Sopenharmony_ci	 * everything went OK.
3508c2ecf20Sopenharmony_ci	 */
3518c2ecf20Sopenharmony_ci	if (!truncate && inode->i_nlink)
3528c2ecf20Sopenharmony_ci		ext4_orphan_del(handle, inode);
3538c2ecf20Sopenharmony_ci	ext4_journal_stop(handle);
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci	if (truncate) {
3568c2ecf20Sopenharmony_citruncate:
3578c2ecf20Sopenharmony_ci		ext4_truncate_failed_write(inode);
3588c2ecf20Sopenharmony_ci		/*
3598c2ecf20Sopenharmony_ci		 * If the truncate operation failed early, then the inode may
3608c2ecf20Sopenharmony_ci		 * still be on the orphan list. In that case, we need to try
3618c2ecf20Sopenharmony_ci		 * remove the inode from the in-memory linked list.
3628c2ecf20Sopenharmony_ci		 */
3638c2ecf20Sopenharmony_ci		if (inode->i_nlink)
3648c2ecf20Sopenharmony_ci			ext4_orphan_del(NULL, inode);
3658c2ecf20Sopenharmony_ci	}
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci	return written;
3688c2ecf20Sopenharmony_ci}
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_cistatic int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
3718c2ecf20Sopenharmony_ci				 int error, unsigned int flags)
3728c2ecf20Sopenharmony_ci{
3738c2ecf20Sopenharmony_ci	loff_t pos = iocb->ki_pos;
3748c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
3758c2ecf20Sopenharmony_ci
3768c2ecf20Sopenharmony_ci	if (error)
3778c2ecf20Sopenharmony_ci		return error;
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_ci	if (size && flags & IOMAP_DIO_UNWRITTEN) {
3808c2ecf20Sopenharmony_ci		error = ext4_convert_unwritten_extents(NULL, inode, pos, size);
3818c2ecf20Sopenharmony_ci		if (error < 0)
3828c2ecf20Sopenharmony_ci			return error;
3838c2ecf20Sopenharmony_ci	}
3848c2ecf20Sopenharmony_ci	/*
3858c2ecf20Sopenharmony_ci	 * If we are extending the file, we have to update i_size here before
3868c2ecf20Sopenharmony_ci	 * page cache gets invalidated in iomap_dio_rw(). Otherwise racing
3878c2ecf20Sopenharmony_ci	 * buffered reads could zero out too much from page cache pages. Update
3888c2ecf20Sopenharmony_ci	 * of on-disk size will happen later in ext4_dio_write_iter() where
3898c2ecf20Sopenharmony_ci	 * we have enough information to also perform orphan list handling etc.
3908c2ecf20Sopenharmony_ci	 * Note that we perform all extending writes synchronously under
3918c2ecf20Sopenharmony_ci	 * i_rwsem held exclusively so i_size update is safe here in that case.
3928c2ecf20Sopenharmony_ci	 * If the write was not extending, we cannot see pos > i_size here
3938c2ecf20Sopenharmony_ci	 * because operations reducing i_size like truncate wait for all
3948c2ecf20Sopenharmony_ci	 * outstanding DIO before updating i_size.
3958c2ecf20Sopenharmony_ci	 */
3968c2ecf20Sopenharmony_ci	pos += size;
3978c2ecf20Sopenharmony_ci	if (pos > i_size_read(inode))
3988c2ecf20Sopenharmony_ci		i_size_write(inode, pos);
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci	return 0;
4018c2ecf20Sopenharmony_ci}
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_cistatic const struct iomap_dio_ops ext4_dio_write_ops = {
4048c2ecf20Sopenharmony_ci	.end_io = ext4_dio_write_end_io,
4058c2ecf20Sopenharmony_ci};
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_ci/*
4088c2ecf20Sopenharmony_ci * The intention here is to start with shared lock acquired then see if any
4098c2ecf20Sopenharmony_ci * condition requires an exclusive inode lock. If yes, then we restart the
4108c2ecf20Sopenharmony_ci * whole operation by releasing the shared lock and acquiring exclusive lock.
4118c2ecf20Sopenharmony_ci *
4128c2ecf20Sopenharmony_ci * - For unaligned_io we never take shared lock as it may cause data corruption
4138c2ecf20Sopenharmony_ci *   when two unaligned IO tries to modify the same block e.g. while zeroing.
4148c2ecf20Sopenharmony_ci *
4158c2ecf20Sopenharmony_ci * - For extending writes case we don't take the shared lock, since it requires
4168c2ecf20Sopenharmony_ci *   updating inode i_disksize and/or orphan handling with exclusive lock.
4178c2ecf20Sopenharmony_ci *
4188c2ecf20Sopenharmony_ci * - shared locking will only be true mostly with overwrites. Otherwise we will
4198c2ecf20Sopenharmony_ci *   switch to exclusive i_rwsem lock.
4208c2ecf20Sopenharmony_ci */
4218c2ecf20Sopenharmony_cistatic ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
4228c2ecf20Sopenharmony_ci				     bool *ilock_shared, bool *extend)
4238c2ecf20Sopenharmony_ci{
4248c2ecf20Sopenharmony_ci	struct file *file = iocb->ki_filp;
4258c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(file);
4268c2ecf20Sopenharmony_ci	loff_t offset;
4278c2ecf20Sopenharmony_ci	size_t count;
4288c2ecf20Sopenharmony_ci	ssize_t ret;
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_cirestart:
4318c2ecf20Sopenharmony_ci	ret = ext4_generic_write_checks(iocb, from);
4328c2ecf20Sopenharmony_ci	if (ret <= 0)
4338c2ecf20Sopenharmony_ci		goto out;
4348c2ecf20Sopenharmony_ci
4358c2ecf20Sopenharmony_ci	offset = iocb->ki_pos;
4368c2ecf20Sopenharmony_ci	count = ret;
4378c2ecf20Sopenharmony_ci	if (ext4_extending_io(inode, offset, count))
4388c2ecf20Sopenharmony_ci		*extend = true;
4398c2ecf20Sopenharmony_ci	/*
4408c2ecf20Sopenharmony_ci	 * Determine whether the IO operation will overwrite allocated
4418c2ecf20Sopenharmony_ci	 * and initialized blocks.
4428c2ecf20Sopenharmony_ci	 * We need exclusive i_rwsem for changing security info
4438c2ecf20Sopenharmony_ci	 * in file_modified().
4448c2ecf20Sopenharmony_ci	 */
4458c2ecf20Sopenharmony_ci	if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
4468c2ecf20Sopenharmony_ci	     !ext4_overwrite_io(inode, offset, count))) {
4478c2ecf20Sopenharmony_ci		if (iocb->ki_flags & IOCB_NOWAIT) {
4488c2ecf20Sopenharmony_ci			ret = -EAGAIN;
4498c2ecf20Sopenharmony_ci			goto out;
4508c2ecf20Sopenharmony_ci		}
4518c2ecf20Sopenharmony_ci		inode_unlock_shared(inode);
4528c2ecf20Sopenharmony_ci		*ilock_shared = false;
4538c2ecf20Sopenharmony_ci		inode_lock(inode);
4548c2ecf20Sopenharmony_ci		goto restart;
4558c2ecf20Sopenharmony_ci	}
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_ci	ret = file_modified(file);
4588c2ecf20Sopenharmony_ci	if (ret < 0)
4598c2ecf20Sopenharmony_ci		goto out;
4608c2ecf20Sopenharmony_ci
4618c2ecf20Sopenharmony_ci	return count;
4628c2ecf20Sopenharmony_ciout:
4638c2ecf20Sopenharmony_ci	if (*ilock_shared)
4648c2ecf20Sopenharmony_ci		inode_unlock_shared(inode);
4658c2ecf20Sopenharmony_ci	else
4668c2ecf20Sopenharmony_ci		inode_unlock(inode);
4678c2ecf20Sopenharmony_ci	return ret;
4688c2ecf20Sopenharmony_ci}
4698c2ecf20Sopenharmony_ci
4708c2ecf20Sopenharmony_cistatic ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
4718c2ecf20Sopenharmony_ci{
4728c2ecf20Sopenharmony_ci	ssize_t ret;
4738c2ecf20Sopenharmony_ci	handle_t *handle;
4748c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
4758c2ecf20Sopenharmony_ci	loff_t offset = iocb->ki_pos;
4768c2ecf20Sopenharmony_ci	size_t count = iov_iter_count(from);
4778c2ecf20Sopenharmony_ci	const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
4788c2ecf20Sopenharmony_ci	bool extend = false, unaligned_io = false;
4798c2ecf20Sopenharmony_ci	bool ilock_shared = true;
4808c2ecf20Sopenharmony_ci
4818c2ecf20Sopenharmony_ci	/*
4828c2ecf20Sopenharmony_ci	 * We initially start with shared inode lock unless it is
4838c2ecf20Sopenharmony_ci	 * unaligned IO which needs exclusive lock anyways.
4848c2ecf20Sopenharmony_ci	 */
4858c2ecf20Sopenharmony_ci	if (ext4_unaligned_io(inode, from, offset)) {
4868c2ecf20Sopenharmony_ci		unaligned_io = true;
4878c2ecf20Sopenharmony_ci		ilock_shared = false;
4888c2ecf20Sopenharmony_ci	}
4898c2ecf20Sopenharmony_ci	/*
4908c2ecf20Sopenharmony_ci	 * Quick check here without any i_rwsem lock to see if it is extending
4918c2ecf20Sopenharmony_ci	 * IO. A more reliable check is done in ext4_dio_write_checks() with
4928c2ecf20Sopenharmony_ci	 * proper locking in place.
4938c2ecf20Sopenharmony_ci	 */
4948c2ecf20Sopenharmony_ci	if (offset + count > i_size_read(inode))
4958c2ecf20Sopenharmony_ci		ilock_shared = false;
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT) {
4988c2ecf20Sopenharmony_ci		if (ilock_shared) {
4998c2ecf20Sopenharmony_ci			if (!inode_trylock_shared(inode))
5008c2ecf20Sopenharmony_ci				return -EAGAIN;
5018c2ecf20Sopenharmony_ci		} else {
5028c2ecf20Sopenharmony_ci			if (!inode_trylock(inode))
5038c2ecf20Sopenharmony_ci				return -EAGAIN;
5048c2ecf20Sopenharmony_ci		}
5058c2ecf20Sopenharmony_ci	} else {
5068c2ecf20Sopenharmony_ci		if (ilock_shared)
5078c2ecf20Sopenharmony_ci			inode_lock_shared(inode);
5088c2ecf20Sopenharmony_ci		else
5098c2ecf20Sopenharmony_ci			inode_lock(inode);
5108c2ecf20Sopenharmony_ci	}
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_ci	/* Fallback to buffered I/O if the inode does not support direct I/O. */
5138c2ecf20Sopenharmony_ci	if (!ext4_dio_supported(inode)) {
5148c2ecf20Sopenharmony_ci		if (ilock_shared)
5158c2ecf20Sopenharmony_ci			inode_unlock_shared(inode);
5168c2ecf20Sopenharmony_ci		else
5178c2ecf20Sopenharmony_ci			inode_unlock(inode);
5188c2ecf20Sopenharmony_ci		return ext4_buffered_write_iter(iocb, from);
5198c2ecf20Sopenharmony_ci	}
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend);
5228c2ecf20Sopenharmony_ci	if (ret <= 0)
5238c2ecf20Sopenharmony_ci		return ret;
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_ci	/* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */
5268c2ecf20Sopenharmony_ci	if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) {
5278c2ecf20Sopenharmony_ci		ret = -EAGAIN;
5288c2ecf20Sopenharmony_ci		goto out;
5298c2ecf20Sopenharmony_ci	}
5308c2ecf20Sopenharmony_ci	/*
5318c2ecf20Sopenharmony_ci	 * Make sure inline data cannot be created anymore since we are going
5328c2ecf20Sopenharmony_ci	 * to allocate blocks for DIO. We know the inode does not have any
5338c2ecf20Sopenharmony_ci	 * inline data now because ext4_dio_supported() checked for that.
5348c2ecf20Sopenharmony_ci	 */
5358c2ecf20Sopenharmony_ci	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_ci	offset = iocb->ki_pos;
5388c2ecf20Sopenharmony_ci	count = ret;
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_ci	/*
5418c2ecf20Sopenharmony_ci	 * Unaligned direct IO must be serialized among each other as zeroing
5428c2ecf20Sopenharmony_ci	 * of partial blocks of two competing unaligned IOs can result in data
5438c2ecf20Sopenharmony_ci	 * corruption.
5448c2ecf20Sopenharmony_ci	 *
5458c2ecf20Sopenharmony_ci	 * So we make sure we don't allow any unaligned IO in flight.
5468c2ecf20Sopenharmony_ci	 * For IOs where we need not wait (like unaligned non-AIO DIO),
5478c2ecf20Sopenharmony_ci	 * below inode_dio_wait() may anyway become a no-op, since we start
5488c2ecf20Sopenharmony_ci	 * with exclusive lock.
5498c2ecf20Sopenharmony_ci	 */
5508c2ecf20Sopenharmony_ci	if (unaligned_io)
5518c2ecf20Sopenharmony_ci		inode_dio_wait(inode);
5528c2ecf20Sopenharmony_ci
5538c2ecf20Sopenharmony_ci	if (extend) {
5548c2ecf20Sopenharmony_ci		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
5558c2ecf20Sopenharmony_ci		if (IS_ERR(handle)) {
5568c2ecf20Sopenharmony_ci			ret = PTR_ERR(handle);
5578c2ecf20Sopenharmony_ci			goto out;
5588c2ecf20Sopenharmony_ci		}
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci		ret = ext4_orphan_add(handle, inode);
5618c2ecf20Sopenharmony_ci		if (ret) {
5628c2ecf20Sopenharmony_ci			ext4_journal_stop(handle);
5638c2ecf20Sopenharmony_ci			goto out;
5648c2ecf20Sopenharmony_ci		}
5658c2ecf20Sopenharmony_ci
5668c2ecf20Sopenharmony_ci		ext4_journal_stop(handle);
5678c2ecf20Sopenharmony_ci	}
5688c2ecf20Sopenharmony_ci
5698c2ecf20Sopenharmony_ci	if (ilock_shared)
5708c2ecf20Sopenharmony_ci		iomap_ops = &ext4_iomap_overwrite_ops;
5718c2ecf20Sopenharmony_ci	ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
5728c2ecf20Sopenharmony_ci			   is_sync_kiocb(iocb) || unaligned_io || extend);
5738c2ecf20Sopenharmony_ci	if (ret == -ENOTBLK)
5748c2ecf20Sopenharmony_ci		ret = 0;
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_ci	if (extend)
5778c2ecf20Sopenharmony_ci		ret = ext4_handle_inode_extension(inode, offset, ret, count);
5788c2ecf20Sopenharmony_ci
5798c2ecf20Sopenharmony_ciout:
5808c2ecf20Sopenharmony_ci	if (ilock_shared)
5818c2ecf20Sopenharmony_ci		inode_unlock_shared(inode);
5828c2ecf20Sopenharmony_ci	else
5838c2ecf20Sopenharmony_ci		inode_unlock(inode);
5848c2ecf20Sopenharmony_ci
5858c2ecf20Sopenharmony_ci	if (ret >= 0 && iov_iter_count(from)) {
5868c2ecf20Sopenharmony_ci		ssize_t err;
5878c2ecf20Sopenharmony_ci		loff_t endbyte;
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci		offset = iocb->ki_pos;
5908c2ecf20Sopenharmony_ci		err = ext4_buffered_write_iter(iocb, from);
5918c2ecf20Sopenharmony_ci		if (err < 0)
5928c2ecf20Sopenharmony_ci			return err;
5938c2ecf20Sopenharmony_ci
5948c2ecf20Sopenharmony_ci		/*
5958c2ecf20Sopenharmony_ci		 * We need to ensure that the pages within the page cache for
5968c2ecf20Sopenharmony_ci		 * the range covered by this I/O are written to disk and
5978c2ecf20Sopenharmony_ci		 * invalidated. This is in attempt to preserve the expected
5988c2ecf20Sopenharmony_ci		 * direct I/O semantics in the case we fallback to buffered I/O
5998c2ecf20Sopenharmony_ci		 * to complete off the I/O request.
6008c2ecf20Sopenharmony_ci		 */
6018c2ecf20Sopenharmony_ci		ret += err;
6028c2ecf20Sopenharmony_ci		endbyte = offset + err - 1;
6038c2ecf20Sopenharmony_ci		err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping,
6048c2ecf20Sopenharmony_ci						   offset, endbyte);
6058c2ecf20Sopenharmony_ci		if (!err)
6068c2ecf20Sopenharmony_ci			invalidate_mapping_pages(iocb->ki_filp->f_mapping,
6078c2ecf20Sopenharmony_ci						 offset >> PAGE_SHIFT,
6088c2ecf20Sopenharmony_ci						 endbyte >> PAGE_SHIFT);
6098c2ecf20Sopenharmony_ci	}
6108c2ecf20Sopenharmony_ci
6118c2ecf20Sopenharmony_ci	return ret;
6128c2ecf20Sopenharmony_ci}
6138c2ecf20Sopenharmony_ci
6148c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_DAX
6158c2ecf20Sopenharmony_cistatic ssize_t
6168c2ecf20Sopenharmony_ciext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
6178c2ecf20Sopenharmony_ci{
6188c2ecf20Sopenharmony_ci	ssize_t ret;
6198c2ecf20Sopenharmony_ci	size_t count;
6208c2ecf20Sopenharmony_ci	loff_t offset;
6218c2ecf20Sopenharmony_ci	handle_t *handle;
6228c2ecf20Sopenharmony_ci	bool extend = false;
6238c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
6248c2ecf20Sopenharmony_ci
6258c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_NOWAIT) {
6268c2ecf20Sopenharmony_ci		if (!inode_trylock(inode))
6278c2ecf20Sopenharmony_ci			return -EAGAIN;
6288c2ecf20Sopenharmony_ci	} else {
6298c2ecf20Sopenharmony_ci		inode_lock(inode);
6308c2ecf20Sopenharmony_ci	}
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_ci	ret = ext4_write_checks(iocb, from);
6338c2ecf20Sopenharmony_ci	if (ret <= 0)
6348c2ecf20Sopenharmony_ci		goto out;
6358c2ecf20Sopenharmony_ci
6368c2ecf20Sopenharmony_ci	offset = iocb->ki_pos;
6378c2ecf20Sopenharmony_ci	count = iov_iter_count(from);
6388c2ecf20Sopenharmony_ci
6398c2ecf20Sopenharmony_ci	if (offset + count > EXT4_I(inode)->i_disksize) {
6408c2ecf20Sopenharmony_ci		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
6418c2ecf20Sopenharmony_ci		if (IS_ERR(handle)) {
6428c2ecf20Sopenharmony_ci			ret = PTR_ERR(handle);
6438c2ecf20Sopenharmony_ci			goto out;
6448c2ecf20Sopenharmony_ci		}
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_ci		ret = ext4_orphan_add(handle, inode);
6478c2ecf20Sopenharmony_ci		if (ret) {
6488c2ecf20Sopenharmony_ci			ext4_journal_stop(handle);
6498c2ecf20Sopenharmony_ci			goto out;
6508c2ecf20Sopenharmony_ci		}
6518c2ecf20Sopenharmony_ci
6528c2ecf20Sopenharmony_ci		extend = true;
6538c2ecf20Sopenharmony_ci		ext4_journal_stop(handle);
6548c2ecf20Sopenharmony_ci	}
6558c2ecf20Sopenharmony_ci
6568c2ecf20Sopenharmony_ci	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
6578c2ecf20Sopenharmony_ci
6588c2ecf20Sopenharmony_ci	if (extend)
6598c2ecf20Sopenharmony_ci		ret = ext4_handle_inode_extension(inode, offset, ret, count);
6608c2ecf20Sopenharmony_ciout:
6618c2ecf20Sopenharmony_ci	inode_unlock(inode);
6628c2ecf20Sopenharmony_ci	if (ret > 0)
6638c2ecf20Sopenharmony_ci		ret = generic_write_sync(iocb, ret);
6648c2ecf20Sopenharmony_ci	return ret;
6658c2ecf20Sopenharmony_ci}
6668c2ecf20Sopenharmony_ci#endif
6678c2ecf20Sopenharmony_ci
6688c2ecf20Sopenharmony_cistatic ssize_t
6698c2ecf20Sopenharmony_ciext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
6708c2ecf20Sopenharmony_ci{
6718c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(iocb->ki_filp);
6728c2ecf20Sopenharmony_ci
6738c2ecf20Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
6748c2ecf20Sopenharmony_ci		return -EIO;
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_DAX
6778c2ecf20Sopenharmony_ci	if (IS_DAX(inode))
6788c2ecf20Sopenharmony_ci		return ext4_dax_write_iter(iocb, from);
6798c2ecf20Sopenharmony_ci#endif
6808c2ecf20Sopenharmony_ci	if (iocb->ki_flags & IOCB_DIRECT)
6818c2ecf20Sopenharmony_ci		return ext4_dio_write_iter(iocb, from);
6828c2ecf20Sopenharmony_ci	else
6838c2ecf20Sopenharmony_ci		return ext4_buffered_write_iter(iocb, from);
6848c2ecf20Sopenharmony_ci}
6858c2ecf20Sopenharmony_ci
6868c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_DAX
6878c2ecf20Sopenharmony_cistatic vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
6888c2ecf20Sopenharmony_ci		enum page_entry_size pe_size)
6898c2ecf20Sopenharmony_ci{
6908c2ecf20Sopenharmony_ci	int error = 0;
6918c2ecf20Sopenharmony_ci	vm_fault_t result;
6928c2ecf20Sopenharmony_ci	int retries = 0;
6938c2ecf20Sopenharmony_ci	handle_t *handle = NULL;
6948c2ecf20Sopenharmony_ci	struct inode *inode = file_inode(vmf->vma->vm_file);
6958c2ecf20Sopenharmony_ci	struct super_block *sb = inode->i_sb;
6968c2ecf20Sopenharmony_ci
6978c2ecf20Sopenharmony_ci	/*
6988c2ecf20Sopenharmony_ci	 * We have to distinguish real writes from writes which will result in a
6998c2ecf20Sopenharmony_ci	 * COW page; COW writes should *not* poke the journal (the file will not
7008c2ecf20Sopenharmony_ci	 * be changed). Doing so would cause unintended failures when mounted
7018c2ecf20Sopenharmony_ci	 * read-only.
7028c2ecf20Sopenharmony_ci	 *
7038c2ecf20Sopenharmony_ci	 * We check for VM_SHARED rather than vmf->cow_page since the latter is
7048c2ecf20Sopenharmony_ci	 * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for
7058c2ecf20Sopenharmony_ci	 * other sizes, dax_iomap_fault will handle splitting / fallback so that
7068c2ecf20Sopenharmony_ci	 * we eventually come back with a COW page.
7078c2ecf20Sopenharmony_ci	 */
7088c2ecf20Sopenharmony_ci	bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
7098c2ecf20Sopenharmony_ci		(vmf->vma->vm_flags & VM_SHARED);
7108c2ecf20Sopenharmony_ci	pfn_t pfn;
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_ci	if (write) {
7138c2ecf20Sopenharmony_ci		sb_start_pagefault(sb);
7148c2ecf20Sopenharmony_ci		file_update_time(vmf->vma->vm_file);
7158c2ecf20Sopenharmony_ci		down_read(&EXT4_I(inode)->i_mmap_sem);
7168c2ecf20Sopenharmony_ciretry:
7178c2ecf20Sopenharmony_ci		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
7188c2ecf20Sopenharmony_ci					       EXT4_DATA_TRANS_BLOCKS(sb));
7198c2ecf20Sopenharmony_ci		if (IS_ERR(handle)) {
7208c2ecf20Sopenharmony_ci			up_read(&EXT4_I(inode)->i_mmap_sem);
7218c2ecf20Sopenharmony_ci			sb_end_pagefault(sb);
7228c2ecf20Sopenharmony_ci			return VM_FAULT_SIGBUS;
7238c2ecf20Sopenharmony_ci		}
7248c2ecf20Sopenharmony_ci	} else {
7258c2ecf20Sopenharmony_ci		down_read(&EXT4_I(inode)->i_mmap_sem);
7268c2ecf20Sopenharmony_ci	}
7278c2ecf20Sopenharmony_ci	result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
7288c2ecf20Sopenharmony_ci	if (write) {
7298c2ecf20Sopenharmony_ci		ext4_journal_stop(handle);
7308c2ecf20Sopenharmony_ci
7318c2ecf20Sopenharmony_ci		if ((result & VM_FAULT_ERROR) && error == -ENOSPC &&
7328c2ecf20Sopenharmony_ci		    ext4_should_retry_alloc(sb, &retries))
7338c2ecf20Sopenharmony_ci			goto retry;
7348c2ecf20Sopenharmony_ci		/* Handling synchronous page fault? */
7358c2ecf20Sopenharmony_ci		if (result & VM_FAULT_NEEDDSYNC)
7368c2ecf20Sopenharmony_ci			result = dax_finish_sync_fault(vmf, pe_size, pfn);
7378c2ecf20Sopenharmony_ci		up_read(&EXT4_I(inode)->i_mmap_sem);
7388c2ecf20Sopenharmony_ci		sb_end_pagefault(sb);
7398c2ecf20Sopenharmony_ci	} else {
7408c2ecf20Sopenharmony_ci		up_read(&EXT4_I(inode)->i_mmap_sem);
7418c2ecf20Sopenharmony_ci	}
7428c2ecf20Sopenharmony_ci
7438c2ecf20Sopenharmony_ci	return result;
7448c2ecf20Sopenharmony_ci}
7458c2ecf20Sopenharmony_ci
7468c2ecf20Sopenharmony_cistatic vm_fault_t ext4_dax_fault(struct vm_fault *vmf)
7478c2ecf20Sopenharmony_ci{
7488c2ecf20Sopenharmony_ci	return ext4_dax_huge_fault(vmf, PE_SIZE_PTE);
7498c2ecf20Sopenharmony_ci}
7508c2ecf20Sopenharmony_ci
7518c2ecf20Sopenharmony_cistatic const struct vm_operations_struct ext4_dax_vm_ops = {
7528c2ecf20Sopenharmony_ci	.fault		= ext4_dax_fault,
7538c2ecf20Sopenharmony_ci	.huge_fault	= ext4_dax_huge_fault,
7548c2ecf20Sopenharmony_ci	.page_mkwrite	= ext4_dax_fault,
7558c2ecf20Sopenharmony_ci	.pfn_mkwrite	= ext4_dax_fault,
7568c2ecf20Sopenharmony_ci};
7578c2ecf20Sopenharmony_ci#else
7588c2ecf20Sopenharmony_ci#define ext4_dax_vm_ops	ext4_file_vm_ops
7598c2ecf20Sopenharmony_ci#endif
7608c2ecf20Sopenharmony_ci
7618c2ecf20Sopenharmony_cistatic const struct vm_operations_struct ext4_file_vm_ops = {
7628c2ecf20Sopenharmony_ci	.fault		= ext4_filemap_fault,
7638c2ecf20Sopenharmony_ci	.map_pages	= filemap_map_pages,
7648c2ecf20Sopenharmony_ci	.page_mkwrite   = ext4_page_mkwrite,
7658c2ecf20Sopenharmony_ci};
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_cistatic int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
7688c2ecf20Sopenharmony_ci{
7698c2ecf20Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
7708c2ecf20Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
7718c2ecf20Sopenharmony_ci	struct dax_device *dax_dev = sbi->s_daxdev;
7728c2ecf20Sopenharmony_ci
7738c2ecf20Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(sbi)))
7748c2ecf20Sopenharmony_ci		return -EIO;
7758c2ecf20Sopenharmony_ci
7768c2ecf20Sopenharmony_ci	/*
7778c2ecf20Sopenharmony_ci	 * We don't support synchronous mappings for non-DAX files and
7788c2ecf20Sopenharmony_ci	 * for DAX files if underneath dax_device is not synchronous.
7798c2ecf20Sopenharmony_ci	 */
7808c2ecf20Sopenharmony_ci	if (!daxdev_mapping_supported(vma, dax_dev))
7818c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
7828c2ecf20Sopenharmony_ci
7838c2ecf20Sopenharmony_ci	file_accessed(file);
7848c2ecf20Sopenharmony_ci	if (IS_DAX(file_inode(file))) {
7858c2ecf20Sopenharmony_ci		vma->vm_ops = &ext4_dax_vm_ops;
7868c2ecf20Sopenharmony_ci		vma->vm_flags |= VM_HUGEPAGE;
7878c2ecf20Sopenharmony_ci	} else {
7888c2ecf20Sopenharmony_ci		vma->vm_ops = &ext4_file_vm_ops;
7898c2ecf20Sopenharmony_ci	}
7908c2ecf20Sopenharmony_ci	return 0;
7918c2ecf20Sopenharmony_ci}
7928c2ecf20Sopenharmony_ci
7938c2ecf20Sopenharmony_cistatic int ext4_sample_last_mounted(struct super_block *sb,
7948c2ecf20Sopenharmony_ci				    struct vfsmount *mnt)
7958c2ecf20Sopenharmony_ci{
7968c2ecf20Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(sb);
7978c2ecf20Sopenharmony_ci	struct path path;
7988c2ecf20Sopenharmony_ci	char buf[64], *cp;
7998c2ecf20Sopenharmony_ci	handle_t *handle;
8008c2ecf20Sopenharmony_ci	int err;
8018c2ecf20Sopenharmony_ci
8028c2ecf20Sopenharmony_ci	if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED)))
8038c2ecf20Sopenharmony_ci		return 0;
8048c2ecf20Sopenharmony_ci
8058c2ecf20Sopenharmony_ci	if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb))
8068c2ecf20Sopenharmony_ci		return 0;
8078c2ecf20Sopenharmony_ci
8088c2ecf20Sopenharmony_ci	ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED);
8098c2ecf20Sopenharmony_ci	/*
8108c2ecf20Sopenharmony_ci	 * Sample where the filesystem has been mounted and
8118c2ecf20Sopenharmony_ci	 * store it in the superblock for sysadmin convenience
8128c2ecf20Sopenharmony_ci	 * when trying to sort through large numbers of block
8138c2ecf20Sopenharmony_ci	 * devices or filesystem images.
8148c2ecf20Sopenharmony_ci	 */
8158c2ecf20Sopenharmony_ci	memset(buf, 0, sizeof(buf));
8168c2ecf20Sopenharmony_ci	path.mnt = mnt;
8178c2ecf20Sopenharmony_ci	path.dentry = mnt->mnt_root;
8188c2ecf20Sopenharmony_ci	cp = d_path(&path, buf, sizeof(buf));
8198c2ecf20Sopenharmony_ci	err = 0;
8208c2ecf20Sopenharmony_ci	if (IS_ERR(cp))
8218c2ecf20Sopenharmony_ci		goto out;
8228c2ecf20Sopenharmony_ci
8238c2ecf20Sopenharmony_ci	handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
8248c2ecf20Sopenharmony_ci	err = PTR_ERR(handle);
8258c2ecf20Sopenharmony_ci	if (IS_ERR(handle))
8268c2ecf20Sopenharmony_ci		goto out;
8278c2ecf20Sopenharmony_ci	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
8288c2ecf20Sopenharmony_ci	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
8298c2ecf20Sopenharmony_ci	if (err)
8308c2ecf20Sopenharmony_ci		goto out_journal;
8318c2ecf20Sopenharmony_ci	lock_buffer(sbi->s_sbh);
8328c2ecf20Sopenharmony_ci	strncpy(sbi->s_es->s_last_mounted, cp,
8338c2ecf20Sopenharmony_ci		sizeof(sbi->s_es->s_last_mounted));
8348c2ecf20Sopenharmony_ci	ext4_superblock_csum_set(sb);
8358c2ecf20Sopenharmony_ci	unlock_buffer(sbi->s_sbh);
8368c2ecf20Sopenharmony_ci	ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh);
8378c2ecf20Sopenharmony_ciout_journal:
8388c2ecf20Sopenharmony_ci	ext4_journal_stop(handle);
8398c2ecf20Sopenharmony_ciout:
8408c2ecf20Sopenharmony_ci	sb_end_intwrite(sb);
8418c2ecf20Sopenharmony_ci	return err;
8428c2ecf20Sopenharmony_ci}
8438c2ecf20Sopenharmony_ci
8448c2ecf20Sopenharmony_cistatic int ext4_file_open(struct inode *inode, struct file *filp)
8458c2ecf20Sopenharmony_ci{
8468c2ecf20Sopenharmony_ci	int ret;
8478c2ecf20Sopenharmony_ci
8488c2ecf20Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
8498c2ecf20Sopenharmony_ci		return -EIO;
8508c2ecf20Sopenharmony_ci
8518c2ecf20Sopenharmony_ci	ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt);
8528c2ecf20Sopenharmony_ci	if (ret)
8538c2ecf20Sopenharmony_ci		return ret;
8548c2ecf20Sopenharmony_ci
8558c2ecf20Sopenharmony_ci	ret = fscrypt_file_open(inode, filp);
8568c2ecf20Sopenharmony_ci	if (ret)
8578c2ecf20Sopenharmony_ci		return ret;
8588c2ecf20Sopenharmony_ci
8598c2ecf20Sopenharmony_ci	ret = fsverity_file_open(inode, filp);
8608c2ecf20Sopenharmony_ci	if (ret)
8618c2ecf20Sopenharmony_ci		return ret;
8628c2ecf20Sopenharmony_ci
8638c2ecf20Sopenharmony_ci	/*
8648c2ecf20Sopenharmony_ci	 * Set up the jbd2_inode if we are opening the inode for
8658c2ecf20Sopenharmony_ci	 * writing and the journal is present
8668c2ecf20Sopenharmony_ci	 */
8678c2ecf20Sopenharmony_ci	if (filp->f_mode & FMODE_WRITE) {
8688c2ecf20Sopenharmony_ci		ret = ext4_inode_attach_jinode(inode);
8698c2ecf20Sopenharmony_ci		if (ret < 0)
8708c2ecf20Sopenharmony_ci			return ret;
8718c2ecf20Sopenharmony_ci	}
8728c2ecf20Sopenharmony_ci
8738c2ecf20Sopenharmony_ci	filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
8748c2ecf20Sopenharmony_ci	return dquot_file_open(inode, filp);
8758c2ecf20Sopenharmony_ci}
8768c2ecf20Sopenharmony_ci
8778c2ecf20Sopenharmony_ci/*
8788c2ecf20Sopenharmony_ci * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
8798c2ecf20Sopenharmony_ci * by calling generic_file_llseek_size() with the appropriate maxbytes
8808c2ecf20Sopenharmony_ci * value for each.
8818c2ecf20Sopenharmony_ci */
8828c2ecf20Sopenharmony_ciloff_t ext4_llseek(struct file *file, loff_t offset, int whence)
8838c2ecf20Sopenharmony_ci{
8848c2ecf20Sopenharmony_ci	struct inode *inode = file->f_mapping->host;
8858c2ecf20Sopenharmony_ci	loff_t maxbytes;
8868c2ecf20Sopenharmony_ci
8878c2ecf20Sopenharmony_ci	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
8888c2ecf20Sopenharmony_ci		maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
8898c2ecf20Sopenharmony_ci	else
8908c2ecf20Sopenharmony_ci		maxbytes = inode->i_sb->s_maxbytes;
8918c2ecf20Sopenharmony_ci
8928c2ecf20Sopenharmony_ci	switch (whence) {
8938c2ecf20Sopenharmony_ci	default:
8948c2ecf20Sopenharmony_ci		return generic_file_llseek_size(file, offset, whence,
8958c2ecf20Sopenharmony_ci						maxbytes, i_size_read(inode));
8968c2ecf20Sopenharmony_ci	case SEEK_HOLE:
8978c2ecf20Sopenharmony_ci		inode_lock_shared(inode);
8988c2ecf20Sopenharmony_ci		offset = iomap_seek_hole(inode, offset,
8998c2ecf20Sopenharmony_ci					 &ext4_iomap_report_ops);
9008c2ecf20Sopenharmony_ci		inode_unlock_shared(inode);
9018c2ecf20Sopenharmony_ci		break;
9028c2ecf20Sopenharmony_ci	case SEEK_DATA:
9038c2ecf20Sopenharmony_ci		inode_lock_shared(inode);
9048c2ecf20Sopenharmony_ci		offset = iomap_seek_data(inode, offset,
9058c2ecf20Sopenharmony_ci					 &ext4_iomap_report_ops);
9068c2ecf20Sopenharmony_ci		inode_unlock_shared(inode);
9078c2ecf20Sopenharmony_ci		break;
9088c2ecf20Sopenharmony_ci	}
9098c2ecf20Sopenharmony_ci
9108c2ecf20Sopenharmony_ci	if (offset < 0)
9118c2ecf20Sopenharmony_ci		return offset;
9128c2ecf20Sopenharmony_ci	return vfs_setpos(file, offset, maxbytes);
9138c2ecf20Sopenharmony_ci}
9148c2ecf20Sopenharmony_ci
9158c2ecf20Sopenharmony_ciconst struct file_operations ext4_file_operations = {
9168c2ecf20Sopenharmony_ci	.llseek		= ext4_llseek,
9178c2ecf20Sopenharmony_ci	.read_iter	= ext4_file_read_iter,
9188c2ecf20Sopenharmony_ci	.write_iter	= ext4_file_write_iter,
9198c2ecf20Sopenharmony_ci	.iopoll		= iomap_dio_iopoll,
9208c2ecf20Sopenharmony_ci	.unlocked_ioctl = ext4_ioctl,
9218c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT
9228c2ecf20Sopenharmony_ci	.compat_ioctl	= ext4_compat_ioctl,
9238c2ecf20Sopenharmony_ci#endif
9248c2ecf20Sopenharmony_ci	.mmap		= ext4_file_mmap,
9258c2ecf20Sopenharmony_ci	.mmap_supported_flags = MAP_SYNC,
9268c2ecf20Sopenharmony_ci	.open		= ext4_file_open,
9278c2ecf20Sopenharmony_ci	.release	= ext4_release_file,
9288c2ecf20Sopenharmony_ci	.fsync		= ext4_sync_file,
9298c2ecf20Sopenharmony_ci	.get_unmapped_area = thp_get_unmapped_area,
9308c2ecf20Sopenharmony_ci	.splice_read	= generic_file_splice_read,
9318c2ecf20Sopenharmony_ci	.splice_write	= iter_file_splice_write,
9328c2ecf20Sopenharmony_ci	.fallocate	= ext4_fallocate,
9338c2ecf20Sopenharmony_ci};
9348c2ecf20Sopenharmony_ci
9358c2ecf20Sopenharmony_ciconst struct inode_operations ext4_file_inode_operations = {
9368c2ecf20Sopenharmony_ci	.setattr	= ext4_setattr,
9378c2ecf20Sopenharmony_ci	.getattr	= ext4_file_getattr,
9388c2ecf20Sopenharmony_ci	.listxattr	= ext4_listxattr,
9398c2ecf20Sopenharmony_ci	.get_acl	= ext4_get_acl,
9408c2ecf20Sopenharmony_ci	.set_acl	= ext4_set_acl,
9418c2ecf20Sopenharmony_ci	.fiemap		= ext4_fiemap,
9428c2ecf20Sopenharmony_ci};
9438c2ecf20Sopenharmony_ci
944