18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * linux/fs/ext4/file.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 1992, 1993, 1994, 1995 68c2ecf20Sopenharmony_ci * Remy Card (card@masi.ibp.fr) 78c2ecf20Sopenharmony_ci * Laboratoire MASI - Institut Blaise Pascal 88c2ecf20Sopenharmony_ci * Universite Pierre et Marie Curie (Paris VI) 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * from 118c2ecf20Sopenharmony_ci * 128c2ecf20Sopenharmony_ci * linux/fs/minix/file.c 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * Copyright (C) 1991, 1992 Linus Torvalds 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * ext4 fs regular file handling primitives 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * 64-bit file support on 64-bit platforms by Jakub Jelinek 198c2ecf20Sopenharmony_ci * (jj@sunsite.ms.mff.cuni.cz) 208c2ecf20Sopenharmony_ci */ 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_ci#include <linux/time.h> 238c2ecf20Sopenharmony_ci#include <linux/fs.h> 248c2ecf20Sopenharmony_ci#include <linux/iomap.h> 258c2ecf20Sopenharmony_ci#include <linux/mount.h> 268c2ecf20Sopenharmony_ci#include <linux/path.h> 278c2ecf20Sopenharmony_ci#include <linux/dax.h> 288c2ecf20Sopenharmony_ci#include <linux/quotaops.h> 298c2ecf20Sopenharmony_ci#include <linux/pagevec.h> 308c2ecf20Sopenharmony_ci#include <linux/uio.h> 318c2ecf20Sopenharmony_ci#include <linux/mman.h> 328c2ecf20Sopenharmony_ci#include <linux/backing-dev.h> 338c2ecf20Sopenharmony_ci#include "ext4.h" 348c2ecf20Sopenharmony_ci#include "ext4_jbd2.h" 358c2ecf20Sopenharmony_ci#include "xattr.h" 368c2ecf20Sopenharmony_ci#include "acl.h" 378c2ecf20Sopenharmony_ci#include "truncate.h" 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_cistatic bool ext4_dio_supported(struct inode *inode) 408c2ecf20Sopenharmony_ci{ 418c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode)) 428c2ecf20Sopenharmony_ci return false; 438c2ecf20Sopenharmony_ci if (fsverity_active(inode)) 448c2ecf20Sopenharmony_ci return false; 458c2ecf20Sopenharmony_ci if (ext4_should_journal_data(inode)) 468c2ecf20Sopenharmony_ci return false; 478c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) 488c2ecf20Sopenharmony_ci return false; 498c2ecf20Sopenharmony_ci return true; 508c2ecf20Sopenharmony_ci} 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_cistatic ssize_t ext4_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) 538c2ecf20Sopenharmony_ci{ 548c2ecf20Sopenharmony_ci ssize_t ret; 558c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) { 588c2ecf20Sopenharmony_ci if (!inode_trylock_shared(inode)) 598c2ecf20Sopenharmony_ci return -EAGAIN; 608c2ecf20Sopenharmony_ci } else { 618c2ecf20Sopenharmony_ci inode_lock_shared(inode); 628c2ecf20Sopenharmony_ci } 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci if (!ext4_dio_supported(inode)) { 658c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 668c2ecf20Sopenharmony_ci /* 678c2ecf20Sopenharmony_ci * Fallback to buffered I/O if the operation being performed on 688c2ecf20Sopenharmony_ci * the inode is not supported by direct I/O. The IOCB_DIRECT 698c2ecf20Sopenharmony_ci * flag needs to be cleared here in order to ensure that the 708c2ecf20Sopenharmony_ci * direct I/O path within generic_file_read_iter() is not 718c2ecf20Sopenharmony_ci * taken. 728c2ecf20Sopenharmony_ci */ 738c2ecf20Sopenharmony_ci iocb->ki_flags &= ~IOCB_DIRECT; 748c2ecf20Sopenharmony_ci return generic_file_read_iter(iocb, to); 758c2ecf20Sopenharmony_ci } 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci ret = iomap_dio_rw(iocb, to, &ext4_iomap_ops, NULL, 788c2ecf20Sopenharmony_ci is_sync_kiocb(iocb)); 798c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci file_accessed(iocb->ki_filp); 828c2ecf20Sopenharmony_ci return ret; 838c2ecf20Sopenharmony_ci} 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_DAX 868c2ecf20Sopenharmony_cistatic ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) 878c2ecf20Sopenharmony_ci{ 888c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 898c2ecf20Sopenharmony_ci ssize_t ret; 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) { 928c2ecf20Sopenharmony_ci if (!inode_trylock_shared(inode)) 938c2ecf20Sopenharmony_ci return -EAGAIN; 948c2ecf20Sopenharmony_ci } else { 958c2ecf20Sopenharmony_ci inode_lock_shared(inode); 968c2ecf20Sopenharmony_ci } 978c2ecf20Sopenharmony_ci /* 988c2ecf20Sopenharmony_ci * Recheck under inode lock - at this point we are sure it cannot 998c2ecf20Sopenharmony_ci * change anymore 1008c2ecf20Sopenharmony_ci */ 1018c2ecf20Sopenharmony_ci if (!IS_DAX(inode)) { 1028c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 1038c2ecf20Sopenharmony_ci /* Fallback to buffered IO in case we cannot support DAX */ 1048c2ecf20Sopenharmony_ci return generic_file_read_iter(iocb, to); 1058c2ecf20Sopenharmony_ci } 1068c2ecf20Sopenharmony_ci ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops); 1078c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci file_accessed(iocb->ki_filp); 1108c2ecf20Sopenharmony_ci return ret; 1118c2ecf20Sopenharmony_ci} 1128c2ecf20Sopenharmony_ci#endif 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_cistatic ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 1158c2ecf20Sopenharmony_ci{ 1168c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 1198c2ecf20Sopenharmony_ci return -EIO; 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci if (!iov_iter_count(to)) 1228c2ecf20Sopenharmony_ci return 0; /* skip atime */ 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_DAX 1258c2ecf20Sopenharmony_ci if (IS_DAX(inode)) 1268c2ecf20Sopenharmony_ci return ext4_dax_read_iter(iocb, to); 1278c2ecf20Sopenharmony_ci#endif 1288c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_DIRECT) 1298c2ecf20Sopenharmony_ci return ext4_dio_read_iter(iocb, to); 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci return generic_file_read_iter(iocb, to); 1328c2ecf20Sopenharmony_ci} 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci/* 1358c2ecf20Sopenharmony_ci * Called when an inode is released. Note that this is different 1368c2ecf20Sopenharmony_ci * from ext4_file_open: open gets called at every open, but release 1378c2ecf20Sopenharmony_ci * gets called only when /all/ the files are closed. 1388c2ecf20Sopenharmony_ci */ 1398c2ecf20Sopenharmony_cistatic int ext4_release_file(struct inode *inode, struct file *filp) 1408c2ecf20Sopenharmony_ci{ 1418c2ecf20Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) { 1428c2ecf20Sopenharmony_ci ext4_alloc_da_blocks(inode); 1438c2ecf20Sopenharmony_ci ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); 1448c2ecf20Sopenharmony_ci } 1458c2ecf20Sopenharmony_ci /* if we are the last writer on the inode, drop the block reservation */ 1468c2ecf20Sopenharmony_ci if ((filp->f_mode & FMODE_WRITE) && 1478c2ecf20Sopenharmony_ci (atomic_read(&inode->i_writecount) == 1) && 1488c2ecf20Sopenharmony_ci !EXT4_I(inode)->i_reserved_data_blocks) { 1498c2ecf20Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 1508c2ecf20Sopenharmony_ci ext4_discard_preallocations(inode, 0); 1518c2ecf20Sopenharmony_ci up_write(&EXT4_I(inode)->i_data_sem); 1528c2ecf20Sopenharmony_ci } 1538c2ecf20Sopenharmony_ci if (is_dx(inode) && filp->private_data) 1548c2ecf20Sopenharmony_ci ext4_htree_free_dir_info(filp->private_data); 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci return 0; 1578c2ecf20Sopenharmony_ci} 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci/* 1608c2ecf20Sopenharmony_ci * This tests whether the IO in question is block-aligned or not. 1618c2ecf20Sopenharmony_ci * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they 1628c2ecf20Sopenharmony_ci * are converted to written only after the IO is complete. Until they are 1638c2ecf20Sopenharmony_ci * mapped, these blocks appear as holes, so dio_zero_block() will assume that 1648c2ecf20Sopenharmony_ci * it needs to zero out portions of the start and/or end block. If 2 AIO 1658c2ecf20Sopenharmony_ci * threads are at work on the same unwritten block, they must be synchronized 1668c2ecf20Sopenharmony_ci * or one thread will zero the other's data, causing corruption. 1678c2ecf20Sopenharmony_ci */ 1688c2ecf20Sopenharmony_cistatic bool 1698c2ecf20Sopenharmony_ciext4_unaligned_io(struct inode *inode, struct iov_iter *from, loff_t pos) 1708c2ecf20Sopenharmony_ci{ 1718c2ecf20Sopenharmony_ci struct super_block *sb = inode->i_sb; 1728c2ecf20Sopenharmony_ci unsigned long blockmask = sb->s_blocksize - 1; 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci if ((pos | iov_iter_alignment(from)) & blockmask) 1758c2ecf20Sopenharmony_ci return true; 1768c2ecf20Sopenharmony_ci 1778c2ecf20Sopenharmony_ci return false; 1788c2ecf20Sopenharmony_ci} 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_cistatic bool 1818c2ecf20Sopenharmony_ciext4_extending_io(struct inode *inode, loff_t offset, size_t len) 1828c2ecf20Sopenharmony_ci{ 1838c2ecf20Sopenharmony_ci if (offset + len > i_size_read(inode) || 1848c2ecf20Sopenharmony_ci offset + len > EXT4_I(inode)->i_disksize) 1858c2ecf20Sopenharmony_ci return true; 1868c2ecf20Sopenharmony_ci return false; 1878c2ecf20Sopenharmony_ci} 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci/* Is IO overwriting allocated and initialized blocks? */ 1908c2ecf20Sopenharmony_cistatic bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) 1918c2ecf20Sopenharmony_ci{ 1928c2ecf20Sopenharmony_ci struct ext4_map_blocks map; 1938c2ecf20Sopenharmony_ci unsigned int blkbits = inode->i_blkbits; 1948c2ecf20Sopenharmony_ci int err, blklen; 1958c2ecf20Sopenharmony_ci 1968c2ecf20Sopenharmony_ci if (pos + len > i_size_read(inode)) 1978c2ecf20Sopenharmony_ci return false; 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci map.m_lblk = pos >> blkbits; 2008c2ecf20Sopenharmony_ci map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits); 2018c2ecf20Sopenharmony_ci blklen = map.m_len; 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci err = ext4_map_blocks(NULL, inode, &map, 0); 2048c2ecf20Sopenharmony_ci /* 2058c2ecf20Sopenharmony_ci * 'err==len' means that all of the blocks have been preallocated, 2068c2ecf20Sopenharmony_ci * regardless of whether they have been initialized or not. To exclude 2078c2ecf20Sopenharmony_ci * unwritten extents, we need to check m_flags. 2088c2ecf20Sopenharmony_ci */ 2098c2ecf20Sopenharmony_ci return err == blklen && (map.m_flags & EXT4_MAP_MAPPED); 2108c2ecf20Sopenharmony_ci} 2118c2ecf20Sopenharmony_ci 2128c2ecf20Sopenharmony_cistatic ssize_t ext4_generic_write_checks(struct kiocb *iocb, 2138c2ecf20Sopenharmony_ci struct iov_iter *from) 2148c2ecf20Sopenharmony_ci{ 2158c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 2168c2ecf20Sopenharmony_ci ssize_t ret; 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci if (unlikely(IS_IMMUTABLE(inode))) 2198c2ecf20Sopenharmony_ci return -EPERM; 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci ret = generic_write_checks(iocb, from); 2228c2ecf20Sopenharmony_ci if (ret <= 0) 2238c2ecf20Sopenharmony_ci return ret; 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ci /* 2268c2ecf20Sopenharmony_ci * If we have encountered a bitmap-format file, the size limit 2278c2ecf20Sopenharmony_ci * is smaller than s_maxbytes, which is for extent-mapped files. 2288c2ecf20Sopenharmony_ci */ 2298c2ecf20Sopenharmony_ci if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 2308c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_ci if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) 2338c2ecf20Sopenharmony_ci return -EFBIG; 2348c2ecf20Sopenharmony_ci iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos); 2358c2ecf20Sopenharmony_ci } 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci return iov_iter_count(from); 2388c2ecf20Sopenharmony_ci} 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_cistatic ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) 2418c2ecf20Sopenharmony_ci{ 2428c2ecf20Sopenharmony_ci ssize_t ret, count; 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci count = ext4_generic_write_checks(iocb, from); 2458c2ecf20Sopenharmony_ci if (count <= 0) 2468c2ecf20Sopenharmony_ci return count; 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci ret = file_modified(iocb->ki_filp); 2498c2ecf20Sopenharmony_ci if (ret) 2508c2ecf20Sopenharmony_ci return ret; 2518c2ecf20Sopenharmony_ci return count; 2528c2ecf20Sopenharmony_ci} 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_cistatic ssize_t ext4_buffered_write_iter(struct kiocb *iocb, 2558c2ecf20Sopenharmony_ci struct iov_iter *from) 2568c2ecf20Sopenharmony_ci{ 2578c2ecf20Sopenharmony_ci ssize_t ret; 2588c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) 2618c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci inode_lock(inode); 2648c2ecf20Sopenharmony_ci ret = ext4_write_checks(iocb, from); 2658c2ecf20Sopenharmony_ci if (ret <= 0) 2668c2ecf20Sopenharmony_ci goto out; 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_ci current->backing_dev_info = inode_to_bdi(inode); 2698c2ecf20Sopenharmony_ci ret = generic_perform_write(iocb->ki_filp, from, iocb->ki_pos); 2708c2ecf20Sopenharmony_ci current->backing_dev_info = NULL; 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ciout: 2738c2ecf20Sopenharmony_ci inode_unlock(inode); 2748c2ecf20Sopenharmony_ci if (likely(ret > 0)) { 2758c2ecf20Sopenharmony_ci iocb->ki_pos += ret; 2768c2ecf20Sopenharmony_ci ret = generic_write_sync(iocb, ret); 2778c2ecf20Sopenharmony_ci } 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci return ret; 2808c2ecf20Sopenharmony_ci} 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_cistatic ssize_t ext4_handle_inode_extension(struct inode *inode, loff_t offset, 2838c2ecf20Sopenharmony_ci ssize_t written, size_t count) 2848c2ecf20Sopenharmony_ci{ 2858c2ecf20Sopenharmony_ci handle_t *handle; 2868c2ecf20Sopenharmony_ci bool truncate = false; 2878c2ecf20Sopenharmony_ci u8 blkbits = inode->i_blkbits; 2888c2ecf20Sopenharmony_ci ext4_lblk_t written_blk, end_blk; 2898c2ecf20Sopenharmony_ci int ret; 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci /* 2928c2ecf20Sopenharmony_ci * Note that EXT4_I(inode)->i_disksize can get extended up to 2938c2ecf20Sopenharmony_ci * inode->i_size while the I/O was running due to writeback of delalloc 2948c2ecf20Sopenharmony_ci * blocks. But, the code in ext4_iomap_alloc() is careful to use 2958c2ecf20Sopenharmony_ci * zeroed/unwritten extents if this is possible; thus we won't leave 2968c2ecf20Sopenharmony_ci * uninitialized blocks in a file even if we didn't succeed in writing 2978c2ecf20Sopenharmony_ci * as much as we intended. 2988c2ecf20Sopenharmony_ci */ 2998c2ecf20Sopenharmony_ci WARN_ON_ONCE(i_size_read(inode) < EXT4_I(inode)->i_disksize); 3008c2ecf20Sopenharmony_ci if (offset + count <= EXT4_I(inode)->i_disksize) { 3018c2ecf20Sopenharmony_ci /* 3028c2ecf20Sopenharmony_ci * We need to ensure that the inode is removed from the orphan 3038c2ecf20Sopenharmony_ci * list if it has been added prematurely, due to writeback of 3048c2ecf20Sopenharmony_ci * delalloc blocks. 3058c2ecf20Sopenharmony_ci */ 3068c2ecf20Sopenharmony_ci if (!list_empty(&EXT4_I(inode)->i_orphan) && inode->i_nlink) { 3078c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 3108c2ecf20Sopenharmony_ci ext4_orphan_del(NULL, inode); 3118c2ecf20Sopenharmony_ci return PTR_ERR(handle); 3128c2ecf20Sopenharmony_ci } 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_ci ext4_orphan_del(handle, inode); 3158c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 3168c2ecf20Sopenharmony_ci } 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci return written; 3198c2ecf20Sopenharmony_ci } 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci if (written < 0) 3228c2ecf20Sopenharmony_ci goto truncate; 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 3258c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 3268c2ecf20Sopenharmony_ci written = PTR_ERR(handle); 3278c2ecf20Sopenharmony_ci goto truncate; 3288c2ecf20Sopenharmony_ci } 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci if (ext4_update_inode_size(inode, offset + written)) { 3318c2ecf20Sopenharmony_ci ret = ext4_mark_inode_dirty(handle, inode); 3328c2ecf20Sopenharmony_ci if (unlikely(ret)) { 3338c2ecf20Sopenharmony_ci written = ret; 3348c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 3358c2ecf20Sopenharmony_ci goto truncate; 3368c2ecf20Sopenharmony_ci } 3378c2ecf20Sopenharmony_ci } 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci /* 3408c2ecf20Sopenharmony_ci * We may need to truncate allocated but not written blocks beyond EOF. 3418c2ecf20Sopenharmony_ci */ 3428c2ecf20Sopenharmony_ci written_blk = ALIGN(offset + written, 1 << blkbits); 3438c2ecf20Sopenharmony_ci end_blk = ALIGN(offset + count, 1 << blkbits); 3448c2ecf20Sopenharmony_ci if (written_blk < end_blk && ext4_can_truncate(inode)) 3458c2ecf20Sopenharmony_ci truncate = true; 3468c2ecf20Sopenharmony_ci 3478c2ecf20Sopenharmony_ci /* 3488c2ecf20Sopenharmony_ci * Remove the inode from the orphan list if it has been extended and 3498c2ecf20Sopenharmony_ci * everything went OK. 3508c2ecf20Sopenharmony_ci */ 3518c2ecf20Sopenharmony_ci if (!truncate && inode->i_nlink) 3528c2ecf20Sopenharmony_ci ext4_orphan_del(handle, inode); 3538c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci if (truncate) { 3568c2ecf20Sopenharmony_citruncate: 3578c2ecf20Sopenharmony_ci ext4_truncate_failed_write(inode); 3588c2ecf20Sopenharmony_ci /* 3598c2ecf20Sopenharmony_ci * If the truncate operation failed early, then the inode may 3608c2ecf20Sopenharmony_ci * still be on the orphan list. In that case, we need to try 3618c2ecf20Sopenharmony_ci * remove the inode from the in-memory linked list. 3628c2ecf20Sopenharmony_ci */ 3638c2ecf20Sopenharmony_ci if (inode->i_nlink) 3648c2ecf20Sopenharmony_ci ext4_orphan_del(NULL, inode); 3658c2ecf20Sopenharmony_ci } 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci return written; 3688c2ecf20Sopenharmony_ci} 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_cistatic int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size, 3718c2ecf20Sopenharmony_ci int error, unsigned int flags) 3728c2ecf20Sopenharmony_ci{ 3738c2ecf20Sopenharmony_ci loff_t pos = iocb->ki_pos; 3748c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci if (error) 3778c2ecf20Sopenharmony_ci return error; 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci if (size && flags & IOMAP_DIO_UNWRITTEN) { 3808c2ecf20Sopenharmony_ci error = ext4_convert_unwritten_extents(NULL, inode, pos, size); 3818c2ecf20Sopenharmony_ci if (error < 0) 3828c2ecf20Sopenharmony_ci return error; 3838c2ecf20Sopenharmony_ci } 3848c2ecf20Sopenharmony_ci /* 3858c2ecf20Sopenharmony_ci * If we are extending the file, we have to update i_size here before 3868c2ecf20Sopenharmony_ci * page cache gets invalidated in iomap_dio_rw(). Otherwise racing 3878c2ecf20Sopenharmony_ci * buffered reads could zero out too much from page cache pages. Update 3888c2ecf20Sopenharmony_ci * of on-disk size will happen later in ext4_dio_write_iter() where 3898c2ecf20Sopenharmony_ci * we have enough information to also perform orphan list handling etc. 3908c2ecf20Sopenharmony_ci * Note that we perform all extending writes synchronously under 3918c2ecf20Sopenharmony_ci * i_rwsem held exclusively so i_size update is safe here in that case. 3928c2ecf20Sopenharmony_ci * If the write was not extending, we cannot see pos > i_size here 3938c2ecf20Sopenharmony_ci * because operations reducing i_size like truncate wait for all 3948c2ecf20Sopenharmony_ci * outstanding DIO before updating i_size. 3958c2ecf20Sopenharmony_ci */ 3968c2ecf20Sopenharmony_ci pos += size; 3978c2ecf20Sopenharmony_ci if (pos > i_size_read(inode)) 3988c2ecf20Sopenharmony_ci i_size_write(inode, pos); 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci return 0; 4018c2ecf20Sopenharmony_ci} 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_cistatic const struct iomap_dio_ops ext4_dio_write_ops = { 4048c2ecf20Sopenharmony_ci .end_io = ext4_dio_write_end_io, 4058c2ecf20Sopenharmony_ci}; 4068c2ecf20Sopenharmony_ci 4078c2ecf20Sopenharmony_ci/* 4088c2ecf20Sopenharmony_ci * The intention here is to start with shared lock acquired then see if any 4098c2ecf20Sopenharmony_ci * condition requires an exclusive inode lock. If yes, then we restart the 4108c2ecf20Sopenharmony_ci * whole operation by releasing the shared lock and acquiring exclusive lock. 4118c2ecf20Sopenharmony_ci * 4128c2ecf20Sopenharmony_ci * - For unaligned_io we never take shared lock as it may cause data corruption 4138c2ecf20Sopenharmony_ci * when two unaligned IO tries to modify the same block e.g. while zeroing. 4148c2ecf20Sopenharmony_ci * 4158c2ecf20Sopenharmony_ci * - For extending writes case we don't take the shared lock, since it requires 4168c2ecf20Sopenharmony_ci * updating inode i_disksize and/or orphan handling with exclusive lock. 4178c2ecf20Sopenharmony_ci * 4188c2ecf20Sopenharmony_ci * - shared locking will only be true mostly with overwrites. Otherwise we will 4198c2ecf20Sopenharmony_ci * switch to exclusive i_rwsem lock. 4208c2ecf20Sopenharmony_ci */ 4218c2ecf20Sopenharmony_cistatic ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from, 4228c2ecf20Sopenharmony_ci bool *ilock_shared, bool *extend) 4238c2ecf20Sopenharmony_ci{ 4248c2ecf20Sopenharmony_ci struct file *file = iocb->ki_filp; 4258c2ecf20Sopenharmony_ci struct inode *inode = file_inode(file); 4268c2ecf20Sopenharmony_ci loff_t offset; 4278c2ecf20Sopenharmony_ci size_t count; 4288c2ecf20Sopenharmony_ci ssize_t ret; 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_cirestart: 4318c2ecf20Sopenharmony_ci ret = ext4_generic_write_checks(iocb, from); 4328c2ecf20Sopenharmony_ci if (ret <= 0) 4338c2ecf20Sopenharmony_ci goto out; 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_ci offset = iocb->ki_pos; 4368c2ecf20Sopenharmony_ci count = ret; 4378c2ecf20Sopenharmony_ci if (ext4_extending_io(inode, offset, count)) 4388c2ecf20Sopenharmony_ci *extend = true; 4398c2ecf20Sopenharmony_ci /* 4408c2ecf20Sopenharmony_ci * Determine whether the IO operation will overwrite allocated 4418c2ecf20Sopenharmony_ci * and initialized blocks. 4428c2ecf20Sopenharmony_ci * We need exclusive i_rwsem for changing security info 4438c2ecf20Sopenharmony_ci * in file_modified(). 4448c2ecf20Sopenharmony_ci */ 4458c2ecf20Sopenharmony_ci if (*ilock_shared && (!IS_NOSEC(inode) || *extend || 4468c2ecf20Sopenharmony_ci !ext4_overwrite_io(inode, offset, count))) { 4478c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) { 4488c2ecf20Sopenharmony_ci ret = -EAGAIN; 4498c2ecf20Sopenharmony_ci goto out; 4508c2ecf20Sopenharmony_ci } 4518c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 4528c2ecf20Sopenharmony_ci *ilock_shared = false; 4538c2ecf20Sopenharmony_ci inode_lock(inode); 4548c2ecf20Sopenharmony_ci goto restart; 4558c2ecf20Sopenharmony_ci } 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci ret = file_modified(file); 4588c2ecf20Sopenharmony_ci if (ret < 0) 4598c2ecf20Sopenharmony_ci goto out; 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_ci return count; 4628c2ecf20Sopenharmony_ciout: 4638c2ecf20Sopenharmony_ci if (*ilock_shared) 4648c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 4658c2ecf20Sopenharmony_ci else 4668c2ecf20Sopenharmony_ci inode_unlock(inode); 4678c2ecf20Sopenharmony_ci return ret; 4688c2ecf20Sopenharmony_ci} 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_cistatic ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) 4718c2ecf20Sopenharmony_ci{ 4728c2ecf20Sopenharmony_ci ssize_t ret; 4738c2ecf20Sopenharmony_ci handle_t *handle; 4748c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 4758c2ecf20Sopenharmony_ci loff_t offset = iocb->ki_pos; 4768c2ecf20Sopenharmony_ci size_t count = iov_iter_count(from); 4778c2ecf20Sopenharmony_ci const struct iomap_ops *iomap_ops = &ext4_iomap_ops; 4788c2ecf20Sopenharmony_ci bool extend = false, unaligned_io = false; 4798c2ecf20Sopenharmony_ci bool ilock_shared = true; 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_ci /* 4828c2ecf20Sopenharmony_ci * We initially start with shared inode lock unless it is 4838c2ecf20Sopenharmony_ci * unaligned IO which needs exclusive lock anyways. 4848c2ecf20Sopenharmony_ci */ 4858c2ecf20Sopenharmony_ci if (ext4_unaligned_io(inode, from, offset)) { 4868c2ecf20Sopenharmony_ci unaligned_io = true; 4878c2ecf20Sopenharmony_ci ilock_shared = false; 4888c2ecf20Sopenharmony_ci } 4898c2ecf20Sopenharmony_ci /* 4908c2ecf20Sopenharmony_ci * Quick check here without any i_rwsem lock to see if it is extending 4918c2ecf20Sopenharmony_ci * IO. A more reliable check is done in ext4_dio_write_checks() with 4928c2ecf20Sopenharmony_ci * proper locking in place. 4938c2ecf20Sopenharmony_ci */ 4948c2ecf20Sopenharmony_ci if (offset + count > i_size_read(inode)) 4958c2ecf20Sopenharmony_ci ilock_shared = false; 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) { 4988c2ecf20Sopenharmony_ci if (ilock_shared) { 4998c2ecf20Sopenharmony_ci if (!inode_trylock_shared(inode)) 5008c2ecf20Sopenharmony_ci return -EAGAIN; 5018c2ecf20Sopenharmony_ci } else { 5028c2ecf20Sopenharmony_ci if (!inode_trylock(inode)) 5038c2ecf20Sopenharmony_ci return -EAGAIN; 5048c2ecf20Sopenharmony_ci } 5058c2ecf20Sopenharmony_ci } else { 5068c2ecf20Sopenharmony_ci if (ilock_shared) 5078c2ecf20Sopenharmony_ci inode_lock_shared(inode); 5088c2ecf20Sopenharmony_ci else 5098c2ecf20Sopenharmony_ci inode_lock(inode); 5108c2ecf20Sopenharmony_ci } 5118c2ecf20Sopenharmony_ci 5128c2ecf20Sopenharmony_ci /* Fallback to buffered I/O if the inode does not support direct I/O. */ 5138c2ecf20Sopenharmony_ci if (!ext4_dio_supported(inode)) { 5148c2ecf20Sopenharmony_ci if (ilock_shared) 5158c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 5168c2ecf20Sopenharmony_ci else 5178c2ecf20Sopenharmony_ci inode_unlock(inode); 5188c2ecf20Sopenharmony_ci return ext4_buffered_write_iter(iocb, from); 5198c2ecf20Sopenharmony_ci } 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend); 5228c2ecf20Sopenharmony_ci if (ret <= 0) 5238c2ecf20Sopenharmony_ci return ret; 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci /* if we're going to block and IOCB_NOWAIT is set, return -EAGAIN */ 5268c2ecf20Sopenharmony_ci if ((iocb->ki_flags & IOCB_NOWAIT) && (unaligned_io || extend)) { 5278c2ecf20Sopenharmony_ci ret = -EAGAIN; 5288c2ecf20Sopenharmony_ci goto out; 5298c2ecf20Sopenharmony_ci } 5308c2ecf20Sopenharmony_ci /* 5318c2ecf20Sopenharmony_ci * Make sure inline data cannot be created anymore since we are going 5328c2ecf20Sopenharmony_ci * to allocate blocks for DIO. We know the inode does not have any 5338c2ecf20Sopenharmony_ci * inline data now because ext4_dio_supported() checked for that. 5348c2ecf20Sopenharmony_ci */ 5358c2ecf20Sopenharmony_ci ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci offset = iocb->ki_pos; 5388c2ecf20Sopenharmony_ci count = ret; 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci /* 5418c2ecf20Sopenharmony_ci * Unaligned direct IO must be serialized among each other as zeroing 5428c2ecf20Sopenharmony_ci * of partial blocks of two competing unaligned IOs can result in data 5438c2ecf20Sopenharmony_ci * corruption. 5448c2ecf20Sopenharmony_ci * 5458c2ecf20Sopenharmony_ci * So we make sure we don't allow any unaligned IO in flight. 5468c2ecf20Sopenharmony_ci * For IOs where we need not wait (like unaligned non-AIO DIO), 5478c2ecf20Sopenharmony_ci * below inode_dio_wait() may anyway become a no-op, since we start 5488c2ecf20Sopenharmony_ci * with exclusive lock. 5498c2ecf20Sopenharmony_ci */ 5508c2ecf20Sopenharmony_ci if (unaligned_io) 5518c2ecf20Sopenharmony_ci inode_dio_wait(inode); 5528c2ecf20Sopenharmony_ci 5538c2ecf20Sopenharmony_ci if (extend) { 5548c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 5558c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 5568c2ecf20Sopenharmony_ci ret = PTR_ERR(handle); 5578c2ecf20Sopenharmony_ci goto out; 5588c2ecf20Sopenharmony_ci } 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_ci ret = ext4_orphan_add(handle, inode); 5618c2ecf20Sopenharmony_ci if (ret) { 5628c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 5638c2ecf20Sopenharmony_ci goto out; 5648c2ecf20Sopenharmony_ci } 5658c2ecf20Sopenharmony_ci 5668c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 5678c2ecf20Sopenharmony_ci } 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci if (ilock_shared) 5708c2ecf20Sopenharmony_ci iomap_ops = &ext4_iomap_overwrite_ops; 5718c2ecf20Sopenharmony_ci ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, 5728c2ecf20Sopenharmony_ci is_sync_kiocb(iocb) || unaligned_io || extend); 5738c2ecf20Sopenharmony_ci if (ret == -ENOTBLK) 5748c2ecf20Sopenharmony_ci ret = 0; 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_ci if (extend) 5778c2ecf20Sopenharmony_ci ret = ext4_handle_inode_extension(inode, offset, ret, count); 5788c2ecf20Sopenharmony_ci 5798c2ecf20Sopenharmony_ciout: 5808c2ecf20Sopenharmony_ci if (ilock_shared) 5818c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 5828c2ecf20Sopenharmony_ci else 5838c2ecf20Sopenharmony_ci inode_unlock(inode); 5848c2ecf20Sopenharmony_ci 5858c2ecf20Sopenharmony_ci if (ret >= 0 && iov_iter_count(from)) { 5868c2ecf20Sopenharmony_ci ssize_t err; 5878c2ecf20Sopenharmony_ci loff_t endbyte; 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci offset = iocb->ki_pos; 5908c2ecf20Sopenharmony_ci err = ext4_buffered_write_iter(iocb, from); 5918c2ecf20Sopenharmony_ci if (err < 0) 5928c2ecf20Sopenharmony_ci return err; 5938c2ecf20Sopenharmony_ci 5948c2ecf20Sopenharmony_ci /* 5958c2ecf20Sopenharmony_ci * We need to ensure that the pages within the page cache for 5968c2ecf20Sopenharmony_ci * the range covered by this I/O are written to disk and 5978c2ecf20Sopenharmony_ci * invalidated. This is in attempt to preserve the expected 5988c2ecf20Sopenharmony_ci * direct I/O semantics in the case we fallback to buffered I/O 5998c2ecf20Sopenharmony_ci * to complete off the I/O request. 6008c2ecf20Sopenharmony_ci */ 6018c2ecf20Sopenharmony_ci ret += err; 6028c2ecf20Sopenharmony_ci endbyte = offset + err - 1; 6038c2ecf20Sopenharmony_ci err = filemap_write_and_wait_range(iocb->ki_filp->f_mapping, 6048c2ecf20Sopenharmony_ci offset, endbyte); 6058c2ecf20Sopenharmony_ci if (!err) 6068c2ecf20Sopenharmony_ci invalidate_mapping_pages(iocb->ki_filp->f_mapping, 6078c2ecf20Sopenharmony_ci offset >> PAGE_SHIFT, 6088c2ecf20Sopenharmony_ci endbyte >> PAGE_SHIFT); 6098c2ecf20Sopenharmony_ci } 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci return ret; 6128c2ecf20Sopenharmony_ci} 6138c2ecf20Sopenharmony_ci 6148c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_DAX 6158c2ecf20Sopenharmony_cistatic ssize_t 6168c2ecf20Sopenharmony_ciext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) 6178c2ecf20Sopenharmony_ci{ 6188c2ecf20Sopenharmony_ci ssize_t ret; 6198c2ecf20Sopenharmony_ci size_t count; 6208c2ecf20Sopenharmony_ci loff_t offset; 6218c2ecf20Sopenharmony_ci handle_t *handle; 6228c2ecf20Sopenharmony_ci bool extend = false; 6238c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 6248c2ecf20Sopenharmony_ci 6258c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) { 6268c2ecf20Sopenharmony_ci if (!inode_trylock(inode)) 6278c2ecf20Sopenharmony_ci return -EAGAIN; 6288c2ecf20Sopenharmony_ci } else { 6298c2ecf20Sopenharmony_ci inode_lock(inode); 6308c2ecf20Sopenharmony_ci } 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_ci ret = ext4_write_checks(iocb, from); 6338c2ecf20Sopenharmony_ci if (ret <= 0) 6348c2ecf20Sopenharmony_ci goto out; 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci offset = iocb->ki_pos; 6378c2ecf20Sopenharmony_ci count = iov_iter_count(from); 6388c2ecf20Sopenharmony_ci 6398c2ecf20Sopenharmony_ci if (offset + count > EXT4_I(inode)->i_disksize) { 6408c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 6418c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 6428c2ecf20Sopenharmony_ci ret = PTR_ERR(handle); 6438c2ecf20Sopenharmony_ci goto out; 6448c2ecf20Sopenharmony_ci } 6458c2ecf20Sopenharmony_ci 6468c2ecf20Sopenharmony_ci ret = ext4_orphan_add(handle, inode); 6478c2ecf20Sopenharmony_ci if (ret) { 6488c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 6498c2ecf20Sopenharmony_ci goto out; 6508c2ecf20Sopenharmony_ci } 6518c2ecf20Sopenharmony_ci 6528c2ecf20Sopenharmony_ci extend = true; 6538c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 6548c2ecf20Sopenharmony_ci } 6558c2ecf20Sopenharmony_ci 6568c2ecf20Sopenharmony_ci ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops); 6578c2ecf20Sopenharmony_ci 6588c2ecf20Sopenharmony_ci if (extend) 6598c2ecf20Sopenharmony_ci ret = ext4_handle_inode_extension(inode, offset, ret, count); 6608c2ecf20Sopenharmony_ciout: 6618c2ecf20Sopenharmony_ci inode_unlock(inode); 6628c2ecf20Sopenharmony_ci if (ret > 0) 6638c2ecf20Sopenharmony_ci ret = generic_write_sync(iocb, ret); 6648c2ecf20Sopenharmony_ci return ret; 6658c2ecf20Sopenharmony_ci} 6668c2ecf20Sopenharmony_ci#endif 6678c2ecf20Sopenharmony_ci 6688c2ecf20Sopenharmony_cistatic ssize_t 6698c2ecf20Sopenharmony_ciext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 6708c2ecf20Sopenharmony_ci{ 6718c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 6728c2ecf20Sopenharmony_ci 6738c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 6748c2ecf20Sopenharmony_ci return -EIO; 6758c2ecf20Sopenharmony_ci 6768c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_DAX 6778c2ecf20Sopenharmony_ci if (IS_DAX(inode)) 6788c2ecf20Sopenharmony_ci return ext4_dax_write_iter(iocb, from); 6798c2ecf20Sopenharmony_ci#endif 6808c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_DIRECT) 6818c2ecf20Sopenharmony_ci return ext4_dio_write_iter(iocb, from); 6828c2ecf20Sopenharmony_ci else 6838c2ecf20Sopenharmony_ci return ext4_buffered_write_iter(iocb, from); 6848c2ecf20Sopenharmony_ci} 6858c2ecf20Sopenharmony_ci 6868c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_DAX 6878c2ecf20Sopenharmony_cistatic vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, 6888c2ecf20Sopenharmony_ci enum page_entry_size pe_size) 6898c2ecf20Sopenharmony_ci{ 6908c2ecf20Sopenharmony_ci int error = 0; 6918c2ecf20Sopenharmony_ci vm_fault_t result; 6928c2ecf20Sopenharmony_ci int retries = 0; 6938c2ecf20Sopenharmony_ci handle_t *handle = NULL; 6948c2ecf20Sopenharmony_ci struct inode *inode = file_inode(vmf->vma->vm_file); 6958c2ecf20Sopenharmony_ci struct super_block *sb = inode->i_sb; 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_ci /* 6988c2ecf20Sopenharmony_ci * We have to distinguish real writes from writes which will result in a 6998c2ecf20Sopenharmony_ci * COW page; COW writes should *not* poke the journal (the file will not 7008c2ecf20Sopenharmony_ci * be changed). Doing so would cause unintended failures when mounted 7018c2ecf20Sopenharmony_ci * read-only. 7028c2ecf20Sopenharmony_ci * 7038c2ecf20Sopenharmony_ci * We check for VM_SHARED rather than vmf->cow_page since the latter is 7048c2ecf20Sopenharmony_ci * unset for pe_size != PE_SIZE_PTE (i.e. only in do_cow_fault); for 7058c2ecf20Sopenharmony_ci * other sizes, dax_iomap_fault will handle splitting / fallback so that 7068c2ecf20Sopenharmony_ci * we eventually come back with a COW page. 7078c2ecf20Sopenharmony_ci */ 7088c2ecf20Sopenharmony_ci bool write = (vmf->flags & FAULT_FLAG_WRITE) && 7098c2ecf20Sopenharmony_ci (vmf->vma->vm_flags & VM_SHARED); 7108c2ecf20Sopenharmony_ci pfn_t pfn; 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ci if (write) { 7138c2ecf20Sopenharmony_ci sb_start_pagefault(sb); 7148c2ecf20Sopenharmony_ci file_update_time(vmf->vma->vm_file); 7158c2ecf20Sopenharmony_ci down_read(&EXT4_I(inode)->i_mmap_sem); 7168c2ecf20Sopenharmony_ciretry: 7178c2ecf20Sopenharmony_ci handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, 7188c2ecf20Sopenharmony_ci EXT4_DATA_TRANS_BLOCKS(sb)); 7198c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 7208c2ecf20Sopenharmony_ci up_read(&EXT4_I(inode)->i_mmap_sem); 7218c2ecf20Sopenharmony_ci sb_end_pagefault(sb); 7228c2ecf20Sopenharmony_ci return VM_FAULT_SIGBUS; 7238c2ecf20Sopenharmony_ci } 7248c2ecf20Sopenharmony_ci } else { 7258c2ecf20Sopenharmony_ci down_read(&EXT4_I(inode)->i_mmap_sem); 7268c2ecf20Sopenharmony_ci } 7278c2ecf20Sopenharmony_ci result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops); 7288c2ecf20Sopenharmony_ci if (write) { 7298c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 7308c2ecf20Sopenharmony_ci 7318c2ecf20Sopenharmony_ci if ((result & VM_FAULT_ERROR) && error == -ENOSPC && 7328c2ecf20Sopenharmony_ci ext4_should_retry_alloc(sb, &retries)) 7338c2ecf20Sopenharmony_ci goto retry; 7348c2ecf20Sopenharmony_ci /* Handling synchronous page fault? */ 7358c2ecf20Sopenharmony_ci if (result & VM_FAULT_NEEDDSYNC) 7368c2ecf20Sopenharmony_ci result = dax_finish_sync_fault(vmf, pe_size, pfn); 7378c2ecf20Sopenharmony_ci up_read(&EXT4_I(inode)->i_mmap_sem); 7388c2ecf20Sopenharmony_ci sb_end_pagefault(sb); 7398c2ecf20Sopenharmony_ci } else { 7408c2ecf20Sopenharmony_ci up_read(&EXT4_I(inode)->i_mmap_sem); 7418c2ecf20Sopenharmony_ci } 7428c2ecf20Sopenharmony_ci 7438c2ecf20Sopenharmony_ci return result; 7448c2ecf20Sopenharmony_ci} 7458c2ecf20Sopenharmony_ci 7468c2ecf20Sopenharmony_cistatic vm_fault_t ext4_dax_fault(struct vm_fault *vmf) 7478c2ecf20Sopenharmony_ci{ 7488c2ecf20Sopenharmony_ci return ext4_dax_huge_fault(vmf, PE_SIZE_PTE); 7498c2ecf20Sopenharmony_ci} 7508c2ecf20Sopenharmony_ci 7518c2ecf20Sopenharmony_cistatic const struct vm_operations_struct ext4_dax_vm_ops = { 7528c2ecf20Sopenharmony_ci .fault = ext4_dax_fault, 7538c2ecf20Sopenharmony_ci .huge_fault = ext4_dax_huge_fault, 7548c2ecf20Sopenharmony_ci .page_mkwrite = ext4_dax_fault, 7558c2ecf20Sopenharmony_ci .pfn_mkwrite = ext4_dax_fault, 7568c2ecf20Sopenharmony_ci}; 7578c2ecf20Sopenharmony_ci#else 7588c2ecf20Sopenharmony_ci#define ext4_dax_vm_ops ext4_file_vm_ops 7598c2ecf20Sopenharmony_ci#endif 7608c2ecf20Sopenharmony_ci 7618c2ecf20Sopenharmony_cistatic const struct vm_operations_struct ext4_file_vm_ops = { 7628c2ecf20Sopenharmony_ci .fault = ext4_filemap_fault, 7638c2ecf20Sopenharmony_ci .map_pages = filemap_map_pages, 7648c2ecf20Sopenharmony_ci .page_mkwrite = ext4_page_mkwrite, 7658c2ecf20Sopenharmony_ci}; 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_cistatic int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) 7688c2ecf20Sopenharmony_ci{ 7698c2ecf20Sopenharmony_ci struct inode *inode = file->f_mapping->host; 7708c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 7718c2ecf20Sopenharmony_ci struct dax_device *dax_dev = sbi->s_daxdev; 7728c2ecf20Sopenharmony_ci 7738c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(sbi))) 7748c2ecf20Sopenharmony_ci return -EIO; 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_ci /* 7778c2ecf20Sopenharmony_ci * We don't support synchronous mappings for non-DAX files and 7788c2ecf20Sopenharmony_ci * for DAX files if underneath dax_device is not synchronous. 7798c2ecf20Sopenharmony_ci */ 7808c2ecf20Sopenharmony_ci if (!daxdev_mapping_supported(vma, dax_dev)) 7818c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 7828c2ecf20Sopenharmony_ci 7838c2ecf20Sopenharmony_ci file_accessed(file); 7848c2ecf20Sopenharmony_ci if (IS_DAX(file_inode(file))) { 7858c2ecf20Sopenharmony_ci vma->vm_ops = &ext4_dax_vm_ops; 7868c2ecf20Sopenharmony_ci vma->vm_flags |= VM_HUGEPAGE; 7878c2ecf20Sopenharmony_ci } else { 7888c2ecf20Sopenharmony_ci vma->vm_ops = &ext4_file_vm_ops; 7898c2ecf20Sopenharmony_ci } 7908c2ecf20Sopenharmony_ci return 0; 7918c2ecf20Sopenharmony_ci} 7928c2ecf20Sopenharmony_ci 7938c2ecf20Sopenharmony_cistatic int ext4_sample_last_mounted(struct super_block *sb, 7948c2ecf20Sopenharmony_ci struct vfsmount *mnt) 7958c2ecf20Sopenharmony_ci{ 7968c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 7978c2ecf20Sopenharmony_ci struct path path; 7988c2ecf20Sopenharmony_ci char buf[64], *cp; 7998c2ecf20Sopenharmony_ci handle_t *handle; 8008c2ecf20Sopenharmony_ci int err; 8018c2ecf20Sopenharmony_ci 8028c2ecf20Sopenharmony_ci if (likely(ext4_test_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED))) 8038c2ecf20Sopenharmony_ci return 0; 8048c2ecf20Sopenharmony_ci 8058c2ecf20Sopenharmony_ci if (sb_rdonly(sb) || !sb_start_intwrite_trylock(sb)) 8068c2ecf20Sopenharmony_ci return 0; 8078c2ecf20Sopenharmony_ci 8088c2ecf20Sopenharmony_ci ext4_set_mount_flag(sb, EXT4_MF_MNTDIR_SAMPLED); 8098c2ecf20Sopenharmony_ci /* 8108c2ecf20Sopenharmony_ci * Sample where the filesystem has been mounted and 8118c2ecf20Sopenharmony_ci * store it in the superblock for sysadmin convenience 8128c2ecf20Sopenharmony_ci * when trying to sort through large numbers of block 8138c2ecf20Sopenharmony_ci * devices or filesystem images. 8148c2ecf20Sopenharmony_ci */ 8158c2ecf20Sopenharmony_ci memset(buf, 0, sizeof(buf)); 8168c2ecf20Sopenharmony_ci path.mnt = mnt; 8178c2ecf20Sopenharmony_ci path.dentry = mnt->mnt_root; 8188c2ecf20Sopenharmony_ci cp = d_path(&path, buf, sizeof(buf)); 8198c2ecf20Sopenharmony_ci err = 0; 8208c2ecf20Sopenharmony_ci if (IS_ERR(cp)) 8218c2ecf20Sopenharmony_ci goto out; 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_ci handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); 8248c2ecf20Sopenharmony_ci err = PTR_ERR(handle); 8258c2ecf20Sopenharmony_ci if (IS_ERR(handle)) 8268c2ecf20Sopenharmony_ci goto out; 8278c2ecf20Sopenharmony_ci BUFFER_TRACE(sbi->s_sbh, "get_write_access"); 8288c2ecf20Sopenharmony_ci err = ext4_journal_get_write_access(handle, sbi->s_sbh); 8298c2ecf20Sopenharmony_ci if (err) 8308c2ecf20Sopenharmony_ci goto out_journal; 8318c2ecf20Sopenharmony_ci lock_buffer(sbi->s_sbh); 8328c2ecf20Sopenharmony_ci strncpy(sbi->s_es->s_last_mounted, cp, 8338c2ecf20Sopenharmony_ci sizeof(sbi->s_es->s_last_mounted)); 8348c2ecf20Sopenharmony_ci ext4_superblock_csum_set(sb); 8358c2ecf20Sopenharmony_ci unlock_buffer(sbi->s_sbh); 8368c2ecf20Sopenharmony_ci ext4_handle_dirty_metadata(handle, NULL, sbi->s_sbh); 8378c2ecf20Sopenharmony_ciout_journal: 8388c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 8398c2ecf20Sopenharmony_ciout: 8408c2ecf20Sopenharmony_ci sb_end_intwrite(sb); 8418c2ecf20Sopenharmony_ci return err; 8428c2ecf20Sopenharmony_ci} 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_cistatic int ext4_file_open(struct inode *inode, struct file *filp) 8458c2ecf20Sopenharmony_ci{ 8468c2ecf20Sopenharmony_ci int ret; 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 8498c2ecf20Sopenharmony_ci return -EIO; 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci ret = ext4_sample_last_mounted(inode->i_sb, filp->f_path.mnt); 8528c2ecf20Sopenharmony_ci if (ret) 8538c2ecf20Sopenharmony_ci return ret; 8548c2ecf20Sopenharmony_ci 8558c2ecf20Sopenharmony_ci ret = fscrypt_file_open(inode, filp); 8568c2ecf20Sopenharmony_ci if (ret) 8578c2ecf20Sopenharmony_ci return ret; 8588c2ecf20Sopenharmony_ci 8598c2ecf20Sopenharmony_ci ret = fsverity_file_open(inode, filp); 8608c2ecf20Sopenharmony_ci if (ret) 8618c2ecf20Sopenharmony_ci return ret; 8628c2ecf20Sopenharmony_ci 8638c2ecf20Sopenharmony_ci /* 8648c2ecf20Sopenharmony_ci * Set up the jbd2_inode if we are opening the inode for 8658c2ecf20Sopenharmony_ci * writing and the journal is present 8668c2ecf20Sopenharmony_ci */ 8678c2ecf20Sopenharmony_ci if (filp->f_mode & FMODE_WRITE) { 8688c2ecf20Sopenharmony_ci ret = ext4_inode_attach_jinode(inode); 8698c2ecf20Sopenharmony_ci if (ret < 0) 8708c2ecf20Sopenharmony_ci return ret; 8718c2ecf20Sopenharmony_ci } 8728c2ecf20Sopenharmony_ci 8738c2ecf20Sopenharmony_ci filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC; 8748c2ecf20Sopenharmony_ci return dquot_file_open(inode, filp); 8758c2ecf20Sopenharmony_ci} 8768c2ecf20Sopenharmony_ci 8778c2ecf20Sopenharmony_ci/* 8788c2ecf20Sopenharmony_ci * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values 8798c2ecf20Sopenharmony_ci * by calling generic_file_llseek_size() with the appropriate maxbytes 8808c2ecf20Sopenharmony_ci * value for each. 8818c2ecf20Sopenharmony_ci */ 8828c2ecf20Sopenharmony_ciloff_t ext4_llseek(struct file *file, loff_t offset, int whence) 8838c2ecf20Sopenharmony_ci{ 8848c2ecf20Sopenharmony_ci struct inode *inode = file->f_mapping->host; 8858c2ecf20Sopenharmony_ci loff_t maxbytes; 8868c2ecf20Sopenharmony_ci 8878c2ecf20Sopenharmony_ci if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 8888c2ecf20Sopenharmony_ci maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes; 8898c2ecf20Sopenharmony_ci else 8908c2ecf20Sopenharmony_ci maxbytes = inode->i_sb->s_maxbytes; 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_ci switch (whence) { 8938c2ecf20Sopenharmony_ci default: 8948c2ecf20Sopenharmony_ci return generic_file_llseek_size(file, offset, whence, 8958c2ecf20Sopenharmony_ci maxbytes, i_size_read(inode)); 8968c2ecf20Sopenharmony_ci case SEEK_HOLE: 8978c2ecf20Sopenharmony_ci inode_lock_shared(inode); 8988c2ecf20Sopenharmony_ci offset = iomap_seek_hole(inode, offset, 8998c2ecf20Sopenharmony_ci &ext4_iomap_report_ops); 9008c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 9018c2ecf20Sopenharmony_ci break; 9028c2ecf20Sopenharmony_ci case SEEK_DATA: 9038c2ecf20Sopenharmony_ci inode_lock_shared(inode); 9048c2ecf20Sopenharmony_ci offset = iomap_seek_data(inode, offset, 9058c2ecf20Sopenharmony_ci &ext4_iomap_report_ops); 9068c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 9078c2ecf20Sopenharmony_ci break; 9088c2ecf20Sopenharmony_ci } 9098c2ecf20Sopenharmony_ci 9108c2ecf20Sopenharmony_ci if (offset < 0) 9118c2ecf20Sopenharmony_ci return offset; 9128c2ecf20Sopenharmony_ci return vfs_setpos(file, offset, maxbytes); 9138c2ecf20Sopenharmony_ci} 9148c2ecf20Sopenharmony_ci 9158c2ecf20Sopenharmony_ciconst struct file_operations ext4_file_operations = { 9168c2ecf20Sopenharmony_ci .llseek = ext4_llseek, 9178c2ecf20Sopenharmony_ci .read_iter = ext4_file_read_iter, 9188c2ecf20Sopenharmony_ci .write_iter = ext4_file_write_iter, 9198c2ecf20Sopenharmony_ci .iopoll = iomap_dio_iopoll, 9208c2ecf20Sopenharmony_ci .unlocked_ioctl = ext4_ioctl, 9218c2ecf20Sopenharmony_ci#ifdef CONFIG_COMPAT 9228c2ecf20Sopenharmony_ci .compat_ioctl = ext4_compat_ioctl, 9238c2ecf20Sopenharmony_ci#endif 9248c2ecf20Sopenharmony_ci .mmap = ext4_file_mmap, 9258c2ecf20Sopenharmony_ci .mmap_supported_flags = MAP_SYNC, 9268c2ecf20Sopenharmony_ci .open = ext4_file_open, 9278c2ecf20Sopenharmony_ci .release = ext4_release_file, 9288c2ecf20Sopenharmony_ci .fsync = ext4_sync_file, 9298c2ecf20Sopenharmony_ci .get_unmapped_area = thp_get_unmapped_area, 9308c2ecf20Sopenharmony_ci .splice_read = generic_file_splice_read, 9318c2ecf20Sopenharmony_ci .splice_write = iter_file_splice_write, 9328c2ecf20Sopenharmony_ci .fallocate = ext4_fallocate, 9338c2ecf20Sopenharmony_ci}; 9348c2ecf20Sopenharmony_ci 9358c2ecf20Sopenharmony_ciconst struct inode_operations ext4_file_inode_operations = { 9368c2ecf20Sopenharmony_ci .setattr = ext4_setattr, 9378c2ecf20Sopenharmony_ci .getattr = ext4_file_getattr, 9388c2ecf20Sopenharmony_ci .listxattr = ext4_listxattr, 9398c2ecf20Sopenharmony_ci .get_acl = ext4_get_acl, 9408c2ecf20Sopenharmony_ci .set_acl = ext4_set_acl, 9418c2ecf20Sopenharmony_ci .fiemap = ext4_fiemap, 9428c2ecf20Sopenharmony_ci}; 9438c2ecf20Sopenharmony_ci 944