// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/inode.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 *
 *  Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000
 */

#include <linux/fs.h>
#include <linux/time.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/bitops.h>
418c2ecf20Sopenharmony_ci#include <linux/iomap.h> 428c2ecf20Sopenharmony_ci#include <linux/iversion.h> 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ci#include "ext4_jbd2.h" 458c2ecf20Sopenharmony_ci#include "xattr.h" 468c2ecf20Sopenharmony_ci#include "acl.h" 478c2ecf20Sopenharmony_ci#include "truncate.h" 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci#include <trace/events/ext4.h> 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_cistatic __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, 528c2ecf20Sopenharmony_ci struct ext4_inode_info *ei) 538c2ecf20Sopenharmony_ci{ 548c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 558c2ecf20Sopenharmony_ci __u32 csum; 568c2ecf20Sopenharmony_ci __u16 dummy_csum = 0; 578c2ecf20Sopenharmony_ci int offset = offsetof(struct ext4_inode, i_checksum_lo); 588c2ecf20Sopenharmony_ci unsigned int csum_size = sizeof(dummy_csum); 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, offset); 618c2ecf20Sopenharmony_ci csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, csum_size); 628c2ecf20Sopenharmony_ci offset += csum_size; 638c2ecf20Sopenharmony_ci csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, 648c2ecf20Sopenharmony_ci EXT4_GOOD_OLD_INODE_SIZE - offset); 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 678c2ecf20Sopenharmony_ci offset = offsetof(struct ext4_inode, i_checksum_hi); 688c2ecf20Sopenharmony_ci csum = ext4_chksum(sbi, csum, (__u8 *)raw + 698c2ecf20Sopenharmony_ci EXT4_GOOD_OLD_INODE_SIZE, 708c2ecf20Sopenharmony_ci offset - EXT4_GOOD_OLD_INODE_SIZE); 718c2ecf20Sopenharmony_ci if (EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { 728c2ecf20Sopenharmony_ci csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, 738c2ecf20Sopenharmony_ci csum_size); 748c2ecf20Sopenharmony_ci offset += csum_size; 758c2ecf20Sopenharmony_ci } 768c2ecf20Sopenharmony_ci csum = ext4_chksum(sbi, 
csum, (__u8 *)raw + offset, 778c2ecf20Sopenharmony_ci EXT4_INODE_SIZE(inode->i_sb) - offset); 788c2ecf20Sopenharmony_ci } 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci return csum; 818c2ecf20Sopenharmony_ci} 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_cistatic int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, 848c2ecf20Sopenharmony_ci struct ext4_inode_info *ei) 858c2ecf20Sopenharmony_ci{ 868c2ecf20Sopenharmony_ci __u32 provided, calculated; 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 898c2ecf20Sopenharmony_ci cpu_to_le32(EXT4_OS_LINUX) || 908c2ecf20Sopenharmony_ci !ext4_has_metadata_csum(inode->i_sb)) 918c2ecf20Sopenharmony_ci return 1; 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci provided = le16_to_cpu(raw->i_checksum_lo); 948c2ecf20Sopenharmony_ci calculated = ext4_inode_csum(inode, raw, ei); 958c2ecf20Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 968c2ecf20Sopenharmony_ci EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) 978c2ecf20Sopenharmony_ci provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16; 988c2ecf20Sopenharmony_ci else 998c2ecf20Sopenharmony_ci calculated &= 0xFFFF; 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci return provided == calculated; 1028c2ecf20Sopenharmony_ci} 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_civoid ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, 1058c2ecf20Sopenharmony_ci struct ext4_inode_info *ei) 1068c2ecf20Sopenharmony_ci{ 1078c2ecf20Sopenharmony_ci __u32 csum; 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 1108c2ecf20Sopenharmony_ci cpu_to_le32(EXT4_OS_LINUX) || 1118c2ecf20Sopenharmony_ci !ext4_has_metadata_csum(inode->i_sb)) 1128c2ecf20Sopenharmony_ci return; 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci csum = ext4_inode_csum(inode, raw, ei); 1158c2ecf20Sopenharmony_ci raw->i_checksum_lo = 
cpu_to_le16(csum & 0xFFFF); 1168c2ecf20Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 1178c2ecf20Sopenharmony_ci EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) 1188c2ecf20Sopenharmony_ci raw->i_checksum_hi = cpu_to_le16(csum >> 16); 1198c2ecf20Sopenharmony_ci} 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_cistatic inline int ext4_begin_ordered_truncate(struct inode *inode, 1228c2ecf20Sopenharmony_ci loff_t new_size) 1238c2ecf20Sopenharmony_ci{ 1248c2ecf20Sopenharmony_ci trace_ext4_begin_ordered_truncate(inode, new_size); 1258c2ecf20Sopenharmony_ci /* 1268c2ecf20Sopenharmony_ci * If jinode is zero, then we never opened the file for 1278c2ecf20Sopenharmony_ci * writing, so there's no need to call 1288c2ecf20Sopenharmony_ci * jbd2_journal_begin_ordered_truncate() since there's no 1298c2ecf20Sopenharmony_ci * outstanding writes we need to flush. 1308c2ecf20Sopenharmony_ci */ 1318c2ecf20Sopenharmony_ci if (!EXT4_I(inode)->jinode) 1328c2ecf20Sopenharmony_ci return 0; 1338c2ecf20Sopenharmony_ci return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), 1348c2ecf20Sopenharmony_ci EXT4_I(inode)->jinode, 1358c2ecf20Sopenharmony_ci new_size); 1368c2ecf20Sopenharmony_ci} 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_cistatic void ext4_invalidatepage(struct page *page, unsigned int offset, 1398c2ecf20Sopenharmony_ci unsigned int length); 1408c2ecf20Sopenharmony_cistatic int __ext4_journalled_writepage(struct page *page, unsigned int len); 1418c2ecf20Sopenharmony_cistatic int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); 1428c2ecf20Sopenharmony_cistatic int ext4_meta_trans_blocks(struct inode *inode, int lblocks, 1438c2ecf20Sopenharmony_ci int pextents); 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci/* 1468c2ecf20Sopenharmony_ci * Test whether an inode is a fast symlink. 1478c2ecf20Sopenharmony_ci * A fast symlink has its symlink data stored in ext4_inode_info->i_data. 
1488c2ecf20Sopenharmony_ci */ 1498c2ecf20Sopenharmony_ciint ext4_inode_is_fast_symlink(struct inode *inode) 1508c2ecf20Sopenharmony_ci{ 1518c2ecf20Sopenharmony_ci if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { 1528c2ecf20Sopenharmony_ci int ea_blocks = EXT4_I(inode)->i_file_acl ? 1538c2ecf20Sopenharmony_ci EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) 1568c2ecf20Sopenharmony_ci return 0; 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); 1598c2ecf20Sopenharmony_ci } 1608c2ecf20Sopenharmony_ci return S_ISLNK(inode->i_mode) && inode->i_size && 1618c2ecf20Sopenharmony_ci (inode->i_size < EXT4_N_BLOCKS * 4); 1628c2ecf20Sopenharmony_ci} 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci/* 1658c2ecf20Sopenharmony_ci * Called at the last iput() if i_nlink is zero. 1668c2ecf20Sopenharmony_ci */ 1678c2ecf20Sopenharmony_civoid ext4_evict_inode(struct inode *inode) 1688c2ecf20Sopenharmony_ci{ 1698c2ecf20Sopenharmony_ci handle_t *handle; 1708c2ecf20Sopenharmony_ci int err; 1718c2ecf20Sopenharmony_ci /* 1728c2ecf20Sopenharmony_ci * Credits for final inode cleanup and freeing: 1738c2ecf20Sopenharmony_ci * sb + inode (ext4_orphan_del()), block bitmap, group descriptor 1748c2ecf20Sopenharmony_ci * (xattr block freeing), bitmap, group descriptor (inode freeing) 1758c2ecf20Sopenharmony_ci */ 1768c2ecf20Sopenharmony_ci int extra_credits = 6; 1778c2ecf20Sopenharmony_ci struct ext4_xattr_inode_array *ea_inode_array = NULL; 1788c2ecf20Sopenharmony_ci bool freeze_protected = false; 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci trace_ext4_evict_inode(inode); 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) 1838c2ecf20Sopenharmony_ci ext4_evict_ea_inode(inode); 1848c2ecf20Sopenharmony_ci if (inode->i_nlink) { 1858c2ecf20Sopenharmony_ci /* 1868c2ecf20Sopenharmony_ci * When 
journalling data dirty buffers are tracked only in the 1878c2ecf20Sopenharmony_ci * journal. So although mm thinks everything is clean and 1888c2ecf20Sopenharmony_ci * ready for reaping the inode might still have some pages to 1898c2ecf20Sopenharmony_ci * write in the running transaction or waiting to be 1908c2ecf20Sopenharmony_ci * checkpointed. Thus calling jbd2_journal_invalidatepage() 1918c2ecf20Sopenharmony_ci * (via truncate_inode_pages()) to discard these buffers can 1928c2ecf20Sopenharmony_ci * cause data loss. Also even if we did not discard these 1938c2ecf20Sopenharmony_ci * buffers, we would have no way to find them after the inode 1948c2ecf20Sopenharmony_ci * is reaped and thus user could see stale data if he tries to 1958c2ecf20Sopenharmony_ci * read them before the transaction is checkpointed. So be 1968c2ecf20Sopenharmony_ci * careful and force everything to disk here... We use 1978c2ecf20Sopenharmony_ci * ei->i_datasync_tid to store the newest transaction 1988c2ecf20Sopenharmony_ci * containing inode's data. 1998c2ecf20Sopenharmony_ci * 2008c2ecf20Sopenharmony_ci * Note that directories do not have this problem because they 2018c2ecf20Sopenharmony_ci * don't use page cache. 
2028c2ecf20Sopenharmony_ci */ 2038c2ecf20Sopenharmony_ci if (inode->i_ino != EXT4_JOURNAL_INO && 2048c2ecf20Sopenharmony_ci ext4_should_journal_data(inode) && 2058c2ecf20Sopenharmony_ci (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) && 2068c2ecf20Sopenharmony_ci inode->i_data.nrpages) { 2078c2ecf20Sopenharmony_ci journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 2088c2ecf20Sopenharmony_ci tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci jbd2_complete_transaction(journal, commit_tid); 2118c2ecf20Sopenharmony_ci filemap_write_and_wait(&inode->i_data); 2128c2ecf20Sopenharmony_ci } 2138c2ecf20Sopenharmony_ci truncate_inode_pages_final(&inode->i_data); 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci goto no_delete; 2168c2ecf20Sopenharmony_ci } 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci if (is_bad_inode(inode)) 2198c2ecf20Sopenharmony_ci goto no_delete; 2208c2ecf20Sopenharmony_ci dquot_initialize(inode); 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci if (ext4_should_order_data(inode)) 2238c2ecf20Sopenharmony_ci ext4_begin_ordered_truncate(inode, 0); 2248c2ecf20Sopenharmony_ci truncate_inode_pages_final(&inode->i_data); 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci /* 2278c2ecf20Sopenharmony_ci * For inodes with journalled data, transaction commit could have 2288c2ecf20Sopenharmony_ci * dirtied the inode. And for inodes with dioread_nolock, unwritten 2298c2ecf20Sopenharmony_ci * extents converting worker could merge extents and also have dirtied 2308c2ecf20Sopenharmony_ci * the inode. Flush worker is ignoring it because of I_FREEING flag but 2318c2ecf20Sopenharmony_ci * we still need to remove the inode from the writeback lists. 
2328c2ecf20Sopenharmony_ci */ 2338c2ecf20Sopenharmony_ci if (!list_empty_careful(&inode->i_io_list)) 2348c2ecf20Sopenharmony_ci inode_io_list_del(inode); 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci /* 2378c2ecf20Sopenharmony_ci * Protect us against freezing - iput() caller didn't have to have any 2388c2ecf20Sopenharmony_ci * protection against it. When we are in a running transaction though, 2398c2ecf20Sopenharmony_ci * we are already protected against freezing and we cannot grab further 2408c2ecf20Sopenharmony_ci * protection due to lock ordering constraints. 2418c2ecf20Sopenharmony_ci */ 2428c2ecf20Sopenharmony_ci if (!ext4_journal_current_handle()) { 2438c2ecf20Sopenharmony_ci sb_start_intwrite(inode->i_sb); 2448c2ecf20Sopenharmony_ci freeze_protected = true; 2458c2ecf20Sopenharmony_ci } 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_ci if (!IS_NOQUOTA(inode)) 2488c2ecf20Sopenharmony_ci extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci /* 2518c2ecf20Sopenharmony_ci * Block bitmap, group descriptor, and inode are accounted in both 2528c2ecf20Sopenharmony_ci * ext4_blocks_for_truncate() and extra_credits. So subtract 3. 2538c2ecf20Sopenharmony_ci */ 2548c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, 2558c2ecf20Sopenharmony_ci ext4_blocks_for_truncate(inode) + extra_credits - 3); 2568c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 2578c2ecf20Sopenharmony_ci ext4_std_error(inode->i_sb, PTR_ERR(handle)); 2588c2ecf20Sopenharmony_ci /* 2598c2ecf20Sopenharmony_ci * If we're going to skip the normal cleanup, we still need to 2608c2ecf20Sopenharmony_ci * make sure that the in-core orphan linked list is properly 2618c2ecf20Sopenharmony_ci * cleaned up. 
2628c2ecf20Sopenharmony_ci */ 2638c2ecf20Sopenharmony_ci ext4_orphan_del(NULL, inode); 2648c2ecf20Sopenharmony_ci if (freeze_protected) 2658c2ecf20Sopenharmony_ci sb_end_intwrite(inode->i_sb); 2668c2ecf20Sopenharmony_ci goto no_delete; 2678c2ecf20Sopenharmony_ci } 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci if (IS_SYNC(inode)) 2708c2ecf20Sopenharmony_ci ext4_handle_sync(handle); 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci /* 2738c2ecf20Sopenharmony_ci * Set inode->i_size to 0 before calling ext4_truncate(). We need 2748c2ecf20Sopenharmony_ci * special handling of symlinks here because i_size is used to 2758c2ecf20Sopenharmony_ci * determine whether ext4_inode_info->i_data contains symlink data or 2768c2ecf20Sopenharmony_ci * block mappings. Setting i_size to 0 will remove its fast symlink 2778c2ecf20Sopenharmony_ci * status. Erase i_data so that it becomes a valid empty block map. 2788c2ecf20Sopenharmony_ci */ 2798c2ecf20Sopenharmony_ci if (ext4_inode_is_fast_symlink(inode)) 2808c2ecf20Sopenharmony_ci memset(EXT4_I(inode)->i_data, 0, sizeof(EXT4_I(inode)->i_data)); 2818c2ecf20Sopenharmony_ci inode->i_size = 0; 2828c2ecf20Sopenharmony_ci err = ext4_mark_inode_dirty(handle, inode); 2838c2ecf20Sopenharmony_ci if (err) { 2848c2ecf20Sopenharmony_ci ext4_warning(inode->i_sb, 2858c2ecf20Sopenharmony_ci "couldn't mark inode dirty (err %d)", err); 2868c2ecf20Sopenharmony_ci goto stop_handle; 2878c2ecf20Sopenharmony_ci } 2888c2ecf20Sopenharmony_ci if (inode->i_blocks) { 2898c2ecf20Sopenharmony_ci err = ext4_truncate(inode); 2908c2ecf20Sopenharmony_ci if (err) { 2918c2ecf20Sopenharmony_ci ext4_error_err(inode->i_sb, -err, 2928c2ecf20Sopenharmony_ci "couldn't truncate inode %lu (err %d)", 2938c2ecf20Sopenharmony_ci inode->i_ino, err); 2948c2ecf20Sopenharmony_ci goto stop_handle; 2958c2ecf20Sopenharmony_ci } 2968c2ecf20Sopenharmony_ci } 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci /* Remove xattr references. 
*/ 2998c2ecf20Sopenharmony_ci err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array, 3008c2ecf20Sopenharmony_ci extra_credits); 3018c2ecf20Sopenharmony_ci if (err) { 3028c2ecf20Sopenharmony_ci ext4_warning(inode->i_sb, "xattr delete (err %d)", err); 3038c2ecf20Sopenharmony_cistop_handle: 3048c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 3058c2ecf20Sopenharmony_ci ext4_orphan_del(NULL, inode); 3068c2ecf20Sopenharmony_ci if (freeze_protected) 3078c2ecf20Sopenharmony_ci sb_end_intwrite(inode->i_sb); 3088c2ecf20Sopenharmony_ci ext4_xattr_inode_array_free(ea_inode_array); 3098c2ecf20Sopenharmony_ci goto no_delete; 3108c2ecf20Sopenharmony_ci } 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci /* 3138c2ecf20Sopenharmony_ci * Kill off the orphan record which ext4_truncate created. 3148c2ecf20Sopenharmony_ci * AKPM: I think this can be inside the above `if'. 3158c2ecf20Sopenharmony_ci * Note that ext4_orphan_del() has to be able to cope with the 3168c2ecf20Sopenharmony_ci * deletion of a non-existent orphan - this is because we don't 3178c2ecf20Sopenharmony_ci * know if ext4_truncate() actually created an orphan record. 3188c2ecf20Sopenharmony_ci * (Well, we could do this if we need to, but heck - it works) 3198c2ecf20Sopenharmony_ci */ 3208c2ecf20Sopenharmony_ci ext4_orphan_del(handle, inode); 3218c2ecf20Sopenharmony_ci EXT4_I(inode)->i_dtime = (__u32)ktime_get_real_seconds(); 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_ci /* 3248c2ecf20Sopenharmony_ci * One subtle ordering requirement: if anything has gone wrong 3258c2ecf20Sopenharmony_ci * (transaction abort, IO errors, whatever), then we can still 3268c2ecf20Sopenharmony_ci * do these next steps (the fs will already have been marked as 3278c2ecf20Sopenharmony_ci * having errors), but we can't free the inode if the mark_dirty 3288c2ecf20Sopenharmony_ci * fails. 
3298c2ecf20Sopenharmony_ci */ 3308c2ecf20Sopenharmony_ci if (ext4_mark_inode_dirty(handle, inode)) 3318c2ecf20Sopenharmony_ci /* If that failed, just do the required in-core inode clear. */ 3328c2ecf20Sopenharmony_ci ext4_clear_inode(inode); 3338c2ecf20Sopenharmony_ci else 3348c2ecf20Sopenharmony_ci ext4_free_inode(handle, inode); 3358c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 3368c2ecf20Sopenharmony_ci if (freeze_protected) 3378c2ecf20Sopenharmony_ci sb_end_intwrite(inode->i_sb); 3388c2ecf20Sopenharmony_ci ext4_xattr_inode_array_free(ea_inode_array); 3398c2ecf20Sopenharmony_ci return; 3408c2ecf20Sopenharmony_cino_delete: 3418c2ecf20Sopenharmony_ci /* 3428c2ecf20Sopenharmony_ci * Check out some where else accidentally dirty the evicting inode, 3438c2ecf20Sopenharmony_ci * which may probably cause inode use-after-free issues later. 3448c2ecf20Sopenharmony_ci */ 3458c2ecf20Sopenharmony_ci WARN_ON_ONCE(!list_empty_careful(&inode->i_io_list)); 3468c2ecf20Sopenharmony_ci 3478c2ecf20Sopenharmony_ci if (!list_empty(&EXT4_I(inode)->i_fc_list)) 3488c2ecf20Sopenharmony_ci ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); 3498c2ecf20Sopenharmony_ci ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ 3508c2ecf20Sopenharmony_ci} 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_ci#ifdef CONFIG_QUOTA 3538c2ecf20Sopenharmony_ciqsize_t *ext4_get_reserved_space(struct inode *inode) 3548c2ecf20Sopenharmony_ci{ 3558c2ecf20Sopenharmony_ci return &EXT4_I(inode)->i_reserved_quota; 3568c2ecf20Sopenharmony_ci} 3578c2ecf20Sopenharmony_ci#endif 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_ci/* 3608c2ecf20Sopenharmony_ci * Called with i_data_sem down, which is important since we can call 3618c2ecf20Sopenharmony_ci * ext4_discard_preallocations() from here. 
3628c2ecf20Sopenharmony_ci */ 3638c2ecf20Sopenharmony_civoid ext4_da_update_reserve_space(struct inode *inode, 3648c2ecf20Sopenharmony_ci int used, int quota_claim) 3658c2ecf20Sopenharmony_ci{ 3668c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 3678c2ecf20Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_ci spin_lock(&ei->i_block_reservation_lock); 3708c2ecf20Sopenharmony_ci trace_ext4_da_update_reserve_space(inode, used, quota_claim); 3718c2ecf20Sopenharmony_ci if (unlikely(used > ei->i_reserved_data_blocks)) { 3728c2ecf20Sopenharmony_ci ext4_warning(inode->i_sb, "%s: ino %lu, used %d " 3738c2ecf20Sopenharmony_ci "with only %d reserved data blocks", 3748c2ecf20Sopenharmony_ci __func__, inode->i_ino, used, 3758c2ecf20Sopenharmony_ci ei->i_reserved_data_blocks); 3768c2ecf20Sopenharmony_ci WARN_ON(1); 3778c2ecf20Sopenharmony_ci used = ei->i_reserved_data_blocks; 3788c2ecf20Sopenharmony_ci } 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci /* Update per-inode reservations */ 3818c2ecf20Sopenharmony_ci ei->i_reserved_data_blocks -= used; 3828c2ecf20Sopenharmony_ci percpu_counter_sub(&sbi->s_dirtyclusters_counter, used); 3838c2ecf20Sopenharmony_ci 3848c2ecf20Sopenharmony_ci spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 3858c2ecf20Sopenharmony_ci 3868c2ecf20Sopenharmony_ci /* Update quota subsystem for data blocks */ 3878c2ecf20Sopenharmony_ci if (quota_claim) 3888c2ecf20Sopenharmony_ci dquot_claim_block(inode, EXT4_C2B(sbi, used)); 3898c2ecf20Sopenharmony_ci else { 3908c2ecf20Sopenharmony_ci /* 3918c2ecf20Sopenharmony_ci * We did fallocate with an offset that is already delayed 3928c2ecf20Sopenharmony_ci * allocated. So on delayed allocated writeback we should 3938c2ecf20Sopenharmony_ci * not re-claim the quota for fallocated blocks. 
3948c2ecf20Sopenharmony_ci */ 3958c2ecf20Sopenharmony_ci dquot_release_reservation_block(inode, EXT4_C2B(sbi, used)); 3968c2ecf20Sopenharmony_ci } 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_ci /* 3998c2ecf20Sopenharmony_ci * If we have done all the pending block allocations and if 4008c2ecf20Sopenharmony_ci * there aren't any writers on the inode, we can discard the 4018c2ecf20Sopenharmony_ci * inode's preallocations. 4028c2ecf20Sopenharmony_ci */ 4038c2ecf20Sopenharmony_ci if ((ei->i_reserved_data_blocks == 0) && 4048c2ecf20Sopenharmony_ci !inode_is_open_for_write(inode)) 4058c2ecf20Sopenharmony_ci ext4_discard_preallocations(inode, 0); 4068c2ecf20Sopenharmony_ci} 4078c2ecf20Sopenharmony_ci 4088c2ecf20Sopenharmony_cistatic int __check_block_validity(struct inode *inode, const char *func, 4098c2ecf20Sopenharmony_ci unsigned int line, 4108c2ecf20Sopenharmony_ci struct ext4_map_blocks *map) 4118c2ecf20Sopenharmony_ci{ 4128c2ecf20Sopenharmony_ci if (ext4_has_feature_journal(inode->i_sb) && 4138c2ecf20Sopenharmony_ci (inode->i_ino == 4148c2ecf20Sopenharmony_ci le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) 4158c2ecf20Sopenharmony_ci return 0; 4168c2ecf20Sopenharmony_ci if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) { 4178c2ecf20Sopenharmony_ci ext4_error_inode(inode, func, line, map->m_pblk, 4188c2ecf20Sopenharmony_ci "lblock %lu mapped to illegal pblock %llu " 4198c2ecf20Sopenharmony_ci "(length %d)", (unsigned long) map->m_lblk, 4208c2ecf20Sopenharmony_ci map->m_pblk, map->m_len); 4218c2ecf20Sopenharmony_ci return -EFSCORRUPTED; 4228c2ecf20Sopenharmony_ci } 4238c2ecf20Sopenharmony_ci return 0; 4248c2ecf20Sopenharmony_ci} 4258c2ecf20Sopenharmony_ci 4268c2ecf20Sopenharmony_ciint ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, 4278c2ecf20Sopenharmony_ci ext4_lblk_t len) 4288c2ecf20Sopenharmony_ci{ 4298c2ecf20Sopenharmony_ci int ret; 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci if (IS_ENCRYPTED(inode) 
&& S_ISREG(inode->i_mode)) 4328c2ecf20Sopenharmony_ci return fscrypt_zeroout_range(inode, lblk, pblk, len); 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS); 4358c2ecf20Sopenharmony_ci if (ret > 0) 4368c2ecf20Sopenharmony_ci ret = 0; 4378c2ecf20Sopenharmony_ci 4388c2ecf20Sopenharmony_ci return ret; 4398c2ecf20Sopenharmony_ci} 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_ci#define check_block_validity(inode, map) \ 4428c2ecf20Sopenharmony_ci __check_block_validity((inode), __func__, __LINE__, (map)) 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 4458c2ecf20Sopenharmony_cistatic void ext4_map_blocks_es_recheck(handle_t *handle, 4468c2ecf20Sopenharmony_ci struct inode *inode, 4478c2ecf20Sopenharmony_ci struct ext4_map_blocks *es_map, 4488c2ecf20Sopenharmony_ci struct ext4_map_blocks *map, 4498c2ecf20Sopenharmony_ci int flags) 4508c2ecf20Sopenharmony_ci{ 4518c2ecf20Sopenharmony_ci int retval; 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci map->m_flags = 0; 4548c2ecf20Sopenharmony_ci /* 4558c2ecf20Sopenharmony_ci * There is a race window that the result is not the same. 4568c2ecf20Sopenharmony_ci * e.g. xfstests #223 when dioread_nolock enables. The reason 4578c2ecf20Sopenharmony_ci * is that we lookup a block mapping in extent status tree with 4588c2ecf20Sopenharmony_ci * out taking i_data_sem. So at the time the unwritten extent 4598c2ecf20Sopenharmony_ci * could be converted. 
4608c2ecf20Sopenharmony_ci */ 4618c2ecf20Sopenharmony_ci down_read(&EXT4_I(inode)->i_data_sem); 4628c2ecf20Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 4638c2ecf20Sopenharmony_ci retval = ext4_ext_map_blocks(handle, inode, map, 0); 4648c2ecf20Sopenharmony_ci } else { 4658c2ecf20Sopenharmony_ci retval = ext4_ind_map_blocks(handle, inode, map, 0); 4668c2ecf20Sopenharmony_ci } 4678c2ecf20Sopenharmony_ci up_read((&EXT4_I(inode)->i_data_sem)); 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci /* 4708c2ecf20Sopenharmony_ci * We don't check m_len because extent will be collpased in status 4718c2ecf20Sopenharmony_ci * tree. So the m_len might not equal. 4728c2ecf20Sopenharmony_ci */ 4738c2ecf20Sopenharmony_ci if (es_map->m_lblk != map->m_lblk || 4748c2ecf20Sopenharmony_ci es_map->m_flags != map->m_flags || 4758c2ecf20Sopenharmony_ci es_map->m_pblk != map->m_pblk) { 4768c2ecf20Sopenharmony_ci printk("ES cache assertion failed for inode: %lu " 4778c2ecf20Sopenharmony_ci "es_cached ex [%d/%d/%llu/%x] != " 4788c2ecf20Sopenharmony_ci "found ex [%d/%d/%llu/%x] retval %d flags %x\n", 4798c2ecf20Sopenharmony_ci inode->i_ino, es_map->m_lblk, es_map->m_len, 4808c2ecf20Sopenharmony_ci es_map->m_pblk, es_map->m_flags, map->m_lblk, 4818c2ecf20Sopenharmony_ci map->m_len, map->m_pblk, map->m_flags, 4828c2ecf20Sopenharmony_ci retval, flags); 4838c2ecf20Sopenharmony_ci } 4848c2ecf20Sopenharmony_ci} 4858c2ecf20Sopenharmony_ci#endif /* ES_AGGRESSIVE_TEST */ 4868c2ecf20Sopenharmony_ci 4878c2ecf20Sopenharmony_ci/* 4888c2ecf20Sopenharmony_ci * The ext4_map_blocks() function tries to look up the requested blocks, 4898c2ecf20Sopenharmony_ci * and returns if the blocks are already mapped. 4908c2ecf20Sopenharmony_ci * 4918c2ecf20Sopenharmony_ci * Otherwise it takes the write lock of the i_data_sem and allocate blocks 4928c2ecf20Sopenharmony_ci * and store the allocated blocks in the result buffer head and mark it 4938c2ecf20Sopenharmony_ci * mapped. 
 *
 * If the file is extent-based, it calls ext4_ext_map_blocks(); otherwise it
 * calls ext4_ind_map_blocks() to handle indirect-mapped files.
 *
 * On success, it returns the number of blocks being mapped or allocated. If
 * create == 0 and the blocks are pre-allocated and unwritten, the resulting
 * @map is marked as unwritten. If create == 1, @map is marked as mapped.
 *
 * It returns 0 if a plain lookup failed (blocks have not been allocated); in
 * that case, @map is returned as unmapped but map->m_len is still filled in
 * to indicate the length of the hole starting at map->m_lblk.
 *
 * It returns the error in case of allocation failure.
5088c2ecf20Sopenharmony_ci */ 5098c2ecf20Sopenharmony_ciint ext4_map_blocks(handle_t *handle, struct inode *inode, 5108c2ecf20Sopenharmony_ci struct ext4_map_blocks *map, int flags) 5118c2ecf20Sopenharmony_ci{ 5128c2ecf20Sopenharmony_ci struct extent_status es; 5138c2ecf20Sopenharmony_ci int retval; 5148c2ecf20Sopenharmony_ci int ret = 0; 5158c2ecf20Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 5168c2ecf20Sopenharmony_ci struct ext4_map_blocks orig_map; 5178c2ecf20Sopenharmony_ci 5188c2ecf20Sopenharmony_ci memcpy(&orig_map, map, sizeof(*map)); 5198c2ecf20Sopenharmony_ci#endif 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci map->m_flags = 0; 5228c2ecf20Sopenharmony_ci ext_debug(inode, "flag 0x%x, max_blocks %u, logical block %lu\n", 5238c2ecf20Sopenharmony_ci flags, map->m_len, (unsigned long) map->m_lblk); 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci /* 5268c2ecf20Sopenharmony_ci * ext4_map_blocks returns an int, and m_len is an unsigned int 5278c2ecf20Sopenharmony_ci */ 5288c2ecf20Sopenharmony_ci if (unlikely(map->m_len > INT_MAX)) 5298c2ecf20Sopenharmony_ci map->m_len = INT_MAX; 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci /* We can handle the block number less than EXT_MAX_BLOCKS */ 5328c2ecf20Sopenharmony_ci if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS)) 5338c2ecf20Sopenharmony_ci return -EFSCORRUPTED; 5348c2ecf20Sopenharmony_ci 5358c2ecf20Sopenharmony_ci /* Lookup extent status tree firstly */ 5368c2ecf20Sopenharmony_ci if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) && 5378c2ecf20Sopenharmony_ci ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { 5388c2ecf20Sopenharmony_ci if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { 5398c2ecf20Sopenharmony_ci map->m_pblk = ext4_es_pblock(&es) + 5408c2ecf20Sopenharmony_ci map->m_lblk - es.es_lblk; 5418c2ecf20Sopenharmony_ci map->m_flags |= ext4_es_is_written(&es) ? 
5428c2ecf20Sopenharmony_ci EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN; 5438c2ecf20Sopenharmony_ci retval = es.es_len - (map->m_lblk - es.es_lblk); 5448c2ecf20Sopenharmony_ci if (retval > map->m_len) 5458c2ecf20Sopenharmony_ci retval = map->m_len; 5468c2ecf20Sopenharmony_ci map->m_len = retval; 5478c2ecf20Sopenharmony_ci } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { 5488c2ecf20Sopenharmony_ci map->m_pblk = 0; 5498c2ecf20Sopenharmony_ci retval = es.es_len - (map->m_lblk - es.es_lblk); 5508c2ecf20Sopenharmony_ci if (retval > map->m_len) 5518c2ecf20Sopenharmony_ci retval = map->m_len; 5528c2ecf20Sopenharmony_ci map->m_len = retval; 5538c2ecf20Sopenharmony_ci retval = 0; 5548c2ecf20Sopenharmony_ci } else { 5558c2ecf20Sopenharmony_ci BUG(); 5568c2ecf20Sopenharmony_ci } 5578c2ecf20Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 5588c2ecf20Sopenharmony_ci ext4_map_blocks_es_recheck(handle, inode, map, 5598c2ecf20Sopenharmony_ci &orig_map, flags); 5608c2ecf20Sopenharmony_ci#endif 5618c2ecf20Sopenharmony_ci goto found; 5628c2ecf20Sopenharmony_ci } 5638c2ecf20Sopenharmony_ci 5648c2ecf20Sopenharmony_ci /* 5658c2ecf20Sopenharmony_ci * Try to see if we can get the block without requesting a new 5668c2ecf20Sopenharmony_ci * file system block. 
5678c2ecf20Sopenharmony_ci */ 5688c2ecf20Sopenharmony_ci down_read(&EXT4_I(inode)->i_data_sem); 5698c2ecf20Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 5708c2ecf20Sopenharmony_ci retval = ext4_ext_map_blocks(handle, inode, map, 0); 5718c2ecf20Sopenharmony_ci } else { 5728c2ecf20Sopenharmony_ci retval = ext4_ind_map_blocks(handle, inode, map, 0); 5738c2ecf20Sopenharmony_ci } 5748c2ecf20Sopenharmony_ci if (retval > 0) { 5758c2ecf20Sopenharmony_ci unsigned int status; 5768c2ecf20Sopenharmony_ci 5778c2ecf20Sopenharmony_ci if (unlikely(retval != map->m_len)) { 5788c2ecf20Sopenharmony_ci ext4_warning(inode->i_sb, 5798c2ecf20Sopenharmony_ci "ES len assertion failed for inode " 5808c2ecf20Sopenharmony_ci "%lu: retval %d != map->m_len %d", 5818c2ecf20Sopenharmony_ci inode->i_ino, retval, map->m_len); 5828c2ecf20Sopenharmony_ci WARN_ON(1); 5838c2ecf20Sopenharmony_ci } 5848c2ecf20Sopenharmony_ci 5858c2ecf20Sopenharmony_ci status = map->m_flags & EXT4_MAP_UNWRITTEN ? 5868c2ecf20Sopenharmony_ci EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 5878c2ecf20Sopenharmony_ci if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 5888c2ecf20Sopenharmony_ci !(status & EXTENT_STATUS_WRITTEN) && 5898c2ecf20Sopenharmony_ci ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, 5908c2ecf20Sopenharmony_ci map->m_lblk + map->m_len - 1)) 5918c2ecf20Sopenharmony_ci status |= EXTENT_STATUS_DELAYED; 5928c2ecf20Sopenharmony_ci ret = ext4_es_insert_extent(inode, map->m_lblk, 5938c2ecf20Sopenharmony_ci map->m_len, map->m_pblk, status); 5948c2ecf20Sopenharmony_ci if (ret < 0) 5958c2ecf20Sopenharmony_ci retval = ret; 5968c2ecf20Sopenharmony_ci } 5978c2ecf20Sopenharmony_ci up_read((&EXT4_I(inode)->i_data_sem)); 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_cifound: 6008c2ecf20Sopenharmony_ci if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 6018c2ecf20Sopenharmony_ci ret = check_block_validity(inode, map); 6028c2ecf20Sopenharmony_ci if (ret != 0) 
6038c2ecf20Sopenharmony_ci return ret; 6048c2ecf20Sopenharmony_ci } 6058c2ecf20Sopenharmony_ci 6068c2ecf20Sopenharmony_ci /* If it is only a block(s) look up */ 6078c2ecf20Sopenharmony_ci if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) 6088c2ecf20Sopenharmony_ci return retval; 6098c2ecf20Sopenharmony_ci 6108c2ecf20Sopenharmony_ci /* 6118c2ecf20Sopenharmony_ci * Returns if the blocks have already allocated 6128c2ecf20Sopenharmony_ci * 6138c2ecf20Sopenharmony_ci * Note that if blocks have been preallocated 6148c2ecf20Sopenharmony_ci * ext4_ext_get_block() returns the create = 0 6158c2ecf20Sopenharmony_ci * with buffer head unmapped. 6168c2ecf20Sopenharmony_ci */ 6178c2ecf20Sopenharmony_ci if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 6188c2ecf20Sopenharmony_ci /* 6198c2ecf20Sopenharmony_ci * If we need to convert extent to unwritten 6208c2ecf20Sopenharmony_ci * we continue and do the actual work in 6218c2ecf20Sopenharmony_ci * ext4_ext_map_blocks() 6228c2ecf20Sopenharmony_ci */ 6238c2ecf20Sopenharmony_ci if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) 6248c2ecf20Sopenharmony_ci return retval; 6258c2ecf20Sopenharmony_ci 6268c2ecf20Sopenharmony_ci /* 6278c2ecf20Sopenharmony_ci * Here we clear m_flags because after allocating an new extent, 6288c2ecf20Sopenharmony_ci * it will be set again. 6298c2ecf20Sopenharmony_ci */ 6308c2ecf20Sopenharmony_ci map->m_flags &= ~EXT4_MAP_FLAGS; 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_ci /* 6338c2ecf20Sopenharmony_ci * New blocks allocate and/or writing to unwritten extent 6348c2ecf20Sopenharmony_ci * will possibly result in updating i_data, so we take 6358c2ecf20Sopenharmony_ci * the write lock of i_data_sem, and call get_block() 6368c2ecf20Sopenharmony_ci * with create == 1 flag. 
6378c2ecf20Sopenharmony_ci */ 6388c2ecf20Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 6398c2ecf20Sopenharmony_ci 6408c2ecf20Sopenharmony_ci /* 6418c2ecf20Sopenharmony_ci * We need to check for EXT4 here because migrate 6428c2ecf20Sopenharmony_ci * could have changed the inode type in between 6438c2ecf20Sopenharmony_ci */ 6448c2ecf20Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 6458c2ecf20Sopenharmony_ci retval = ext4_ext_map_blocks(handle, inode, map, flags); 6468c2ecf20Sopenharmony_ci } else { 6478c2ecf20Sopenharmony_ci retval = ext4_ind_map_blocks(handle, inode, map, flags); 6488c2ecf20Sopenharmony_ci 6498c2ecf20Sopenharmony_ci if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { 6508c2ecf20Sopenharmony_ci /* 6518c2ecf20Sopenharmony_ci * We allocated new blocks which will result in 6528c2ecf20Sopenharmony_ci * i_data's format changing. Force the migrate 6538c2ecf20Sopenharmony_ci * to fail by clearing migrate flags 6548c2ecf20Sopenharmony_ci */ 6558c2ecf20Sopenharmony_ci ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 6568c2ecf20Sopenharmony_ci } 6578c2ecf20Sopenharmony_ci } 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci if (retval > 0) { 6608c2ecf20Sopenharmony_ci unsigned int status; 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci if (unlikely(retval != map->m_len)) { 6638c2ecf20Sopenharmony_ci ext4_warning(inode->i_sb, 6648c2ecf20Sopenharmony_ci "ES len assertion failed for inode " 6658c2ecf20Sopenharmony_ci "%lu: retval %d != map->m_len %d", 6668c2ecf20Sopenharmony_ci inode->i_ino, retval, map->m_len); 6678c2ecf20Sopenharmony_ci WARN_ON(1); 6688c2ecf20Sopenharmony_ci } 6698c2ecf20Sopenharmony_ci 6708c2ecf20Sopenharmony_ci /* 6718c2ecf20Sopenharmony_ci * We have to zeroout blocks before inserting them into extent 6728c2ecf20Sopenharmony_ci * status tree. Otherwise someone could look them up there and 6738c2ecf20Sopenharmony_ci * use them before they are really zeroed. 
We also have to 6748c2ecf20Sopenharmony_ci * unmap metadata before zeroing as otherwise writeback can 6758c2ecf20Sopenharmony_ci * overwrite zeros with stale data from block device. 6768c2ecf20Sopenharmony_ci */ 6778c2ecf20Sopenharmony_ci if (flags & EXT4_GET_BLOCKS_ZERO && 6788c2ecf20Sopenharmony_ci map->m_flags & EXT4_MAP_MAPPED && 6798c2ecf20Sopenharmony_ci map->m_flags & EXT4_MAP_NEW) { 6808c2ecf20Sopenharmony_ci ret = ext4_issue_zeroout(inode, map->m_lblk, 6818c2ecf20Sopenharmony_ci map->m_pblk, map->m_len); 6828c2ecf20Sopenharmony_ci if (ret) { 6838c2ecf20Sopenharmony_ci retval = ret; 6848c2ecf20Sopenharmony_ci goto out_sem; 6858c2ecf20Sopenharmony_ci } 6868c2ecf20Sopenharmony_ci } 6878c2ecf20Sopenharmony_ci 6888c2ecf20Sopenharmony_ci /* 6898c2ecf20Sopenharmony_ci * If the extent has been zeroed out, we don't need to update 6908c2ecf20Sopenharmony_ci * extent status tree. 6918c2ecf20Sopenharmony_ci */ 6928c2ecf20Sopenharmony_ci if ((flags & EXT4_GET_BLOCKS_PRE_IO) && 6938c2ecf20Sopenharmony_ci ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { 6948c2ecf20Sopenharmony_ci if (ext4_es_is_written(&es)) 6958c2ecf20Sopenharmony_ci goto out_sem; 6968c2ecf20Sopenharmony_ci } 6978c2ecf20Sopenharmony_ci status = map->m_flags & EXT4_MAP_UNWRITTEN ? 
6988c2ecf20Sopenharmony_ci EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 6998c2ecf20Sopenharmony_ci if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 7008c2ecf20Sopenharmony_ci !(status & EXTENT_STATUS_WRITTEN) && 7018c2ecf20Sopenharmony_ci ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, 7028c2ecf20Sopenharmony_ci map->m_lblk + map->m_len - 1)) 7038c2ecf20Sopenharmony_ci status |= EXTENT_STATUS_DELAYED; 7048c2ecf20Sopenharmony_ci ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 7058c2ecf20Sopenharmony_ci map->m_pblk, status); 7068c2ecf20Sopenharmony_ci if (ret < 0) { 7078c2ecf20Sopenharmony_ci retval = ret; 7088c2ecf20Sopenharmony_ci goto out_sem; 7098c2ecf20Sopenharmony_ci } 7108c2ecf20Sopenharmony_ci } 7118c2ecf20Sopenharmony_ci 7128c2ecf20Sopenharmony_ciout_sem: 7138c2ecf20Sopenharmony_ci up_write((&EXT4_I(inode)->i_data_sem)); 7148c2ecf20Sopenharmony_ci if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 7158c2ecf20Sopenharmony_ci ret = check_block_validity(inode, map); 7168c2ecf20Sopenharmony_ci if (ret != 0) 7178c2ecf20Sopenharmony_ci return ret; 7188c2ecf20Sopenharmony_ci 7198c2ecf20Sopenharmony_ci /* 7208c2ecf20Sopenharmony_ci * Inodes with freshly allocated blocks where contents will be 7218c2ecf20Sopenharmony_ci * visible after transaction commit must be on transaction's 7228c2ecf20Sopenharmony_ci * ordered data list. 
7238c2ecf20Sopenharmony_ci */ 7248c2ecf20Sopenharmony_ci if (map->m_flags & EXT4_MAP_NEW && 7258c2ecf20Sopenharmony_ci !(map->m_flags & EXT4_MAP_UNWRITTEN) && 7268c2ecf20Sopenharmony_ci !(flags & EXT4_GET_BLOCKS_ZERO) && 7278c2ecf20Sopenharmony_ci !ext4_is_quota_file(inode) && 7288c2ecf20Sopenharmony_ci ext4_should_order_data(inode)) { 7298c2ecf20Sopenharmony_ci loff_t start_byte = 7308c2ecf20Sopenharmony_ci (loff_t)map->m_lblk << inode->i_blkbits; 7318c2ecf20Sopenharmony_ci loff_t length = (loff_t)map->m_len << inode->i_blkbits; 7328c2ecf20Sopenharmony_ci 7338c2ecf20Sopenharmony_ci if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) 7348c2ecf20Sopenharmony_ci ret = ext4_jbd2_inode_add_wait(handle, inode, 7358c2ecf20Sopenharmony_ci start_byte, length); 7368c2ecf20Sopenharmony_ci else 7378c2ecf20Sopenharmony_ci ret = ext4_jbd2_inode_add_write(handle, inode, 7388c2ecf20Sopenharmony_ci start_byte, length); 7398c2ecf20Sopenharmony_ci if (ret) 7408c2ecf20Sopenharmony_ci return ret; 7418c2ecf20Sopenharmony_ci } 7428c2ecf20Sopenharmony_ci } 7438c2ecf20Sopenharmony_ci if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN || 7448c2ecf20Sopenharmony_ci map->m_flags & EXT4_MAP_MAPPED)) 7458c2ecf20Sopenharmony_ci ext4_fc_track_range(handle, inode, map->m_lblk, 7468c2ecf20Sopenharmony_ci map->m_lblk + map->m_len - 1); 7478c2ecf20Sopenharmony_ci if (retval < 0) 7488c2ecf20Sopenharmony_ci ext_debug(inode, "failed with err %d\n", retval); 7498c2ecf20Sopenharmony_ci return retval; 7508c2ecf20Sopenharmony_ci} 7518c2ecf20Sopenharmony_ci 7528c2ecf20Sopenharmony_ci/* 7538c2ecf20Sopenharmony_ci * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages 7548c2ecf20Sopenharmony_ci * we have to be careful as someone else may be manipulating b_state as well. 
7558c2ecf20Sopenharmony_ci */ 7568c2ecf20Sopenharmony_cistatic void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) 7578c2ecf20Sopenharmony_ci{ 7588c2ecf20Sopenharmony_ci unsigned long old_state; 7598c2ecf20Sopenharmony_ci unsigned long new_state; 7608c2ecf20Sopenharmony_ci 7618c2ecf20Sopenharmony_ci flags &= EXT4_MAP_FLAGS; 7628c2ecf20Sopenharmony_ci 7638c2ecf20Sopenharmony_ci /* Dummy buffer_head? Set non-atomically. */ 7648c2ecf20Sopenharmony_ci if (!bh->b_page) { 7658c2ecf20Sopenharmony_ci bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags; 7668c2ecf20Sopenharmony_ci return; 7678c2ecf20Sopenharmony_ci } 7688c2ecf20Sopenharmony_ci /* 7698c2ecf20Sopenharmony_ci * Someone else may be modifying b_state. Be careful! This is ugly but 7708c2ecf20Sopenharmony_ci * once we get rid of using bh as a container for mapping information 7718c2ecf20Sopenharmony_ci * to pass to / from get_block functions, this can go away. 7728c2ecf20Sopenharmony_ci */ 7738c2ecf20Sopenharmony_ci do { 7748c2ecf20Sopenharmony_ci old_state = READ_ONCE(bh->b_state); 7758c2ecf20Sopenharmony_ci new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; 7768c2ecf20Sopenharmony_ci } while (unlikely( 7778c2ecf20Sopenharmony_ci cmpxchg(&bh->b_state, old_state, new_state) != old_state)); 7788c2ecf20Sopenharmony_ci} 7798c2ecf20Sopenharmony_ci 7808c2ecf20Sopenharmony_cistatic int _ext4_get_block(struct inode *inode, sector_t iblock, 7818c2ecf20Sopenharmony_ci struct buffer_head *bh, int flags) 7828c2ecf20Sopenharmony_ci{ 7838c2ecf20Sopenharmony_ci struct ext4_map_blocks map; 7848c2ecf20Sopenharmony_ci int ret = 0; 7858c2ecf20Sopenharmony_ci 7868c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) 7878c2ecf20Sopenharmony_ci return -ERANGE; 7888c2ecf20Sopenharmony_ci 7898c2ecf20Sopenharmony_ci map.m_lblk = iblock; 7908c2ecf20Sopenharmony_ci map.m_len = bh->b_size >> inode->i_blkbits; 7918c2ecf20Sopenharmony_ci 7928c2ecf20Sopenharmony_ci ret = ext4_map_blocks(ext4_journal_current_handle(), 
inode, &map, 7938c2ecf20Sopenharmony_ci flags); 7948c2ecf20Sopenharmony_ci if (ret > 0) { 7958c2ecf20Sopenharmony_ci map_bh(bh, inode->i_sb, map.m_pblk); 7968c2ecf20Sopenharmony_ci ext4_update_bh_state(bh, map.m_flags); 7978c2ecf20Sopenharmony_ci bh->b_size = inode->i_sb->s_blocksize * map.m_len; 7988c2ecf20Sopenharmony_ci ret = 0; 7998c2ecf20Sopenharmony_ci } else if (ret == 0) { 8008c2ecf20Sopenharmony_ci /* hole case, need to fill in bh->b_size */ 8018c2ecf20Sopenharmony_ci bh->b_size = inode->i_sb->s_blocksize * map.m_len; 8028c2ecf20Sopenharmony_ci } 8038c2ecf20Sopenharmony_ci return ret; 8048c2ecf20Sopenharmony_ci} 8058c2ecf20Sopenharmony_ci 8068c2ecf20Sopenharmony_ciint ext4_get_block(struct inode *inode, sector_t iblock, 8078c2ecf20Sopenharmony_ci struct buffer_head *bh, int create) 8088c2ecf20Sopenharmony_ci{ 8098c2ecf20Sopenharmony_ci return _ext4_get_block(inode, iblock, bh, 8108c2ecf20Sopenharmony_ci create ? EXT4_GET_BLOCKS_CREATE : 0); 8118c2ecf20Sopenharmony_ci} 8128c2ecf20Sopenharmony_ci 8138c2ecf20Sopenharmony_ci/* 8148c2ecf20Sopenharmony_ci * Get block function used when preparing for buffered write if we require 8158c2ecf20Sopenharmony_ci * creating an unwritten extent if blocks haven't been allocated. The extent 8168c2ecf20Sopenharmony_ci * will be converted to written after the IO is complete. 8178c2ecf20Sopenharmony_ci */ 8188c2ecf20Sopenharmony_ciint ext4_get_block_unwritten(struct inode *inode, sector_t iblock, 8198c2ecf20Sopenharmony_ci struct buffer_head *bh_result, int create) 8208c2ecf20Sopenharmony_ci{ 8218c2ecf20Sopenharmony_ci ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n", 8228c2ecf20Sopenharmony_ci inode->i_ino, create); 8238c2ecf20Sopenharmony_ci return _ext4_get_block(inode, iblock, bh_result, 8248c2ecf20Sopenharmony_ci EXT4_GET_BLOCKS_IO_CREATE_EXT); 8258c2ecf20Sopenharmony_ci} 8268c2ecf20Sopenharmony_ci 8278c2ecf20Sopenharmony_ci/* Maximum number of blocks we map for direct IO at once. 
*/ 8288c2ecf20Sopenharmony_ci#define DIO_MAX_BLOCKS 4096 8298c2ecf20Sopenharmony_ci 8308c2ecf20Sopenharmony_ci/* 8318c2ecf20Sopenharmony_ci * `handle' can be NULL if create is zero 8328c2ecf20Sopenharmony_ci */ 8338c2ecf20Sopenharmony_cistruct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, 8348c2ecf20Sopenharmony_ci ext4_lblk_t block, int map_flags) 8358c2ecf20Sopenharmony_ci{ 8368c2ecf20Sopenharmony_ci struct ext4_map_blocks map; 8378c2ecf20Sopenharmony_ci struct buffer_head *bh; 8388c2ecf20Sopenharmony_ci int create = map_flags & EXT4_GET_BLOCKS_CREATE; 8398c2ecf20Sopenharmony_ci int err; 8408c2ecf20Sopenharmony_ci 8418c2ecf20Sopenharmony_ci J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 8428c2ecf20Sopenharmony_ci || handle != NULL || create == 0); 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci map.m_lblk = block; 8458c2ecf20Sopenharmony_ci map.m_len = 1; 8468c2ecf20Sopenharmony_ci err = ext4_map_blocks(handle, inode, &map, map_flags); 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_ci if (err == 0) 8498c2ecf20Sopenharmony_ci return create ? ERR_PTR(-ENOSPC) : NULL; 8508c2ecf20Sopenharmony_ci if (err < 0) 8518c2ecf20Sopenharmony_ci return ERR_PTR(err); 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ci bh = sb_getblk(inode->i_sb, map.m_pblk); 8548c2ecf20Sopenharmony_ci if (unlikely(!bh)) 8558c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 8568c2ecf20Sopenharmony_ci if (map.m_flags & EXT4_MAP_NEW) { 8578c2ecf20Sopenharmony_ci J_ASSERT(create != 0); 8588c2ecf20Sopenharmony_ci J_ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 8598c2ecf20Sopenharmony_ci || (handle != NULL)); 8608c2ecf20Sopenharmony_ci 8618c2ecf20Sopenharmony_ci /* 8628c2ecf20Sopenharmony_ci * Now that we do not always journal data, we should 8638c2ecf20Sopenharmony_ci * keep in mind whether this should always journal the 8648c2ecf20Sopenharmony_ci * new buffer as metadata. 
For now, regular file 8658c2ecf20Sopenharmony_ci * writes use ext4_get_block instead, so it's not a 8668c2ecf20Sopenharmony_ci * problem. 8678c2ecf20Sopenharmony_ci */ 8688c2ecf20Sopenharmony_ci lock_buffer(bh); 8698c2ecf20Sopenharmony_ci BUFFER_TRACE(bh, "call get_create_access"); 8708c2ecf20Sopenharmony_ci err = ext4_journal_get_create_access(handle, bh); 8718c2ecf20Sopenharmony_ci if (unlikely(err)) { 8728c2ecf20Sopenharmony_ci unlock_buffer(bh); 8738c2ecf20Sopenharmony_ci goto errout; 8748c2ecf20Sopenharmony_ci } 8758c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh)) { 8768c2ecf20Sopenharmony_ci memset(bh->b_data, 0, inode->i_sb->s_blocksize); 8778c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 8788c2ecf20Sopenharmony_ci } 8798c2ecf20Sopenharmony_ci unlock_buffer(bh); 8808c2ecf20Sopenharmony_ci BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 8818c2ecf20Sopenharmony_ci err = ext4_handle_dirty_metadata(handle, inode, bh); 8828c2ecf20Sopenharmony_ci if (unlikely(err)) 8838c2ecf20Sopenharmony_ci goto errout; 8848c2ecf20Sopenharmony_ci } else 8858c2ecf20Sopenharmony_ci BUFFER_TRACE(bh, "not a new buffer"); 8868c2ecf20Sopenharmony_ci return bh; 8878c2ecf20Sopenharmony_cierrout: 8888c2ecf20Sopenharmony_ci brelse(bh); 8898c2ecf20Sopenharmony_ci return ERR_PTR(err); 8908c2ecf20Sopenharmony_ci} 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_cistruct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, 8938c2ecf20Sopenharmony_ci ext4_lblk_t block, int map_flags) 8948c2ecf20Sopenharmony_ci{ 8958c2ecf20Sopenharmony_ci struct buffer_head *bh; 8968c2ecf20Sopenharmony_ci int ret; 8978c2ecf20Sopenharmony_ci 8988c2ecf20Sopenharmony_ci bh = ext4_getblk(handle, inode, block, map_flags); 8998c2ecf20Sopenharmony_ci if (IS_ERR(bh)) 9008c2ecf20Sopenharmony_ci return bh; 9018c2ecf20Sopenharmony_ci if (!bh || ext4_buffer_uptodate(bh)) 9028c2ecf20Sopenharmony_ci return bh; 9038c2ecf20Sopenharmony_ci 9048c2ecf20Sopenharmony_ci ret = ext4_read_bh_lock(bh, REQ_META | 
REQ_PRIO, true); 9058c2ecf20Sopenharmony_ci if (ret) { 9068c2ecf20Sopenharmony_ci put_bh(bh); 9078c2ecf20Sopenharmony_ci return ERR_PTR(ret); 9088c2ecf20Sopenharmony_ci } 9098c2ecf20Sopenharmony_ci return bh; 9108c2ecf20Sopenharmony_ci} 9118c2ecf20Sopenharmony_ci 9128c2ecf20Sopenharmony_ci/* Read a contiguous batch of blocks. */ 9138c2ecf20Sopenharmony_ciint ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, 9148c2ecf20Sopenharmony_ci bool wait, struct buffer_head **bhs) 9158c2ecf20Sopenharmony_ci{ 9168c2ecf20Sopenharmony_ci int i, err; 9178c2ecf20Sopenharmony_ci 9188c2ecf20Sopenharmony_ci for (i = 0; i < bh_count; i++) { 9198c2ecf20Sopenharmony_ci bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */); 9208c2ecf20Sopenharmony_ci if (IS_ERR(bhs[i])) { 9218c2ecf20Sopenharmony_ci err = PTR_ERR(bhs[i]); 9228c2ecf20Sopenharmony_ci bh_count = i; 9238c2ecf20Sopenharmony_ci goto out_brelse; 9248c2ecf20Sopenharmony_ci } 9258c2ecf20Sopenharmony_ci } 9268c2ecf20Sopenharmony_ci 9278c2ecf20Sopenharmony_ci for (i = 0; i < bh_count; i++) 9288c2ecf20Sopenharmony_ci /* Note that NULL bhs[i] is valid because of holes. 
*/ 9298c2ecf20Sopenharmony_ci if (bhs[i] && !ext4_buffer_uptodate(bhs[i])) 9308c2ecf20Sopenharmony_ci ext4_read_bh_lock(bhs[i], REQ_META | REQ_PRIO, false); 9318c2ecf20Sopenharmony_ci 9328c2ecf20Sopenharmony_ci if (!wait) 9338c2ecf20Sopenharmony_ci return 0; 9348c2ecf20Sopenharmony_ci 9358c2ecf20Sopenharmony_ci for (i = 0; i < bh_count; i++) 9368c2ecf20Sopenharmony_ci if (bhs[i]) 9378c2ecf20Sopenharmony_ci wait_on_buffer(bhs[i]); 9388c2ecf20Sopenharmony_ci 9398c2ecf20Sopenharmony_ci for (i = 0; i < bh_count; i++) { 9408c2ecf20Sopenharmony_ci if (bhs[i] && !buffer_uptodate(bhs[i])) { 9418c2ecf20Sopenharmony_ci err = -EIO; 9428c2ecf20Sopenharmony_ci goto out_brelse; 9438c2ecf20Sopenharmony_ci } 9448c2ecf20Sopenharmony_ci } 9458c2ecf20Sopenharmony_ci return 0; 9468c2ecf20Sopenharmony_ci 9478c2ecf20Sopenharmony_ciout_brelse: 9488c2ecf20Sopenharmony_ci for (i = 0; i < bh_count; i++) { 9498c2ecf20Sopenharmony_ci brelse(bhs[i]); 9508c2ecf20Sopenharmony_ci bhs[i] = NULL; 9518c2ecf20Sopenharmony_ci } 9528c2ecf20Sopenharmony_ci return err; 9538c2ecf20Sopenharmony_ci} 9548c2ecf20Sopenharmony_ci 9558c2ecf20Sopenharmony_ciint ext4_walk_page_buffers(handle_t *handle, 9568c2ecf20Sopenharmony_ci struct buffer_head *head, 9578c2ecf20Sopenharmony_ci unsigned from, 9588c2ecf20Sopenharmony_ci unsigned to, 9598c2ecf20Sopenharmony_ci int *partial, 9608c2ecf20Sopenharmony_ci int (*fn)(handle_t *handle, 9618c2ecf20Sopenharmony_ci struct buffer_head *bh)) 9628c2ecf20Sopenharmony_ci{ 9638c2ecf20Sopenharmony_ci struct buffer_head *bh; 9648c2ecf20Sopenharmony_ci unsigned block_start, block_end; 9658c2ecf20Sopenharmony_ci unsigned blocksize = head->b_size; 9668c2ecf20Sopenharmony_ci int err, ret = 0; 9678c2ecf20Sopenharmony_ci struct buffer_head *next; 9688c2ecf20Sopenharmony_ci 9698c2ecf20Sopenharmony_ci for (bh = head, block_start = 0; 9708c2ecf20Sopenharmony_ci ret == 0 && (bh != head || !block_start); 9718c2ecf20Sopenharmony_ci block_start = block_end, bh = next) { 
9728c2ecf20Sopenharmony_ci next = bh->b_this_page; 9738c2ecf20Sopenharmony_ci block_end = block_start + blocksize; 9748c2ecf20Sopenharmony_ci if (block_end <= from || block_start >= to) { 9758c2ecf20Sopenharmony_ci if (partial && !buffer_uptodate(bh)) 9768c2ecf20Sopenharmony_ci *partial = 1; 9778c2ecf20Sopenharmony_ci continue; 9788c2ecf20Sopenharmony_ci } 9798c2ecf20Sopenharmony_ci err = (*fn)(handle, bh); 9808c2ecf20Sopenharmony_ci if (!ret) 9818c2ecf20Sopenharmony_ci ret = err; 9828c2ecf20Sopenharmony_ci } 9838c2ecf20Sopenharmony_ci return ret; 9848c2ecf20Sopenharmony_ci} 9858c2ecf20Sopenharmony_ci 9868c2ecf20Sopenharmony_ci/* 9878c2ecf20Sopenharmony_ci * To preserve ordering, it is essential that the hole instantiation and 9888c2ecf20Sopenharmony_ci * the data write be encapsulated in a single transaction. We cannot 9898c2ecf20Sopenharmony_ci * close off a transaction and start a new one between the ext4_get_block() 9908c2ecf20Sopenharmony_ci * and the commit_write(). So doing the jbd2_journal_start at the start of 9918c2ecf20Sopenharmony_ci * prepare_write() is the right place. 9928c2ecf20Sopenharmony_ci * 9938c2ecf20Sopenharmony_ci * Also, this function can nest inside ext4_writepage(). In that case, we 9948c2ecf20Sopenharmony_ci * *know* that ext4_writepage() has generated enough buffer credits to do the 9958c2ecf20Sopenharmony_ci * whole page. So we won't block on the journal in that case, which is good, 9968c2ecf20Sopenharmony_ci * because the caller may be PF_MEMALLOC. 9978c2ecf20Sopenharmony_ci * 9988c2ecf20Sopenharmony_ci * By accident, ext4 can be reentered when a transaction is open via 9998c2ecf20Sopenharmony_ci * quota file writes. 
If we were to commit the transaction while thus 10008c2ecf20Sopenharmony_ci * reentered, there can be a deadlock - we would be holding a quota 10018c2ecf20Sopenharmony_ci * lock, and the commit would never complete if another thread had a 10028c2ecf20Sopenharmony_ci * transaction open and was blocking on the quota lock - a ranking 10038c2ecf20Sopenharmony_ci * violation. 10048c2ecf20Sopenharmony_ci * 10058c2ecf20Sopenharmony_ci * So what we do is to rely on the fact that jbd2_journal_stop/journal_start 10068c2ecf20Sopenharmony_ci * will _not_ run commit under these circumstances because handle->h_ref 10078c2ecf20Sopenharmony_ci * is elevated. We'll still have enough credits for the tiny quotafile 10088c2ecf20Sopenharmony_ci * write. 10098c2ecf20Sopenharmony_ci */ 10108c2ecf20Sopenharmony_ciint do_journal_get_write_access(handle_t *handle, 10118c2ecf20Sopenharmony_ci struct buffer_head *bh) 10128c2ecf20Sopenharmony_ci{ 10138c2ecf20Sopenharmony_ci int dirty = buffer_dirty(bh); 10148c2ecf20Sopenharmony_ci int ret; 10158c2ecf20Sopenharmony_ci 10168c2ecf20Sopenharmony_ci if (!buffer_mapped(bh) || buffer_freed(bh)) 10178c2ecf20Sopenharmony_ci return 0; 10188c2ecf20Sopenharmony_ci /* 10198c2ecf20Sopenharmony_ci * __block_write_begin() could have dirtied some buffers. Clean 10208c2ecf20Sopenharmony_ci * the dirty bit as jbd2_journal_get_write_access() could complain 10218c2ecf20Sopenharmony_ci * otherwise about fs integrity issues. Setting of the dirty bit 10228c2ecf20Sopenharmony_ci * by __block_write_begin() isn't a real problem here as we clear 10238c2ecf20Sopenharmony_ci * the bit before releasing a page lock and thus writeback cannot 10248c2ecf20Sopenharmony_ci * ever write the buffer. 
10258c2ecf20Sopenharmony_ci */ 10268c2ecf20Sopenharmony_ci if (dirty) 10278c2ecf20Sopenharmony_ci clear_buffer_dirty(bh); 10288c2ecf20Sopenharmony_ci BUFFER_TRACE(bh, "get write access"); 10298c2ecf20Sopenharmony_ci ret = ext4_journal_get_write_access(handle, bh); 10308c2ecf20Sopenharmony_ci if (!ret && dirty) 10318c2ecf20Sopenharmony_ci ret = ext4_handle_dirty_metadata(handle, NULL, bh); 10328c2ecf20Sopenharmony_ci return ret; 10338c2ecf20Sopenharmony_ci} 10348c2ecf20Sopenharmony_ci 10358c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION 10368c2ecf20Sopenharmony_cistatic int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, 10378c2ecf20Sopenharmony_ci get_block_t *get_block) 10388c2ecf20Sopenharmony_ci{ 10398c2ecf20Sopenharmony_ci unsigned from = pos & (PAGE_SIZE - 1); 10408c2ecf20Sopenharmony_ci unsigned to = from + len; 10418c2ecf20Sopenharmony_ci struct inode *inode = page->mapping->host; 10428c2ecf20Sopenharmony_ci unsigned block_start, block_end; 10438c2ecf20Sopenharmony_ci sector_t block; 10448c2ecf20Sopenharmony_ci int err = 0; 10458c2ecf20Sopenharmony_ci unsigned blocksize = inode->i_sb->s_blocksize; 10468c2ecf20Sopenharmony_ci unsigned bbits; 10478c2ecf20Sopenharmony_ci struct buffer_head *bh, *head, *wait[2]; 10488c2ecf20Sopenharmony_ci int nr_wait = 0; 10498c2ecf20Sopenharmony_ci int i; 10508c2ecf20Sopenharmony_ci 10518c2ecf20Sopenharmony_ci BUG_ON(!PageLocked(page)); 10528c2ecf20Sopenharmony_ci BUG_ON(from > PAGE_SIZE); 10538c2ecf20Sopenharmony_ci BUG_ON(to > PAGE_SIZE); 10548c2ecf20Sopenharmony_ci BUG_ON(from > to); 10558c2ecf20Sopenharmony_ci 10568c2ecf20Sopenharmony_ci if (!page_has_buffers(page)) 10578c2ecf20Sopenharmony_ci create_empty_buffers(page, blocksize, 0); 10588c2ecf20Sopenharmony_ci head = page_buffers(page); 10598c2ecf20Sopenharmony_ci bbits = ilog2(blocksize); 10608c2ecf20Sopenharmony_ci block = (sector_t)page->index << (PAGE_SHIFT - bbits); 10618c2ecf20Sopenharmony_ci 10628c2ecf20Sopenharmony_ci for (bh = head, 
block_start = 0; bh != head || !block_start; 10638c2ecf20Sopenharmony_ci block++, block_start = block_end, bh = bh->b_this_page) { 10648c2ecf20Sopenharmony_ci block_end = block_start + blocksize; 10658c2ecf20Sopenharmony_ci if (block_end <= from || block_start >= to) { 10668c2ecf20Sopenharmony_ci if (PageUptodate(page)) { 10678c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh)) 10688c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 10698c2ecf20Sopenharmony_ci } 10708c2ecf20Sopenharmony_ci continue; 10718c2ecf20Sopenharmony_ci } 10728c2ecf20Sopenharmony_ci if (buffer_new(bh)) 10738c2ecf20Sopenharmony_ci clear_buffer_new(bh); 10748c2ecf20Sopenharmony_ci if (!buffer_mapped(bh)) { 10758c2ecf20Sopenharmony_ci WARN_ON(bh->b_size != blocksize); 10768c2ecf20Sopenharmony_ci err = get_block(inode, block, bh, 1); 10778c2ecf20Sopenharmony_ci if (err) 10788c2ecf20Sopenharmony_ci break; 10798c2ecf20Sopenharmony_ci if (buffer_new(bh)) { 10808c2ecf20Sopenharmony_ci if (PageUptodate(page)) { 10818c2ecf20Sopenharmony_ci clear_buffer_new(bh); 10828c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 10838c2ecf20Sopenharmony_ci mark_buffer_dirty(bh); 10848c2ecf20Sopenharmony_ci continue; 10858c2ecf20Sopenharmony_ci } 10868c2ecf20Sopenharmony_ci if (block_end > to || block_start < from) 10878c2ecf20Sopenharmony_ci zero_user_segments(page, to, block_end, 10888c2ecf20Sopenharmony_ci block_start, from); 10898c2ecf20Sopenharmony_ci continue; 10908c2ecf20Sopenharmony_ci } 10918c2ecf20Sopenharmony_ci } 10928c2ecf20Sopenharmony_ci if (PageUptodate(page)) { 10938c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh)) 10948c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 10958c2ecf20Sopenharmony_ci continue; 10968c2ecf20Sopenharmony_ci } 10978c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh) && !buffer_delay(bh) && 10988c2ecf20Sopenharmony_ci !buffer_unwritten(bh) && 10998c2ecf20Sopenharmony_ci (block_start < from || block_end > to)) { 11008c2ecf20Sopenharmony_ci ext4_read_bh_lock(bh, 0, false); 
11018c2ecf20Sopenharmony_ci wait[nr_wait++] = bh; 11028c2ecf20Sopenharmony_ci } 11038c2ecf20Sopenharmony_ci } 11048c2ecf20Sopenharmony_ci /* 11058c2ecf20Sopenharmony_ci * If we issued read requests, let them complete. 11068c2ecf20Sopenharmony_ci */ 11078c2ecf20Sopenharmony_ci for (i = 0; i < nr_wait; i++) { 11088c2ecf20Sopenharmony_ci wait_on_buffer(wait[i]); 11098c2ecf20Sopenharmony_ci if (!buffer_uptodate(wait[i])) 11108c2ecf20Sopenharmony_ci err = -EIO; 11118c2ecf20Sopenharmony_ci } 11128c2ecf20Sopenharmony_ci if (unlikely(err)) { 11138c2ecf20Sopenharmony_ci page_zero_new_buffers(page, from, to); 11148c2ecf20Sopenharmony_ci } else if (fscrypt_inode_uses_fs_layer_crypto(inode)) { 11158c2ecf20Sopenharmony_ci for (i = 0; i < nr_wait; i++) { 11168c2ecf20Sopenharmony_ci int err2; 11178c2ecf20Sopenharmony_ci 11188c2ecf20Sopenharmony_ci err2 = fscrypt_decrypt_pagecache_blocks(page, blocksize, 11198c2ecf20Sopenharmony_ci bh_offset(wait[i])); 11208c2ecf20Sopenharmony_ci if (err2) { 11218c2ecf20Sopenharmony_ci clear_buffer_uptodate(wait[i]); 11228c2ecf20Sopenharmony_ci err = err2; 11238c2ecf20Sopenharmony_ci } 11248c2ecf20Sopenharmony_ci } 11258c2ecf20Sopenharmony_ci } 11268c2ecf20Sopenharmony_ci 11278c2ecf20Sopenharmony_ci return err; 11288c2ecf20Sopenharmony_ci} 11298c2ecf20Sopenharmony_ci#endif 11308c2ecf20Sopenharmony_ci 11318c2ecf20Sopenharmony_cistatic int ext4_write_begin(struct file *file, struct address_space *mapping, 11328c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned flags, 11338c2ecf20Sopenharmony_ci struct page **pagep, void **fsdata) 11348c2ecf20Sopenharmony_ci{ 11358c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 11368c2ecf20Sopenharmony_ci int ret, needed_blocks; 11378c2ecf20Sopenharmony_ci handle_t *handle; 11388c2ecf20Sopenharmony_ci int retries = 0; 11398c2ecf20Sopenharmony_ci struct page *page; 11408c2ecf20Sopenharmony_ci pgoff_t index; 11418c2ecf20Sopenharmony_ci unsigned from, to; 11428c2ecf20Sopenharmony_ci 
11438c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 11448c2ecf20Sopenharmony_ci return -EIO; 11458c2ecf20Sopenharmony_ci 11468c2ecf20Sopenharmony_ci trace_ext4_write_begin(inode, pos, len, flags); 11478c2ecf20Sopenharmony_ci /* 11488c2ecf20Sopenharmony_ci * Reserve one block more for addition to orphan list in case 11498c2ecf20Sopenharmony_ci * we allocate blocks but write fails for some reason 11508c2ecf20Sopenharmony_ci */ 11518c2ecf20Sopenharmony_ci needed_blocks = ext4_writepage_trans_blocks(inode) + 1; 11528c2ecf20Sopenharmony_ci index = pos >> PAGE_SHIFT; 11538c2ecf20Sopenharmony_ci from = pos & (PAGE_SIZE - 1); 11548c2ecf20Sopenharmony_ci to = from + len; 11558c2ecf20Sopenharmony_ci 11568c2ecf20Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 11578c2ecf20Sopenharmony_ci ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, 11588c2ecf20Sopenharmony_ci flags, pagep); 11598c2ecf20Sopenharmony_ci if (ret < 0) 11608c2ecf20Sopenharmony_ci return ret; 11618c2ecf20Sopenharmony_ci if (ret == 1) 11628c2ecf20Sopenharmony_ci return 0; 11638c2ecf20Sopenharmony_ci } 11648c2ecf20Sopenharmony_ci 11658c2ecf20Sopenharmony_ci /* 11668c2ecf20Sopenharmony_ci * grab_cache_page_write_begin() can take a long time if the 11678c2ecf20Sopenharmony_ci * system is thrashing due to memory pressure, or if the page 11688c2ecf20Sopenharmony_ci * is being written back. So grab it first before we start 11698c2ecf20Sopenharmony_ci * the transaction handle. This also allows us to allocate 11708c2ecf20Sopenharmony_ci * the page (if needed) without using GFP_NOFS. 
11718c2ecf20Sopenharmony_ci */ 11728c2ecf20Sopenharmony_ciretry_grab: 11738c2ecf20Sopenharmony_ci page = grab_cache_page_write_begin(mapping, index, flags); 11748c2ecf20Sopenharmony_ci if (!page) 11758c2ecf20Sopenharmony_ci return -ENOMEM; 11768c2ecf20Sopenharmony_ci /* 11778c2ecf20Sopenharmony_ci * The same as page allocation, we prealloc buffer heads before 11788c2ecf20Sopenharmony_ci * starting the handle. 11798c2ecf20Sopenharmony_ci */ 11808c2ecf20Sopenharmony_ci if (!page_has_buffers(page)) 11818c2ecf20Sopenharmony_ci create_empty_buffers(page, inode->i_sb->s_blocksize, 0); 11828c2ecf20Sopenharmony_ci 11838c2ecf20Sopenharmony_ci unlock_page(page); 11848c2ecf20Sopenharmony_ci 11858c2ecf20Sopenharmony_ciretry_journal: 11868c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 11878c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 11888c2ecf20Sopenharmony_ci put_page(page); 11898c2ecf20Sopenharmony_ci return PTR_ERR(handle); 11908c2ecf20Sopenharmony_ci } 11918c2ecf20Sopenharmony_ci 11928c2ecf20Sopenharmony_ci lock_page(page); 11938c2ecf20Sopenharmony_ci if (page->mapping != mapping) { 11948c2ecf20Sopenharmony_ci /* The page got truncated from under us */ 11958c2ecf20Sopenharmony_ci unlock_page(page); 11968c2ecf20Sopenharmony_ci put_page(page); 11978c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 11988c2ecf20Sopenharmony_ci goto retry_grab; 11998c2ecf20Sopenharmony_ci } 12008c2ecf20Sopenharmony_ci /* In case writeback began while the page was unlocked */ 12018c2ecf20Sopenharmony_ci wait_for_stable_page(page); 12028c2ecf20Sopenharmony_ci 12038c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION 12048c2ecf20Sopenharmony_ci if (ext4_should_dioread_nolock(inode)) 12058c2ecf20Sopenharmony_ci ret = ext4_block_write_begin(page, pos, len, 12068c2ecf20Sopenharmony_ci ext4_get_block_unwritten); 12078c2ecf20Sopenharmony_ci else 12088c2ecf20Sopenharmony_ci ret = ext4_block_write_begin(page, pos, len, 12098c2ecf20Sopenharmony_ci 
ext4_get_block); 12108c2ecf20Sopenharmony_ci#else 12118c2ecf20Sopenharmony_ci if (ext4_should_dioread_nolock(inode)) 12128c2ecf20Sopenharmony_ci ret = __block_write_begin(page, pos, len, 12138c2ecf20Sopenharmony_ci ext4_get_block_unwritten); 12148c2ecf20Sopenharmony_ci else 12158c2ecf20Sopenharmony_ci ret = __block_write_begin(page, pos, len, ext4_get_block); 12168c2ecf20Sopenharmony_ci#endif 12178c2ecf20Sopenharmony_ci if (!ret && ext4_should_journal_data(inode)) { 12188c2ecf20Sopenharmony_ci ret = ext4_walk_page_buffers(handle, page_buffers(page), 12198c2ecf20Sopenharmony_ci from, to, NULL, 12208c2ecf20Sopenharmony_ci do_journal_get_write_access); 12218c2ecf20Sopenharmony_ci } 12228c2ecf20Sopenharmony_ci 12238c2ecf20Sopenharmony_ci if (ret) { 12248c2ecf20Sopenharmony_ci bool extended = (pos + len > inode->i_size) && 12258c2ecf20Sopenharmony_ci !ext4_verity_in_progress(inode); 12268c2ecf20Sopenharmony_ci 12278c2ecf20Sopenharmony_ci unlock_page(page); 12288c2ecf20Sopenharmony_ci /* 12298c2ecf20Sopenharmony_ci * __block_write_begin may have instantiated a few blocks 12308c2ecf20Sopenharmony_ci * outside i_size. Trim these off again. Don't need 12318c2ecf20Sopenharmony_ci * i_size_read because we hold i_mutex. 
12328c2ecf20Sopenharmony_ci * 12338c2ecf20Sopenharmony_ci * Add inode to orphan list in case we crash before 12348c2ecf20Sopenharmony_ci * truncate finishes 12358c2ecf20Sopenharmony_ci */ 12368c2ecf20Sopenharmony_ci if (extended && ext4_can_truncate(inode)) 12378c2ecf20Sopenharmony_ci ext4_orphan_add(handle, inode); 12388c2ecf20Sopenharmony_ci 12398c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 12408c2ecf20Sopenharmony_ci if (extended) { 12418c2ecf20Sopenharmony_ci ext4_truncate_failed_write(inode); 12428c2ecf20Sopenharmony_ci /* 12438c2ecf20Sopenharmony_ci * If truncate failed early the inode might 12448c2ecf20Sopenharmony_ci * still be on the orphan list; we need to 12458c2ecf20Sopenharmony_ci * make sure the inode is removed from the 12468c2ecf20Sopenharmony_ci * orphan list in that case. 12478c2ecf20Sopenharmony_ci */ 12488c2ecf20Sopenharmony_ci if (inode->i_nlink) 12498c2ecf20Sopenharmony_ci ext4_orphan_del(NULL, inode); 12508c2ecf20Sopenharmony_ci } 12518c2ecf20Sopenharmony_ci 12528c2ecf20Sopenharmony_ci if (ret == -ENOSPC && 12538c2ecf20Sopenharmony_ci ext4_should_retry_alloc(inode->i_sb, &retries)) 12548c2ecf20Sopenharmony_ci goto retry_journal; 12558c2ecf20Sopenharmony_ci put_page(page); 12568c2ecf20Sopenharmony_ci return ret; 12578c2ecf20Sopenharmony_ci } 12588c2ecf20Sopenharmony_ci *pagep = page; 12598c2ecf20Sopenharmony_ci return ret; 12608c2ecf20Sopenharmony_ci} 12618c2ecf20Sopenharmony_ci 12628c2ecf20Sopenharmony_ci/* For write_end() in data=journal mode */ 12638c2ecf20Sopenharmony_cistatic int write_end_fn(handle_t *handle, struct buffer_head *bh) 12648c2ecf20Sopenharmony_ci{ 12658c2ecf20Sopenharmony_ci int ret; 12668c2ecf20Sopenharmony_ci if (!buffer_mapped(bh) || buffer_freed(bh)) 12678c2ecf20Sopenharmony_ci return 0; 12688c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 12698c2ecf20Sopenharmony_ci ret = ext4_handle_dirty_metadata(handle, NULL, bh); 12708c2ecf20Sopenharmony_ci clear_buffer_meta(bh); 12718c2ecf20Sopenharmony_ci 
clear_buffer_prio(bh); 12728c2ecf20Sopenharmony_ci return ret; 12738c2ecf20Sopenharmony_ci} 12748c2ecf20Sopenharmony_ci 12758c2ecf20Sopenharmony_ci/* 12768c2ecf20Sopenharmony_ci * We need to pick up the new inode size which generic_commit_write gave us 12778c2ecf20Sopenharmony_ci * `file' can be NULL - eg, when called from page_symlink(). 12788c2ecf20Sopenharmony_ci * 12798c2ecf20Sopenharmony_ci * ext4 never places buffers on inode->i_mapping->private_list. metadata 12808c2ecf20Sopenharmony_ci * buffers are managed internally. 12818c2ecf20Sopenharmony_ci */ 12828c2ecf20Sopenharmony_cistatic int ext4_write_end(struct file *file, 12838c2ecf20Sopenharmony_ci struct address_space *mapping, 12848c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 12858c2ecf20Sopenharmony_ci struct page *page, void *fsdata) 12868c2ecf20Sopenharmony_ci{ 12878c2ecf20Sopenharmony_ci handle_t *handle = ext4_journal_current_handle(); 12888c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 12898c2ecf20Sopenharmony_ci loff_t old_size = inode->i_size; 12908c2ecf20Sopenharmony_ci int ret = 0, ret2; 12918c2ecf20Sopenharmony_ci int i_size_changed = 0; 12928c2ecf20Sopenharmony_ci bool verity = ext4_verity_in_progress(inode); 12938c2ecf20Sopenharmony_ci 12948c2ecf20Sopenharmony_ci trace_ext4_write_end(inode, pos, len, copied); 12958c2ecf20Sopenharmony_ci 12968c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode) && 12978c2ecf20Sopenharmony_ci ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) 12988c2ecf20Sopenharmony_ci return ext4_write_inline_data_end(inode, pos, len, copied, page); 12998c2ecf20Sopenharmony_ci 13008c2ecf20Sopenharmony_ci copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 13018c2ecf20Sopenharmony_ci /* 13028c2ecf20Sopenharmony_ci * it's important to update i_size while still holding page lock: 13038c2ecf20Sopenharmony_ci * page writeout could otherwise come in and zero beyond i_size. 
13048c2ecf20Sopenharmony_ci * 13058c2ecf20Sopenharmony_ci * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree 13068c2ecf20Sopenharmony_ci * blocks are being written past EOF, so skip the i_size update. 13078c2ecf20Sopenharmony_ci */ 13088c2ecf20Sopenharmony_ci if (!verity) 13098c2ecf20Sopenharmony_ci i_size_changed = ext4_update_inode_size(inode, pos + copied); 13108c2ecf20Sopenharmony_ci unlock_page(page); 13118c2ecf20Sopenharmony_ci put_page(page); 13128c2ecf20Sopenharmony_ci 13138c2ecf20Sopenharmony_ci if (old_size < pos && !verity) 13148c2ecf20Sopenharmony_ci pagecache_isize_extended(inode, old_size, pos); 13158c2ecf20Sopenharmony_ci /* 13168c2ecf20Sopenharmony_ci * Don't mark the inode dirty under page lock. First, it unnecessarily 13178c2ecf20Sopenharmony_ci * makes the holding time of page lock longer. Second, it forces lock 13188c2ecf20Sopenharmony_ci * ordering of page lock and transaction start for journaling 13198c2ecf20Sopenharmony_ci * filesystems. 13208c2ecf20Sopenharmony_ci */ 13218c2ecf20Sopenharmony_ci if (i_size_changed) 13228c2ecf20Sopenharmony_ci ret = ext4_mark_inode_dirty(handle, inode); 13238c2ecf20Sopenharmony_ci 13248c2ecf20Sopenharmony_ci if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) 13258c2ecf20Sopenharmony_ci /* if we have allocated more blocks and copied 13268c2ecf20Sopenharmony_ci * less. We will have blocks allocated outside 13278c2ecf20Sopenharmony_ci * inode->i_size. 
So truncate them 13288c2ecf20Sopenharmony_ci */ 13298c2ecf20Sopenharmony_ci ext4_orphan_add(handle, inode); 13308c2ecf20Sopenharmony_ci 13318c2ecf20Sopenharmony_ci ret2 = ext4_journal_stop(handle); 13328c2ecf20Sopenharmony_ci if (!ret) 13338c2ecf20Sopenharmony_ci ret = ret2; 13348c2ecf20Sopenharmony_ci 13358c2ecf20Sopenharmony_ci if (pos + len > inode->i_size && !verity) { 13368c2ecf20Sopenharmony_ci ext4_truncate_failed_write(inode); 13378c2ecf20Sopenharmony_ci /* 13388c2ecf20Sopenharmony_ci * If truncate failed early the inode might still be 13398c2ecf20Sopenharmony_ci * on the orphan list; we need to make sure the inode 13408c2ecf20Sopenharmony_ci * is removed from the orphan list in that case. 13418c2ecf20Sopenharmony_ci */ 13428c2ecf20Sopenharmony_ci if (inode->i_nlink) 13438c2ecf20Sopenharmony_ci ext4_orphan_del(NULL, inode); 13448c2ecf20Sopenharmony_ci } 13458c2ecf20Sopenharmony_ci 13468c2ecf20Sopenharmony_ci return ret ? ret : copied; 13478c2ecf20Sopenharmony_ci} 13488c2ecf20Sopenharmony_ci 13498c2ecf20Sopenharmony_ci/* 13508c2ecf20Sopenharmony_ci * This is a private version of page_zero_new_buffers() which doesn't 13518c2ecf20Sopenharmony_ci * set the buffer to be dirty, since in data=journalled mode we need 13528c2ecf20Sopenharmony_ci * to call ext4_handle_dirty_metadata() instead. 
13538c2ecf20Sopenharmony_ci */ 13548c2ecf20Sopenharmony_cistatic void ext4_journalled_zero_new_buffers(handle_t *handle, 13558c2ecf20Sopenharmony_ci struct page *page, 13568c2ecf20Sopenharmony_ci unsigned from, unsigned to) 13578c2ecf20Sopenharmony_ci{ 13588c2ecf20Sopenharmony_ci unsigned int block_start = 0, block_end; 13598c2ecf20Sopenharmony_ci struct buffer_head *head, *bh; 13608c2ecf20Sopenharmony_ci 13618c2ecf20Sopenharmony_ci bh = head = page_buffers(page); 13628c2ecf20Sopenharmony_ci do { 13638c2ecf20Sopenharmony_ci block_end = block_start + bh->b_size; 13648c2ecf20Sopenharmony_ci if (buffer_new(bh)) { 13658c2ecf20Sopenharmony_ci if (block_end > from && block_start < to) { 13668c2ecf20Sopenharmony_ci if (!PageUptodate(page)) { 13678c2ecf20Sopenharmony_ci unsigned start, size; 13688c2ecf20Sopenharmony_ci 13698c2ecf20Sopenharmony_ci start = max(from, block_start); 13708c2ecf20Sopenharmony_ci size = min(to, block_end) - start; 13718c2ecf20Sopenharmony_ci 13728c2ecf20Sopenharmony_ci zero_user(page, start, size); 13738c2ecf20Sopenharmony_ci write_end_fn(handle, bh); 13748c2ecf20Sopenharmony_ci } 13758c2ecf20Sopenharmony_ci clear_buffer_new(bh); 13768c2ecf20Sopenharmony_ci } 13778c2ecf20Sopenharmony_ci } 13788c2ecf20Sopenharmony_ci block_start = block_end; 13798c2ecf20Sopenharmony_ci bh = bh->b_this_page; 13808c2ecf20Sopenharmony_ci } while (bh != head); 13818c2ecf20Sopenharmony_ci} 13828c2ecf20Sopenharmony_ci 13838c2ecf20Sopenharmony_cistatic int ext4_journalled_write_end(struct file *file, 13848c2ecf20Sopenharmony_ci struct address_space *mapping, 13858c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 13868c2ecf20Sopenharmony_ci struct page *page, void *fsdata) 13878c2ecf20Sopenharmony_ci{ 13888c2ecf20Sopenharmony_ci handle_t *handle = ext4_journal_current_handle(); 13898c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 13908c2ecf20Sopenharmony_ci loff_t old_size = inode->i_size; 13918c2ecf20Sopenharmony_ci int ret = 0, ret2; 
13928c2ecf20Sopenharmony_ci int partial = 0; 13938c2ecf20Sopenharmony_ci unsigned from, to; 13948c2ecf20Sopenharmony_ci int size_changed = 0; 13958c2ecf20Sopenharmony_ci bool verity = ext4_verity_in_progress(inode); 13968c2ecf20Sopenharmony_ci 13978c2ecf20Sopenharmony_ci trace_ext4_journalled_write_end(inode, pos, len, copied); 13988c2ecf20Sopenharmony_ci from = pos & (PAGE_SIZE - 1); 13998c2ecf20Sopenharmony_ci to = from + len; 14008c2ecf20Sopenharmony_ci 14018c2ecf20Sopenharmony_ci BUG_ON(!ext4_handle_valid(handle)); 14028c2ecf20Sopenharmony_ci 14038c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) 14048c2ecf20Sopenharmony_ci return ext4_write_inline_data_end(inode, pos, len, copied, page); 14058c2ecf20Sopenharmony_ci 14068c2ecf20Sopenharmony_ci if (unlikely(copied < len) && !PageUptodate(page)) { 14078c2ecf20Sopenharmony_ci copied = 0; 14088c2ecf20Sopenharmony_ci ext4_journalled_zero_new_buffers(handle, page, from, to); 14098c2ecf20Sopenharmony_ci } else { 14108c2ecf20Sopenharmony_ci if (unlikely(copied < len)) 14118c2ecf20Sopenharmony_ci ext4_journalled_zero_new_buffers(handle, page, 14128c2ecf20Sopenharmony_ci from + copied, to); 14138c2ecf20Sopenharmony_ci ret = ext4_walk_page_buffers(handle, page_buffers(page), from, 14148c2ecf20Sopenharmony_ci from + copied, &partial, 14158c2ecf20Sopenharmony_ci write_end_fn); 14168c2ecf20Sopenharmony_ci if (!partial) 14178c2ecf20Sopenharmony_ci SetPageUptodate(page); 14188c2ecf20Sopenharmony_ci } 14198c2ecf20Sopenharmony_ci if (!verity) 14208c2ecf20Sopenharmony_ci size_changed = ext4_update_inode_size(inode, pos + copied); 14218c2ecf20Sopenharmony_ci ext4_set_inode_state(inode, EXT4_STATE_JDATA); 14228c2ecf20Sopenharmony_ci EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; 14238c2ecf20Sopenharmony_ci unlock_page(page); 14248c2ecf20Sopenharmony_ci put_page(page); 14258c2ecf20Sopenharmony_ci 14268c2ecf20Sopenharmony_ci if (old_size < pos && !verity) 14278c2ecf20Sopenharmony_ci 
pagecache_isize_extended(inode, old_size, pos); 14288c2ecf20Sopenharmony_ci 14298c2ecf20Sopenharmony_ci if (size_changed) { 14308c2ecf20Sopenharmony_ci ret2 = ext4_mark_inode_dirty(handle, inode); 14318c2ecf20Sopenharmony_ci if (!ret) 14328c2ecf20Sopenharmony_ci ret = ret2; 14338c2ecf20Sopenharmony_ci } 14348c2ecf20Sopenharmony_ci 14358c2ecf20Sopenharmony_ci if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) 14368c2ecf20Sopenharmony_ci /* if we have allocated more blocks and copied 14378c2ecf20Sopenharmony_ci * less. We will have blocks allocated outside 14388c2ecf20Sopenharmony_ci * inode->i_size. So truncate them 14398c2ecf20Sopenharmony_ci */ 14408c2ecf20Sopenharmony_ci ext4_orphan_add(handle, inode); 14418c2ecf20Sopenharmony_ci 14428c2ecf20Sopenharmony_ci ret2 = ext4_journal_stop(handle); 14438c2ecf20Sopenharmony_ci if (!ret) 14448c2ecf20Sopenharmony_ci ret = ret2; 14458c2ecf20Sopenharmony_ci if (pos + len > inode->i_size && !verity) { 14468c2ecf20Sopenharmony_ci ext4_truncate_failed_write(inode); 14478c2ecf20Sopenharmony_ci /* 14488c2ecf20Sopenharmony_ci * If truncate failed early the inode might still be 14498c2ecf20Sopenharmony_ci * on the orphan list; we need to make sure the inode 14508c2ecf20Sopenharmony_ci * is removed from the orphan list in that case. 14518c2ecf20Sopenharmony_ci */ 14528c2ecf20Sopenharmony_ci if (inode->i_nlink) 14538c2ecf20Sopenharmony_ci ext4_orphan_del(NULL, inode); 14548c2ecf20Sopenharmony_ci } 14558c2ecf20Sopenharmony_ci 14568c2ecf20Sopenharmony_ci return ret ? 
ret : copied; 14578c2ecf20Sopenharmony_ci} 14588c2ecf20Sopenharmony_ci 14598c2ecf20Sopenharmony_ci/* 14608c2ecf20Sopenharmony_ci * Reserve space for a single cluster 14618c2ecf20Sopenharmony_ci */ 14628c2ecf20Sopenharmony_cistatic int ext4_da_reserve_space(struct inode *inode) 14638c2ecf20Sopenharmony_ci{ 14648c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 14658c2ecf20Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 14668c2ecf20Sopenharmony_ci int ret; 14678c2ecf20Sopenharmony_ci 14688c2ecf20Sopenharmony_ci /* 14698c2ecf20Sopenharmony_ci * We will charge metadata quota at writeout time; this saves 14708c2ecf20Sopenharmony_ci * us from metadata over-estimation, though we may go over by 14718c2ecf20Sopenharmony_ci * a small amount in the end. Here we just reserve for data. 14728c2ecf20Sopenharmony_ci */ 14738c2ecf20Sopenharmony_ci ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); 14748c2ecf20Sopenharmony_ci if (ret) 14758c2ecf20Sopenharmony_ci return ret; 14768c2ecf20Sopenharmony_ci 14778c2ecf20Sopenharmony_ci spin_lock(&ei->i_block_reservation_lock); 14788c2ecf20Sopenharmony_ci if (ext4_claim_free_clusters(sbi, 1, 0)) { 14798c2ecf20Sopenharmony_ci spin_unlock(&ei->i_block_reservation_lock); 14808c2ecf20Sopenharmony_ci dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 14818c2ecf20Sopenharmony_ci return -ENOSPC; 14828c2ecf20Sopenharmony_ci } 14838c2ecf20Sopenharmony_ci ei->i_reserved_data_blocks++; 14848c2ecf20Sopenharmony_ci trace_ext4_da_reserve_space(inode); 14858c2ecf20Sopenharmony_ci spin_unlock(&ei->i_block_reservation_lock); 14868c2ecf20Sopenharmony_ci 14878c2ecf20Sopenharmony_ci return 0; /* success */ 14888c2ecf20Sopenharmony_ci} 14898c2ecf20Sopenharmony_ci 14908c2ecf20Sopenharmony_civoid ext4_da_release_space(struct inode *inode, int to_free) 14918c2ecf20Sopenharmony_ci{ 14928c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 14938c2ecf20Sopenharmony_ci struct ext4_inode_info *ei = 
EXT4_I(inode); 14948c2ecf20Sopenharmony_ci 14958c2ecf20Sopenharmony_ci if (!to_free) 14968c2ecf20Sopenharmony_ci return; /* Nothing to release, exit */ 14978c2ecf20Sopenharmony_ci 14988c2ecf20Sopenharmony_ci spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 14998c2ecf20Sopenharmony_ci 15008c2ecf20Sopenharmony_ci trace_ext4_da_release_space(inode, to_free); 15018c2ecf20Sopenharmony_ci if (unlikely(to_free > ei->i_reserved_data_blocks)) { 15028c2ecf20Sopenharmony_ci /* 15038c2ecf20Sopenharmony_ci * if there aren't enough reserved blocks, then the 15048c2ecf20Sopenharmony_ci * counter is messed up somewhere. Since this 15058c2ecf20Sopenharmony_ci * function is called from invalidate page, it's 15068c2ecf20Sopenharmony_ci * harmless to return without any action. 15078c2ecf20Sopenharmony_ci */ 15088c2ecf20Sopenharmony_ci ext4_warning(inode->i_sb, "ext4_da_release_space: " 15098c2ecf20Sopenharmony_ci "ino %lu, to_free %d with only %d reserved " 15108c2ecf20Sopenharmony_ci "data blocks", inode->i_ino, to_free, 15118c2ecf20Sopenharmony_ci ei->i_reserved_data_blocks); 15128c2ecf20Sopenharmony_ci WARN_ON(1); 15138c2ecf20Sopenharmony_ci to_free = ei->i_reserved_data_blocks; 15148c2ecf20Sopenharmony_ci } 15158c2ecf20Sopenharmony_ci ei->i_reserved_data_blocks -= to_free; 15168c2ecf20Sopenharmony_ci 15178c2ecf20Sopenharmony_ci /* update fs dirty data blocks counter */ 15188c2ecf20Sopenharmony_ci percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); 15198c2ecf20Sopenharmony_ci 15208c2ecf20Sopenharmony_ci spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 15218c2ecf20Sopenharmony_ci 15228c2ecf20Sopenharmony_ci dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); 15238c2ecf20Sopenharmony_ci} 15248c2ecf20Sopenharmony_ci 15258c2ecf20Sopenharmony_ci/* 15268c2ecf20Sopenharmony_ci * Delayed allocation stuff 15278c2ecf20Sopenharmony_ci */ 15288c2ecf20Sopenharmony_ci 15298c2ecf20Sopenharmony_cistruct mpage_da_data { 15308c2ecf20Sopenharmony_ci struct inode *inode; 
15318c2ecf20Sopenharmony_ci struct writeback_control *wbc; 15328c2ecf20Sopenharmony_ci 15338c2ecf20Sopenharmony_ci pgoff_t first_page; /* The first page to write */ 15348c2ecf20Sopenharmony_ci pgoff_t next_page; /* Current page to examine */ 15358c2ecf20Sopenharmony_ci pgoff_t last_page; /* Last page to examine */ 15368c2ecf20Sopenharmony_ci /* 15378c2ecf20Sopenharmony_ci * Extent to map - this can be after first_page because that can be 15388c2ecf20Sopenharmony_ci * fully mapped. We somewhat abuse m_flags to store whether the extent 15398c2ecf20Sopenharmony_ci * is delalloc or unwritten. 15408c2ecf20Sopenharmony_ci */ 15418c2ecf20Sopenharmony_ci struct ext4_map_blocks map; 15428c2ecf20Sopenharmony_ci struct ext4_io_submit io_submit; /* IO submission data */ 15438c2ecf20Sopenharmony_ci unsigned int do_map:1; 15448c2ecf20Sopenharmony_ci unsigned int scanned_until_end:1; 15458c2ecf20Sopenharmony_ci}; 15468c2ecf20Sopenharmony_ci 15478c2ecf20Sopenharmony_cistatic void mpage_release_unused_pages(struct mpage_da_data *mpd, 15488c2ecf20Sopenharmony_ci bool invalidate) 15498c2ecf20Sopenharmony_ci{ 15508c2ecf20Sopenharmony_ci int nr_pages, i; 15518c2ecf20Sopenharmony_ci pgoff_t index, end; 15528c2ecf20Sopenharmony_ci struct pagevec pvec; 15538c2ecf20Sopenharmony_ci struct inode *inode = mpd->inode; 15548c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 15558c2ecf20Sopenharmony_ci 15568c2ecf20Sopenharmony_ci /* This is necessary when next_page == 0. 
*/ 15578c2ecf20Sopenharmony_ci if (mpd->first_page >= mpd->next_page) 15588c2ecf20Sopenharmony_ci return; 15598c2ecf20Sopenharmony_ci 15608c2ecf20Sopenharmony_ci mpd->scanned_until_end = 0; 15618c2ecf20Sopenharmony_ci index = mpd->first_page; 15628c2ecf20Sopenharmony_ci end = mpd->next_page - 1; 15638c2ecf20Sopenharmony_ci if (invalidate) { 15648c2ecf20Sopenharmony_ci ext4_lblk_t start, last; 15658c2ecf20Sopenharmony_ci start = index << (PAGE_SHIFT - inode->i_blkbits); 15668c2ecf20Sopenharmony_ci last = end << (PAGE_SHIFT - inode->i_blkbits); 15678c2ecf20Sopenharmony_ci 15688c2ecf20Sopenharmony_ci /* 15698c2ecf20Sopenharmony_ci * avoid racing with extent status tree scans made by 15708c2ecf20Sopenharmony_ci * ext4_insert_delayed_block() 15718c2ecf20Sopenharmony_ci */ 15728c2ecf20Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 15738c2ecf20Sopenharmony_ci ext4_es_remove_extent(inode, start, last - start + 1); 15748c2ecf20Sopenharmony_ci up_write(&EXT4_I(inode)->i_data_sem); 15758c2ecf20Sopenharmony_ci } 15768c2ecf20Sopenharmony_ci 15778c2ecf20Sopenharmony_ci pagevec_init(&pvec); 15788c2ecf20Sopenharmony_ci while (index <= end) { 15798c2ecf20Sopenharmony_ci nr_pages = pagevec_lookup_range(&pvec, mapping, &index, end); 15808c2ecf20Sopenharmony_ci if (nr_pages == 0) 15818c2ecf20Sopenharmony_ci break; 15828c2ecf20Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 15838c2ecf20Sopenharmony_ci struct page *page = pvec.pages[i]; 15848c2ecf20Sopenharmony_ci 15858c2ecf20Sopenharmony_ci BUG_ON(!PageLocked(page)); 15868c2ecf20Sopenharmony_ci BUG_ON(PageWriteback(page)); 15878c2ecf20Sopenharmony_ci if (invalidate) { 15888c2ecf20Sopenharmony_ci if (page_mapped(page)) 15898c2ecf20Sopenharmony_ci clear_page_dirty_for_io(page); 15908c2ecf20Sopenharmony_ci block_invalidatepage(page, 0, PAGE_SIZE); 15918c2ecf20Sopenharmony_ci ClearPageUptodate(page); 15928c2ecf20Sopenharmony_ci } 15938c2ecf20Sopenharmony_ci unlock_page(page); 15948c2ecf20Sopenharmony_ci } 
15958c2ecf20Sopenharmony_ci pagevec_release(&pvec); 15968c2ecf20Sopenharmony_ci } 15978c2ecf20Sopenharmony_ci} 15988c2ecf20Sopenharmony_ci 15998c2ecf20Sopenharmony_cistatic void ext4_print_free_blocks(struct inode *inode) 16008c2ecf20Sopenharmony_ci{ 16018c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 16028c2ecf20Sopenharmony_ci struct super_block *sb = inode->i_sb; 16038c2ecf20Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 16048c2ecf20Sopenharmony_ci 16058c2ecf20Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld", 16068c2ecf20Sopenharmony_ci EXT4_C2B(EXT4_SB(inode->i_sb), 16078c2ecf20Sopenharmony_ci ext4_count_free_clusters(sb))); 16088c2ecf20Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "Free/Dirty block details"); 16098c2ecf20Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "free_blocks=%lld", 16108c2ecf20Sopenharmony_ci (long long) EXT4_C2B(EXT4_SB(sb), 16118c2ecf20Sopenharmony_ci percpu_counter_sum(&sbi->s_freeclusters_counter))); 16128c2ecf20Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld", 16138c2ecf20Sopenharmony_ci (long long) EXT4_C2B(EXT4_SB(sb), 16148c2ecf20Sopenharmony_ci percpu_counter_sum(&sbi->s_dirtyclusters_counter))); 16158c2ecf20Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "Block reservation details"); 16168c2ecf20Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", 16178c2ecf20Sopenharmony_ci ei->i_reserved_data_blocks); 16188c2ecf20Sopenharmony_ci return; 16198c2ecf20Sopenharmony_ci} 16208c2ecf20Sopenharmony_ci 16218c2ecf20Sopenharmony_cistatic int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) 16228c2ecf20Sopenharmony_ci{ 16238c2ecf20Sopenharmony_ci return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); 16248c2ecf20Sopenharmony_ci} 16258c2ecf20Sopenharmony_ci 16268c2ecf20Sopenharmony_ci/* 16278c2ecf20Sopenharmony_ci * ext4_insert_delayed_block - adds a delayed block to the extents status 16288c2ecf20Sopenharmony_ci * tree, incrementing the 
reserved cluster/block 16298c2ecf20Sopenharmony_ci * count or making a pending reservation 16308c2ecf20Sopenharmony_ci * where needed 16318c2ecf20Sopenharmony_ci * 16328c2ecf20Sopenharmony_ci * @inode - file containing the newly added block 16338c2ecf20Sopenharmony_ci * @lblk - logical block to be added 16348c2ecf20Sopenharmony_ci * 16358c2ecf20Sopenharmony_ci * Returns 0 on success, negative error code on failure. 16368c2ecf20Sopenharmony_ci */ 16378c2ecf20Sopenharmony_cistatic int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk) 16388c2ecf20Sopenharmony_ci{ 16398c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 16408c2ecf20Sopenharmony_ci int ret; 16418c2ecf20Sopenharmony_ci bool allocated = false; 16428c2ecf20Sopenharmony_ci bool reserved = false; 16438c2ecf20Sopenharmony_ci 16448c2ecf20Sopenharmony_ci /* 16458c2ecf20Sopenharmony_ci * If the cluster containing lblk is shared with a delayed, 16468c2ecf20Sopenharmony_ci * written, or unwritten extent in a bigalloc file system, it's 16478c2ecf20Sopenharmony_ci * already been accounted for and does not need to be reserved. 16488c2ecf20Sopenharmony_ci * A pending reservation must be made for the cluster if it's 16498c2ecf20Sopenharmony_ci * shared with a written or unwritten extent and doesn't already 16508c2ecf20Sopenharmony_ci * have one. Written and unwritten extents can be purged from the 16518c2ecf20Sopenharmony_ci * extents status tree if the system is under memory pressure, so 16528c2ecf20Sopenharmony_ci * it's necessary to examine the extent tree if a search of the 16538c2ecf20Sopenharmony_ci * extents status tree doesn't get a match. 
16548c2ecf20Sopenharmony_ci */ 16558c2ecf20Sopenharmony_ci if (sbi->s_cluster_ratio == 1) { 16568c2ecf20Sopenharmony_ci ret = ext4_da_reserve_space(inode); 16578c2ecf20Sopenharmony_ci if (ret != 0) /* ENOSPC */ 16588c2ecf20Sopenharmony_ci goto errout; 16598c2ecf20Sopenharmony_ci reserved = true; 16608c2ecf20Sopenharmony_ci } else { /* bigalloc */ 16618c2ecf20Sopenharmony_ci if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) { 16628c2ecf20Sopenharmony_ci if (!ext4_es_scan_clu(inode, 16638c2ecf20Sopenharmony_ci &ext4_es_is_mapped, lblk)) { 16648c2ecf20Sopenharmony_ci ret = ext4_clu_mapped(inode, 16658c2ecf20Sopenharmony_ci EXT4_B2C(sbi, lblk)); 16668c2ecf20Sopenharmony_ci if (ret < 0) 16678c2ecf20Sopenharmony_ci goto errout; 16688c2ecf20Sopenharmony_ci if (ret == 0) { 16698c2ecf20Sopenharmony_ci ret = ext4_da_reserve_space(inode); 16708c2ecf20Sopenharmony_ci if (ret != 0) /* ENOSPC */ 16718c2ecf20Sopenharmony_ci goto errout; 16728c2ecf20Sopenharmony_ci reserved = true; 16738c2ecf20Sopenharmony_ci } else { 16748c2ecf20Sopenharmony_ci allocated = true; 16758c2ecf20Sopenharmony_ci } 16768c2ecf20Sopenharmony_ci } else { 16778c2ecf20Sopenharmony_ci allocated = true; 16788c2ecf20Sopenharmony_ci } 16798c2ecf20Sopenharmony_ci } 16808c2ecf20Sopenharmony_ci } 16818c2ecf20Sopenharmony_ci 16828c2ecf20Sopenharmony_ci ret = ext4_es_insert_delayed_block(inode, lblk, allocated); 16838c2ecf20Sopenharmony_ci if (ret && reserved) 16848c2ecf20Sopenharmony_ci ext4_da_release_space(inode, 1); 16858c2ecf20Sopenharmony_ci 16868c2ecf20Sopenharmony_cierrout: 16878c2ecf20Sopenharmony_ci return ret; 16888c2ecf20Sopenharmony_ci} 16898c2ecf20Sopenharmony_ci 16908c2ecf20Sopenharmony_ci/* 16918c2ecf20Sopenharmony_ci * This function is grabs code from the very beginning of 16928c2ecf20Sopenharmony_ci * ext4_map_blocks, but assumes that the caller is from delayed write 16938c2ecf20Sopenharmony_ci * time. 
This function looks up the requested blocks and sets the 16948c2ecf20Sopenharmony_ci * buffer delay bit under the protection of i_data_sem. 16958c2ecf20Sopenharmony_ci */ 16968c2ecf20Sopenharmony_cistatic int ext4_da_map_blocks(struct inode *inode, sector_t iblock, 16978c2ecf20Sopenharmony_ci struct ext4_map_blocks *map, 16988c2ecf20Sopenharmony_ci struct buffer_head *bh) 16998c2ecf20Sopenharmony_ci{ 17008c2ecf20Sopenharmony_ci struct extent_status es; 17018c2ecf20Sopenharmony_ci int retval; 17028c2ecf20Sopenharmony_ci sector_t invalid_block = ~((sector_t) 0xffff); 17038c2ecf20Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 17048c2ecf20Sopenharmony_ci struct ext4_map_blocks orig_map; 17058c2ecf20Sopenharmony_ci 17068c2ecf20Sopenharmony_ci memcpy(&orig_map, map, sizeof(*map)); 17078c2ecf20Sopenharmony_ci#endif 17088c2ecf20Sopenharmony_ci 17098c2ecf20Sopenharmony_ci if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) 17108c2ecf20Sopenharmony_ci invalid_block = ~0; 17118c2ecf20Sopenharmony_ci 17128c2ecf20Sopenharmony_ci map->m_flags = 0; 17138c2ecf20Sopenharmony_ci ext_debug(inode, "max_blocks %u, logical block %lu\n", map->m_len, 17148c2ecf20Sopenharmony_ci (unsigned long) map->m_lblk); 17158c2ecf20Sopenharmony_ci 17168c2ecf20Sopenharmony_ci /* Lookup extent status tree firstly */ 17178c2ecf20Sopenharmony_ci if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { 17188c2ecf20Sopenharmony_ci if (ext4_es_is_hole(&es)) { 17198c2ecf20Sopenharmony_ci retval = 0; 17208c2ecf20Sopenharmony_ci down_read(&EXT4_I(inode)->i_data_sem); 17218c2ecf20Sopenharmony_ci goto add_delayed; 17228c2ecf20Sopenharmony_ci } 17238c2ecf20Sopenharmony_ci 17248c2ecf20Sopenharmony_ci /* 17258c2ecf20Sopenharmony_ci * Delayed extent could be allocated by fallocate. 17268c2ecf20Sopenharmony_ci * So we need to check it. 
17278c2ecf20Sopenharmony_ci */ 17288c2ecf20Sopenharmony_ci if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) { 17298c2ecf20Sopenharmony_ci map_bh(bh, inode->i_sb, invalid_block); 17308c2ecf20Sopenharmony_ci set_buffer_new(bh); 17318c2ecf20Sopenharmony_ci set_buffer_delay(bh); 17328c2ecf20Sopenharmony_ci return 0; 17338c2ecf20Sopenharmony_ci } 17348c2ecf20Sopenharmony_ci 17358c2ecf20Sopenharmony_ci map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk; 17368c2ecf20Sopenharmony_ci retval = es.es_len - (iblock - es.es_lblk); 17378c2ecf20Sopenharmony_ci if (retval > map->m_len) 17388c2ecf20Sopenharmony_ci retval = map->m_len; 17398c2ecf20Sopenharmony_ci map->m_len = retval; 17408c2ecf20Sopenharmony_ci if (ext4_es_is_written(&es)) 17418c2ecf20Sopenharmony_ci map->m_flags |= EXT4_MAP_MAPPED; 17428c2ecf20Sopenharmony_ci else if (ext4_es_is_unwritten(&es)) 17438c2ecf20Sopenharmony_ci map->m_flags |= EXT4_MAP_UNWRITTEN; 17448c2ecf20Sopenharmony_ci else 17458c2ecf20Sopenharmony_ci BUG(); 17468c2ecf20Sopenharmony_ci 17478c2ecf20Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 17488c2ecf20Sopenharmony_ci ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0); 17498c2ecf20Sopenharmony_ci#endif 17508c2ecf20Sopenharmony_ci return retval; 17518c2ecf20Sopenharmony_ci } 17528c2ecf20Sopenharmony_ci 17538c2ecf20Sopenharmony_ci /* 17548c2ecf20Sopenharmony_ci * Try to see if we can get the block without requesting a new 17558c2ecf20Sopenharmony_ci * file system block. 
17568c2ecf20Sopenharmony_ci */ 17578c2ecf20Sopenharmony_ci down_read(&EXT4_I(inode)->i_data_sem); 17588c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) 17598c2ecf20Sopenharmony_ci retval = 0; 17608c2ecf20Sopenharmony_ci else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 17618c2ecf20Sopenharmony_ci retval = ext4_ext_map_blocks(NULL, inode, map, 0); 17628c2ecf20Sopenharmony_ci else 17638c2ecf20Sopenharmony_ci retval = ext4_ind_map_blocks(NULL, inode, map, 0); 17648c2ecf20Sopenharmony_ci 17658c2ecf20Sopenharmony_ciadd_delayed: 17668c2ecf20Sopenharmony_ci if (retval == 0) { 17678c2ecf20Sopenharmony_ci int ret; 17688c2ecf20Sopenharmony_ci 17698c2ecf20Sopenharmony_ci /* 17708c2ecf20Sopenharmony_ci * XXX: __block_prepare_write() unmaps passed block, 17718c2ecf20Sopenharmony_ci * is it OK? 17728c2ecf20Sopenharmony_ci */ 17738c2ecf20Sopenharmony_ci 17748c2ecf20Sopenharmony_ci ret = ext4_insert_delayed_block(inode, map->m_lblk); 17758c2ecf20Sopenharmony_ci if (ret != 0) { 17768c2ecf20Sopenharmony_ci retval = ret; 17778c2ecf20Sopenharmony_ci goto out_unlock; 17788c2ecf20Sopenharmony_ci } 17798c2ecf20Sopenharmony_ci 17808c2ecf20Sopenharmony_ci map_bh(bh, inode->i_sb, invalid_block); 17818c2ecf20Sopenharmony_ci set_buffer_new(bh); 17828c2ecf20Sopenharmony_ci set_buffer_delay(bh); 17838c2ecf20Sopenharmony_ci } else if (retval > 0) { 17848c2ecf20Sopenharmony_ci int ret; 17858c2ecf20Sopenharmony_ci unsigned int status; 17868c2ecf20Sopenharmony_ci 17878c2ecf20Sopenharmony_ci if (unlikely(retval != map->m_len)) { 17888c2ecf20Sopenharmony_ci ext4_warning(inode->i_sb, 17898c2ecf20Sopenharmony_ci "ES len assertion failed for inode " 17908c2ecf20Sopenharmony_ci "%lu: retval %d != map->m_len %d", 17918c2ecf20Sopenharmony_ci inode->i_ino, retval, map->m_len); 17928c2ecf20Sopenharmony_ci WARN_ON(1); 17938c2ecf20Sopenharmony_ci } 17948c2ecf20Sopenharmony_ci 17958c2ecf20Sopenharmony_ci status = map->m_flags & EXT4_MAP_UNWRITTEN ? 
17968c2ecf20Sopenharmony_ci EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 17978c2ecf20Sopenharmony_ci ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 17988c2ecf20Sopenharmony_ci map->m_pblk, status); 17998c2ecf20Sopenharmony_ci if (ret != 0) 18008c2ecf20Sopenharmony_ci retval = ret; 18018c2ecf20Sopenharmony_ci } 18028c2ecf20Sopenharmony_ci 18038c2ecf20Sopenharmony_ciout_unlock: 18048c2ecf20Sopenharmony_ci up_read((&EXT4_I(inode)->i_data_sem)); 18058c2ecf20Sopenharmony_ci 18068c2ecf20Sopenharmony_ci return retval; 18078c2ecf20Sopenharmony_ci} 18088c2ecf20Sopenharmony_ci 18098c2ecf20Sopenharmony_ci/* 18108c2ecf20Sopenharmony_ci * This is a special get_block_t callback which is used by 18118c2ecf20Sopenharmony_ci * ext4_da_write_begin(). It will either return mapped block or 18128c2ecf20Sopenharmony_ci * reserve space for a single block. 18138c2ecf20Sopenharmony_ci * 18148c2ecf20Sopenharmony_ci * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. 18158c2ecf20Sopenharmony_ci * We also have b_blocknr = -1 and b_bdev initialized properly 18168c2ecf20Sopenharmony_ci * 18178c2ecf20Sopenharmony_ci * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. 18188c2ecf20Sopenharmony_ci * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev 18198c2ecf20Sopenharmony_ci * initialized properly. 
18208c2ecf20Sopenharmony_ci */ 18218c2ecf20Sopenharmony_ciint ext4_da_get_block_prep(struct inode *inode, sector_t iblock, 18228c2ecf20Sopenharmony_ci struct buffer_head *bh, int create) 18238c2ecf20Sopenharmony_ci{ 18248c2ecf20Sopenharmony_ci struct ext4_map_blocks map; 18258c2ecf20Sopenharmony_ci int ret = 0; 18268c2ecf20Sopenharmony_ci 18278c2ecf20Sopenharmony_ci BUG_ON(create == 0); 18288c2ecf20Sopenharmony_ci BUG_ON(bh->b_size != inode->i_sb->s_blocksize); 18298c2ecf20Sopenharmony_ci 18308c2ecf20Sopenharmony_ci map.m_lblk = iblock; 18318c2ecf20Sopenharmony_ci map.m_len = 1; 18328c2ecf20Sopenharmony_ci 18338c2ecf20Sopenharmony_ci /* 18348c2ecf20Sopenharmony_ci * first, we need to know whether the block is allocated already 18358c2ecf20Sopenharmony_ci * preallocated blocks are unmapped but should treated 18368c2ecf20Sopenharmony_ci * the same as allocated blocks. 18378c2ecf20Sopenharmony_ci */ 18388c2ecf20Sopenharmony_ci ret = ext4_da_map_blocks(inode, iblock, &map, bh); 18398c2ecf20Sopenharmony_ci if (ret <= 0) 18408c2ecf20Sopenharmony_ci return ret; 18418c2ecf20Sopenharmony_ci 18428c2ecf20Sopenharmony_ci map_bh(bh, inode->i_sb, map.m_pblk); 18438c2ecf20Sopenharmony_ci ext4_update_bh_state(bh, map.m_flags); 18448c2ecf20Sopenharmony_ci 18458c2ecf20Sopenharmony_ci if (buffer_unwritten(bh)) { 18468c2ecf20Sopenharmony_ci /* A delayed write to unwritten bh should be marked 18478c2ecf20Sopenharmony_ci * new and mapped. Mapped ensures that we don't do 18488c2ecf20Sopenharmony_ci * get_block multiple times when we write to the same 18498c2ecf20Sopenharmony_ci * offset and new ensures that we do proper zero out 18508c2ecf20Sopenharmony_ci * for partial write. 
18518c2ecf20Sopenharmony_ci */ 18528c2ecf20Sopenharmony_ci set_buffer_new(bh); 18538c2ecf20Sopenharmony_ci set_buffer_mapped(bh); 18548c2ecf20Sopenharmony_ci } 18558c2ecf20Sopenharmony_ci return 0; 18568c2ecf20Sopenharmony_ci} 18578c2ecf20Sopenharmony_ci 18588c2ecf20Sopenharmony_cistatic int bget_one(handle_t *handle, struct buffer_head *bh) 18598c2ecf20Sopenharmony_ci{ 18608c2ecf20Sopenharmony_ci get_bh(bh); 18618c2ecf20Sopenharmony_ci return 0; 18628c2ecf20Sopenharmony_ci} 18638c2ecf20Sopenharmony_ci 18648c2ecf20Sopenharmony_cistatic int bput_one(handle_t *handle, struct buffer_head *bh) 18658c2ecf20Sopenharmony_ci{ 18668c2ecf20Sopenharmony_ci put_bh(bh); 18678c2ecf20Sopenharmony_ci return 0; 18688c2ecf20Sopenharmony_ci} 18698c2ecf20Sopenharmony_ci 18708c2ecf20Sopenharmony_cistatic int __ext4_journalled_writepage(struct page *page, 18718c2ecf20Sopenharmony_ci unsigned int len) 18728c2ecf20Sopenharmony_ci{ 18738c2ecf20Sopenharmony_ci struct address_space *mapping = page->mapping; 18748c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 18758c2ecf20Sopenharmony_ci struct buffer_head *page_bufs = NULL; 18768c2ecf20Sopenharmony_ci handle_t *handle = NULL; 18778c2ecf20Sopenharmony_ci int ret = 0, err = 0; 18788c2ecf20Sopenharmony_ci int inline_data = ext4_has_inline_data(inode); 18798c2ecf20Sopenharmony_ci struct buffer_head *inode_bh = NULL; 18808c2ecf20Sopenharmony_ci 18818c2ecf20Sopenharmony_ci ClearPageChecked(page); 18828c2ecf20Sopenharmony_ci 18838c2ecf20Sopenharmony_ci if (inline_data) { 18848c2ecf20Sopenharmony_ci BUG_ON(page->index != 0); 18858c2ecf20Sopenharmony_ci BUG_ON(len > ext4_get_max_inline_size(inode)); 18868c2ecf20Sopenharmony_ci inode_bh = ext4_journalled_write_inline_data(inode, len, page); 18878c2ecf20Sopenharmony_ci if (inode_bh == NULL) 18888c2ecf20Sopenharmony_ci goto out; 18898c2ecf20Sopenharmony_ci } else { 18908c2ecf20Sopenharmony_ci page_bufs = page_buffers(page); 18918c2ecf20Sopenharmony_ci if (!page_bufs) { 
18928c2ecf20Sopenharmony_ci BUG(); 18938c2ecf20Sopenharmony_ci goto out; 18948c2ecf20Sopenharmony_ci } 18958c2ecf20Sopenharmony_ci ext4_walk_page_buffers(handle, page_bufs, 0, len, 18968c2ecf20Sopenharmony_ci NULL, bget_one); 18978c2ecf20Sopenharmony_ci } 18988c2ecf20Sopenharmony_ci /* 18998c2ecf20Sopenharmony_ci * We need to release the page lock before we start the 19008c2ecf20Sopenharmony_ci * journal, so grab a reference so the page won't disappear 19018c2ecf20Sopenharmony_ci * out from under us. 19028c2ecf20Sopenharmony_ci */ 19038c2ecf20Sopenharmony_ci get_page(page); 19048c2ecf20Sopenharmony_ci unlock_page(page); 19058c2ecf20Sopenharmony_ci 19068c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 19078c2ecf20Sopenharmony_ci ext4_writepage_trans_blocks(inode)); 19088c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 19098c2ecf20Sopenharmony_ci ret = PTR_ERR(handle); 19108c2ecf20Sopenharmony_ci put_page(page); 19118c2ecf20Sopenharmony_ci goto out_no_pagelock; 19128c2ecf20Sopenharmony_ci } 19138c2ecf20Sopenharmony_ci BUG_ON(!ext4_handle_valid(handle)); 19148c2ecf20Sopenharmony_ci 19158c2ecf20Sopenharmony_ci lock_page(page); 19168c2ecf20Sopenharmony_ci put_page(page); 19178c2ecf20Sopenharmony_ci if (page->mapping != mapping) { 19188c2ecf20Sopenharmony_ci /* The page got truncated from under us */ 19198c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 19208c2ecf20Sopenharmony_ci ret = 0; 19218c2ecf20Sopenharmony_ci goto out; 19228c2ecf20Sopenharmony_ci } 19238c2ecf20Sopenharmony_ci 19248c2ecf20Sopenharmony_ci if (inline_data) { 19258c2ecf20Sopenharmony_ci ret = ext4_mark_inode_dirty(handle, inode); 19268c2ecf20Sopenharmony_ci } else { 19278c2ecf20Sopenharmony_ci ret = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, 19288c2ecf20Sopenharmony_ci do_journal_get_write_access); 19298c2ecf20Sopenharmony_ci 19308c2ecf20Sopenharmony_ci err = ext4_walk_page_buffers(handle, page_bufs, 0, len, NULL, 19318c2ecf20Sopenharmony_ci write_end_fn); 
19328c2ecf20Sopenharmony_ci } 19338c2ecf20Sopenharmony_ci if (ret == 0) 19348c2ecf20Sopenharmony_ci ret = err; 19358c2ecf20Sopenharmony_ci err = ext4_jbd2_inode_add_write(handle, inode, page_offset(page), len); 19368c2ecf20Sopenharmony_ci if (ret == 0) 19378c2ecf20Sopenharmony_ci ret = err; 19388c2ecf20Sopenharmony_ci EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; 19398c2ecf20Sopenharmony_ci err = ext4_journal_stop(handle); 19408c2ecf20Sopenharmony_ci if (!ret) 19418c2ecf20Sopenharmony_ci ret = err; 19428c2ecf20Sopenharmony_ci 19438c2ecf20Sopenharmony_ci ext4_set_inode_state(inode, EXT4_STATE_JDATA); 19448c2ecf20Sopenharmony_ciout: 19458c2ecf20Sopenharmony_ci unlock_page(page); 19468c2ecf20Sopenharmony_ciout_no_pagelock: 19478c2ecf20Sopenharmony_ci if (!inline_data && page_bufs) 19488c2ecf20Sopenharmony_ci ext4_walk_page_buffers(NULL, page_bufs, 0, len, 19498c2ecf20Sopenharmony_ci NULL, bput_one); 19508c2ecf20Sopenharmony_ci brelse(inode_bh); 19518c2ecf20Sopenharmony_ci return ret; 19528c2ecf20Sopenharmony_ci} 19538c2ecf20Sopenharmony_ci 19548c2ecf20Sopenharmony_cistatic void cancel_page_dirty_status(struct page *page) 19558c2ecf20Sopenharmony_ci{ 19568c2ecf20Sopenharmony_ci struct address_space *mapping = page_mapping(page); 19578c2ecf20Sopenharmony_ci unsigned long flags; 19588c2ecf20Sopenharmony_ci 19598c2ecf20Sopenharmony_ci cancel_dirty_page(page); 19608c2ecf20Sopenharmony_ci xa_lock_irqsave(&mapping->i_pages, flags); 19618c2ecf20Sopenharmony_ci __xa_clear_mark(&mapping->i_pages, page_index(page), 19628c2ecf20Sopenharmony_ci PAGECACHE_TAG_DIRTY); 19638c2ecf20Sopenharmony_ci __xa_clear_mark(&mapping->i_pages, page_index(page), 19648c2ecf20Sopenharmony_ci PAGECACHE_TAG_TOWRITE); 19658c2ecf20Sopenharmony_ci xa_unlock_irqrestore(&mapping->i_pages, flags); 19668c2ecf20Sopenharmony_ci} 19678c2ecf20Sopenharmony_ci 19688c2ecf20Sopenharmony_ci/* 19698c2ecf20Sopenharmony_ci * Note that we don't need to start a transaction unless we're journaling data 
19708c2ecf20Sopenharmony_ci * because we should have holes filled from ext4_page_mkwrite(). We even don't 19718c2ecf20Sopenharmony_ci * need to file the inode to the transaction's list in ordered mode because if 19728c2ecf20Sopenharmony_ci * we are writing back data added by write(), the inode is already there and if 19738c2ecf20Sopenharmony_ci * we are writing back data modified via mmap(), no one guarantees in which 19748c2ecf20Sopenharmony_ci * transaction the data will hit the disk. In case we are journaling data, we 19758c2ecf20Sopenharmony_ci * cannot start transaction directly because transaction start ranks above page 19768c2ecf20Sopenharmony_ci * lock so we have to do some magic. 19778c2ecf20Sopenharmony_ci * 19788c2ecf20Sopenharmony_ci * This function can get called via... 19798c2ecf20Sopenharmony_ci * - ext4_writepages after taking page lock (have journal handle) 19808c2ecf20Sopenharmony_ci * - journal_submit_inode_data_buffers (no journal handle) 19818c2ecf20Sopenharmony_ci * - shrink_page_list via the kswapd/direct reclaim (no journal handle) 19828c2ecf20Sopenharmony_ci * - grab_page_cache when doing write_begin (have journal handle) 19838c2ecf20Sopenharmony_ci * 19848c2ecf20Sopenharmony_ci * We don't do any block allocation in this function. If we have page with 19858c2ecf20Sopenharmony_ci * multiple blocks we need to write those buffer_heads that are mapped. This 19868c2ecf20Sopenharmony_ci * is important for mmaped based write. So if we do with blocksize 1K 19878c2ecf20Sopenharmony_ci * truncate(f, 1024); 19888c2ecf20Sopenharmony_ci * a = mmap(f, 0, 4096); 19898c2ecf20Sopenharmony_ci * a[0] = 'a'; 19908c2ecf20Sopenharmony_ci * truncate(f, 4096); 19918c2ecf20Sopenharmony_ci * we have in the page first buffer_head mapped via page_mkwrite call back 19928c2ecf20Sopenharmony_ci * but other buffer_heads would be unmapped but dirty (dirty done via the 19938c2ecf20Sopenharmony_ci * do_wp_page). So writepage should write the first block. 
If we modify 19948c2ecf20Sopenharmony_ci * the mmap area beyond 1024 we will again get a page_fault and the 19958c2ecf20Sopenharmony_ci * page_mkwrite callback will do the block allocation and mark the 19968c2ecf20Sopenharmony_ci * buffer_heads mapped. 19978c2ecf20Sopenharmony_ci * 19988c2ecf20Sopenharmony_ci * We redirty the page if we have any buffer_heads that is either delay or 19998c2ecf20Sopenharmony_ci * unwritten in the page. 20008c2ecf20Sopenharmony_ci * 20018c2ecf20Sopenharmony_ci * We can get recursively called as show below. 20028c2ecf20Sopenharmony_ci * 20038c2ecf20Sopenharmony_ci * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> 20048c2ecf20Sopenharmony_ci * ext4_writepage() 20058c2ecf20Sopenharmony_ci * 20068c2ecf20Sopenharmony_ci * But since we don't do any block allocation we should not deadlock. 20078c2ecf20Sopenharmony_ci * Page also have the dirty flag cleared so we don't get recurive page_lock. 20088c2ecf20Sopenharmony_ci */ 20098c2ecf20Sopenharmony_cistatic int ext4_writepage(struct page *page, 20108c2ecf20Sopenharmony_ci struct writeback_control *wbc) 20118c2ecf20Sopenharmony_ci{ 20128c2ecf20Sopenharmony_ci int ret = 0; 20138c2ecf20Sopenharmony_ci loff_t size; 20148c2ecf20Sopenharmony_ci unsigned int len; 20158c2ecf20Sopenharmony_ci struct buffer_head *page_bufs = NULL; 20168c2ecf20Sopenharmony_ci struct inode *inode = page->mapping->host; 20178c2ecf20Sopenharmony_ci struct ext4_io_submit io_submit; 20188c2ecf20Sopenharmony_ci bool keep_towrite = false; 20198c2ecf20Sopenharmony_ci 20208c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { 20218c2ecf20Sopenharmony_ci inode->i_mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE); 20228c2ecf20Sopenharmony_ci unlock_page(page); 20238c2ecf20Sopenharmony_ci return -EIO; 20248c2ecf20Sopenharmony_ci } 20258c2ecf20Sopenharmony_ci 20268c2ecf20Sopenharmony_ci if (WARN_ON(!page_has_buffers(page))) { 20278c2ecf20Sopenharmony_ci 
cancel_page_dirty_status(page); 20288c2ecf20Sopenharmony_ci unlock_page(page); 20298c2ecf20Sopenharmony_ci return 0; 20308c2ecf20Sopenharmony_ci } 20318c2ecf20Sopenharmony_ci 20328c2ecf20Sopenharmony_ci trace_ext4_writepage(page); 20338c2ecf20Sopenharmony_ci size = i_size_read(inode); 20348c2ecf20Sopenharmony_ci if (page->index == size >> PAGE_SHIFT && 20358c2ecf20Sopenharmony_ci !ext4_verity_in_progress(inode)) 20368c2ecf20Sopenharmony_ci len = size & ~PAGE_MASK; 20378c2ecf20Sopenharmony_ci else 20388c2ecf20Sopenharmony_ci len = PAGE_SIZE; 20398c2ecf20Sopenharmony_ci 20408c2ecf20Sopenharmony_ci /* Should never happen but for bugs in other kernel subsystems */ 20418c2ecf20Sopenharmony_ci if (!page_has_buffers(page)) { 20428c2ecf20Sopenharmony_ci ext4_warning_inode(inode, 20438c2ecf20Sopenharmony_ci "page %lu does not have buffers attached", page->index); 20448c2ecf20Sopenharmony_ci ClearPageDirty(page); 20458c2ecf20Sopenharmony_ci unlock_page(page); 20468c2ecf20Sopenharmony_ci return 0; 20478c2ecf20Sopenharmony_ci } 20488c2ecf20Sopenharmony_ci 20498c2ecf20Sopenharmony_ci page_bufs = page_buffers(page); 20508c2ecf20Sopenharmony_ci /* 20518c2ecf20Sopenharmony_ci * We cannot do block allocation or other extent handling in this 20528c2ecf20Sopenharmony_ci * function. If there are buffers needing that, we have to redirty 20538c2ecf20Sopenharmony_ci * the page. But we may reach here when we do a journal commit via 20548c2ecf20Sopenharmony_ci * journal_submit_inode_data_buffers() and in that case we must write 20558c2ecf20Sopenharmony_ci * allocated buffers to achieve data=ordered mode guarantees. 
20568c2ecf20Sopenharmony_ci * 20578c2ecf20Sopenharmony_ci * Also, if there is only one buffer per page (the fs block 20588c2ecf20Sopenharmony_ci * size == the page size), if one buffer needs block 20598c2ecf20Sopenharmony_ci * allocation or needs to modify the extent tree to clear the 20608c2ecf20Sopenharmony_ci * unwritten flag, we know that the page can't be written at 20618c2ecf20Sopenharmony_ci * all, so we might as well refuse the write immediately. 20628c2ecf20Sopenharmony_ci * Unfortunately if the block size != page size, we can't as 20638c2ecf20Sopenharmony_ci * easily detect this case using ext4_walk_page_buffers(), but 20648c2ecf20Sopenharmony_ci * for the extremely common case, this is an optimization that 20658c2ecf20Sopenharmony_ci * skips a useless round trip through ext4_bio_write_page(). 20668c2ecf20Sopenharmony_ci */ 20678c2ecf20Sopenharmony_ci if (ext4_walk_page_buffers(NULL, page_bufs, 0, len, NULL, 20688c2ecf20Sopenharmony_ci ext4_bh_delay_or_unwritten)) { 20698c2ecf20Sopenharmony_ci redirty_page_for_writepage(wbc, page); 20708c2ecf20Sopenharmony_ci if ((current->flags & PF_MEMALLOC) || 20718c2ecf20Sopenharmony_ci (inode->i_sb->s_blocksize == PAGE_SIZE)) { 20728c2ecf20Sopenharmony_ci /* 20738c2ecf20Sopenharmony_ci * For memory cleaning there's no point in writing only 20748c2ecf20Sopenharmony_ci * some buffers. So just bail out. Warn if we came here 20758c2ecf20Sopenharmony_ci * from direct reclaim. 20768c2ecf20Sopenharmony_ci */ 20778c2ecf20Sopenharmony_ci WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) 20788c2ecf20Sopenharmony_ci == PF_MEMALLOC); 20798c2ecf20Sopenharmony_ci unlock_page(page); 20808c2ecf20Sopenharmony_ci return 0; 20818c2ecf20Sopenharmony_ci } 20828c2ecf20Sopenharmony_ci keep_towrite = true; 20838c2ecf20Sopenharmony_ci } 20848c2ecf20Sopenharmony_ci 20858c2ecf20Sopenharmony_ci if (PageChecked(page) && ext4_should_journal_data(inode)) 20868c2ecf20Sopenharmony_ci /* 20878c2ecf20Sopenharmony_ci * It's mmapped pagecache. 
Add buffers and journal it. There 20888c2ecf20Sopenharmony_ci * doesn't seem much point in redirtying the page here. 20898c2ecf20Sopenharmony_ci */ 20908c2ecf20Sopenharmony_ci return __ext4_journalled_writepage(page, len); 20918c2ecf20Sopenharmony_ci 20928c2ecf20Sopenharmony_ci ext4_io_submit_init(&io_submit, wbc); 20938c2ecf20Sopenharmony_ci io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); 20948c2ecf20Sopenharmony_ci if (!io_submit.io_end) { 20958c2ecf20Sopenharmony_ci redirty_page_for_writepage(wbc, page); 20968c2ecf20Sopenharmony_ci unlock_page(page); 20978c2ecf20Sopenharmony_ci return -ENOMEM; 20988c2ecf20Sopenharmony_ci } 20998c2ecf20Sopenharmony_ci ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite); 21008c2ecf20Sopenharmony_ci ext4_io_submit(&io_submit); 21018c2ecf20Sopenharmony_ci /* Drop io_end reference we got from init */ 21028c2ecf20Sopenharmony_ci ext4_put_io_end_defer(io_submit.io_end); 21038c2ecf20Sopenharmony_ci return ret; 21048c2ecf20Sopenharmony_ci} 21058c2ecf20Sopenharmony_ci 21068c2ecf20Sopenharmony_cistatic int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) 21078c2ecf20Sopenharmony_ci{ 21088c2ecf20Sopenharmony_ci int len; 21098c2ecf20Sopenharmony_ci loff_t size; 21108c2ecf20Sopenharmony_ci int err; 21118c2ecf20Sopenharmony_ci 21128c2ecf20Sopenharmony_ci BUG_ON(page->index != mpd->first_page); 21138c2ecf20Sopenharmony_ci clear_page_dirty_for_io(page); 21148c2ecf20Sopenharmony_ci /* 21158c2ecf20Sopenharmony_ci * We have to be very careful here! Nothing protects writeback path 21168c2ecf20Sopenharmony_ci * against i_size changes and the page can be writeably mapped into 21178c2ecf20Sopenharmony_ci * page tables. So an application can be growing i_size and writing 21188c2ecf20Sopenharmony_ci * data through mmap while writeback runs. 
clear_page_dirty_for_io() 21198c2ecf20Sopenharmony_ci * write-protects our page in page tables and the page cannot get 21208c2ecf20Sopenharmony_ci * written to again until we release page lock. So only after 21218c2ecf20Sopenharmony_ci * clear_page_dirty_for_io() we are safe to sample i_size for 21228c2ecf20Sopenharmony_ci * ext4_bio_write_page() to zero-out tail of the written page. We rely 21238c2ecf20Sopenharmony_ci * on the barrier provided by TestClearPageDirty in 21248c2ecf20Sopenharmony_ci * clear_page_dirty_for_io() to make sure i_size is really sampled only 21258c2ecf20Sopenharmony_ci * after page tables are updated. 21268c2ecf20Sopenharmony_ci */ 21278c2ecf20Sopenharmony_ci size = i_size_read(mpd->inode); 21288c2ecf20Sopenharmony_ci if (page->index == size >> PAGE_SHIFT && 21298c2ecf20Sopenharmony_ci !ext4_verity_in_progress(mpd->inode)) 21308c2ecf20Sopenharmony_ci len = size & ~PAGE_MASK; 21318c2ecf20Sopenharmony_ci else 21328c2ecf20Sopenharmony_ci len = PAGE_SIZE; 21338c2ecf20Sopenharmony_ci err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false); 21348c2ecf20Sopenharmony_ci if (!err) 21358c2ecf20Sopenharmony_ci mpd->wbc->nr_to_write--; 21368c2ecf20Sopenharmony_ci mpd->first_page++; 21378c2ecf20Sopenharmony_ci 21388c2ecf20Sopenharmony_ci return err; 21398c2ecf20Sopenharmony_ci} 21408c2ecf20Sopenharmony_ci 21418c2ecf20Sopenharmony_ci#define BH_FLAGS (BIT(BH_Unwritten) | BIT(BH_Delay)) 21428c2ecf20Sopenharmony_ci 21438c2ecf20Sopenharmony_ci/* 21448c2ecf20Sopenharmony_ci * mballoc gives us at most this number of blocks... 21458c2ecf20Sopenharmony_ci * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). 21468c2ecf20Sopenharmony_ci * The rest of mballoc seems to handle chunks up to full group size. 
21478c2ecf20Sopenharmony_ci */ 21488c2ecf20Sopenharmony_ci#define MAX_WRITEPAGES_EXTENT_LEN 2048 21498c2ecf20Sopenharmony_ci 21508c2ecf20Sopenharmony_ci/* 21518c2ecf20Sopenharmony_ci * mpage_add_bh_to_extent - try to add bh to extent of blocks to map 21528c2ecf20Sopenharmony_ci * 21538c2ecf20Sopenharmony_ci * @mpd - extent of blocks 21548c2ecf20Sopenharmony_ci * @lblk - logical number of the block in the file 21558c2ecf20Sopenharmony_ci * @bh - buffer head we want to add to the extent 21568c2ecf20Sopenharmony_ci * 21578c2ecf20Sopenharmony_ci * The function is used to collect contig. blocks in the same state. If the 21588c2ecf20Sopenharmony_ci * buffer doesn't require mapping for writeback and we haven't started the 21598c2ecf20Sopenharmony_ci * extent of buffers to map yet, the function returns 'true' immediately - the 21608c2ecf20Sopenharmony_ci * caller can write the buffer right away. Otherwise the function returns true 21618c2ecf20Sopenharmony_ci * if the block has been added to the extent, false if the block couldn't be 21628c2ecf20Sopenharmony_ci * added. 21638c2ecf20Sopenharmony_ci */ 21648c2ecf20Sopenharmony_cistatic bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, 21658c2ecf20Sopenharmony_ci struct buffer_head *bh) 21668c2ecf20Sopenharmony_ci{ 21678c2ecf20Sopenharmony_ci struct ext4_map_blocks *map = &mpd->map; 21688c2ecf20Sopenharmony_ci 21698c2ecf20Sopenharmony_ci /* Buffer that doesn't need mapping for writeback? */ 21708c2ecf20Sopenharmony_ci if (!buffer_dirty(bh) || !buffer_mapped(bh) || 21718c2ecf20Sopenharmony_ci (!buffer_delay(bh) && !buffer_unwritten(bh))) { 21728c2ecf20Sopenharmony_ci /* So far no extent to map => we write the buffer right away */ 21738c2ecf20Sopenharmony_ci if (map->m_len == 0) 21748c2ecf20Sopenharmony_ci return true; 21758c2ecf20Sopenharmony_ci return false; 21768c2ecf20Sopenharmony_ci } 21778c2ecf20Sopenharmony_ci 21788c2ecf20Sopenharmony_ci /* First block in the extent? 
*/ 21798c2ecf20Sopenharmony_ci if (map->m_len == 0) { 21808c2ecf20Sopenharmony_ci /* We cannot map unless handle is started... */ 21818c2ecf20Sopenharmony_ci if (!mpd->do_map) 21828c2ecf20Sopenharmony_ci return false; 21838c2ecf20Sopenharmony_ci map->m_lblk = lblk; 21848c2ecf20Sopenharmony_ci map->m_len = 1; 21858c2ecf20Sopenharmony_ci map->m_flags = bh->b_state & BH_FLAGS; 21868c2ecf20Sopenharmony_ci return true; 21878c2ecf20Sopenharmony_ci } 21888c2ecf20Sopenharmony_ci 21898c2ecf20Sopenharmony_ci /* Don't go larger than mballoc is willing to allocate */ 21908c2ecf20Sopenharmony_ci if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) 21918c2ecf20Sopenharmony_ci return false; 21928c2ecf20Sopenharmony_ci 21938c2ecf20Sopenharmony_ci /* Can we merge the block to our big extent? */ 21948c2ecf20Sopenharmony_ci if (lblk == map->m_lblk + map->m_len && 21958c2ecf20Sopenharmony_ci (bh->b_state & BH_FLAGS) == map->m_flags) { 21968c2ecf20Sopenharmony_ci map->m_len++; 21978c2ecf20Sopenharmony_ci return true; 21988c2ecf20Sopenharmony_ci } 21998c2ecf20Sopenharmony_ci return false; 22008c2ecf20Sopenharmony_ci} 22018c2ecf20Sopenharmony_ci 22028c2ecf20Sopenharmony_ci/* 22038c2ecf20Sopenharmony_ci * mpage_process_page_bufs - submit page buffers for IO or add them to extent 22048c2ecf20Sopenharmony_ci * 22058c2ecf20Sopenharmony_ci * @mpd - extent of blocks for mapping 22068c2ecf20Sopenharmony_ci * @head - the first buffer in the page 22078c2ecf20Sopenharmony_ci * @bh - buffer we should start processing from 22088c2ecf20Sopenharmony_ci * @lblk - logical number of the block in the file corresponding to @bh 22098c2ecf20Sopenharmony_ci * 22108c2ecf20Sopenharmony_ci * Walk through page buffers from @bh upto @head (exclusive) and either submit 22118c2ecf20Sopenharmony_ci * the page for IO if all buffers in this page were mapped and there's no 22128c2ecf20Sopenharmony_ci * accumulated extent of buffers to map or add buffers in the page to the 22138c2ecf20Sopenharmony_ci * extent of buffers to map. 
The function returns 1 if the caller can continue 22148c2ecf20Sopenharmony_ci * by processing the next page, 0 if it should stop adding buffers to the 22158c2ecf20Sopenharmony_ci * extent to map because we cannot extend it anymore. It can also return value 22168c2ecf20Sopenharmony_ci * < 0 in case of error during IO submission. 22178c2ecf20Sopenharmony_ci */ 22188c2ecf20Sopenharmony_cistatic int mpage_process_page_bufs(struct mpage_da_data *mpd, 22198c2ecf20Sopenharmony_ci struct buffer_head *head, 22208c2ecf20Sopenharmony_ci struct buffer_head *bh, 22218c2ecf20Sopenharmony_ci ext4_lblk_t lblk) 22228c2ecf20Sopenharmony_ci{ 22238c2ecf20Sopenharmony_ci struct inode *inode = mpd->inode; 22248c2ecf20Sopenharmony_ci int err; 22258c2ecf20Sopenharmony_ci ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1) 22268c2ecf20Sopenharmony_ci >> inode->i_blkbits; 22278c2ecf20Sopenharmony_ci 22288c2ecf20Sopenharmony_ci if (ext4_verity_in_progress(inode)) 22298c2ecf20Sopenharmony_ci blocks = EXT_MAX_BLOCKS; 22308c2ecf20Sopenharmony_ci 22318c2ecf20Sopenharmony_ci do { 22328c2ecf20Sopenharmony_ci BUG_ON(buffer_locked(bh)); 22338c2ecf20Sopenharmony_ci 22348c2ecf20Sopenharmony_ci if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) { 22358c2ecf20Sopenharmony_ci /* Found extent to map? */ 22368c2ecf20Sopenharmony_ci if (mpd->map.m_len) 22378c2ecf20Sopenharmony_ci return 0; 22388c2ecf20Sopenharmony_ci /* Buffer needs mapping and handle is not started? */ 22398c2ecf20Sopenharmony_ci if (!mpd->do_map) 22408c2ecf20Sopenharmony_ci return 0; 22418c2ecf20Sopenharmony_ci /* Everything mapped so far and we hit EOF */ 22428c2ecf20Sopenharmony_ci break; 22438c2ecf20Sopenharmony_ci } 22448c2ecf20Sopenharmony_ci } while (lblk++, (bh = bh->b_this_page) != head); 22458c2ecf20Sopenharmony_ci /* So far everything mapped? Submit the page for IO. 
*/ 22468c2ecf20Sopenharmony_ci if (mpd->map.m_len == 0) { 22478c2ecf20Sopenharmony_ci err = mpage_submit_page(mpd, head->b_page); 22488c2ecf20Sopenharmony_ci if (err < 0) 22498c2ecf20Sopenharmony_ci return err; 22508c2ecf20Sopenharmony_ci } 22518c2ecf20Sopenharmony_ci if (lblk >= blocks) { 22528c2ecf20Sopenharmony_ci mpd->scanned_until_end = 1; 22538c2ecf20Sopenharmony_ci return 0; 22548c2ecf20Sopenharmony_ci } 22558c2ecf20Sopenharmony_ci return 1; 22568c2ecf20Sopenharmony_ci} 22578c2ecf20Sopenharmony_ci 22588c2ecf20Sopenharmony_ci/* 22598c2ecf20Sopenharmony_ci * mpage_process_page - update page buffers corresponding to changed extent and 22608c2ecf20Sopenharmony_ci * may submit fully mapped page for IO 22618c2ecf20Sopenharmony_ci * 22628c2ecf20Sopenharmony_ci * @mpd - description of extent to map, on return next extent to map 22638c2ecf20Sopenharmony_ci * @m_lblk - logical block mapping. 22648c2ecf20Sopenharmony_ci * @m_pblk - corresponding physical mapping. 22658c2ecf20Sopenharmony_ci * @map_bh - determines on return whether this page requires any further 22668c2ecf20Sopenharmony_ci * mapping or not. 22678c2ecf20Sopenharmony_ci * Scan given page buffers corresponding to changed extent and update buffer 22688c2ecf20Sopenharmony_ci * state according to new extent state. 22698c2ecf20Sopenharmony_ci * We map delalloc buffers to their physical location, clear unwritten bits. 22708c2ecf20Sopenharmony_ci * If the given page is not fully mapped, we update @map to the next extent in 22718c2ecf20Sopenharmony_ci * the given page that needs mapping & return @map_bh as true. 
22728c2ecf20Sopenharmony_ci */ 22738c2ecf20Sopenharmony_cistatic int mpage_process_page(struct mpage_da_data *mpd, struct page *page, 22748c2ecf20Sopenharmony_ci ext4_lblk_t *m_lblk, ext4_fsblk_t *m_pblk, 22758c2ecf20Sopenharmony_ci bool *map_bh) 22768c2ecf20Sopenharmony_ci{ 22778c2ecf20Sopenharmony_ci struct buffer_head *head, *bh; 22788c2ecf20Sopenharmony_ci ext4_io_end_t *io_end = mpd->io_submit.io_end; 22798c2ecf20Sopenharmony_ci ext4_lblk_t lblk = *m_lblk; 22808c2ecf20Sopenharmony_ci ext4_fsblk_t pblock = *m_pblk; 22818c2ecf20Sopenharmony_ci int err = 0; 22828c2ecf20Sopenharmony_ci int blkbits = mpd->inode->i_blkbits; 22838c2ecf20Sopenharmony_ci ssize_t io_end_size = 0; 22848c2ecf20Sopenharmony_ci struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end); 22858c2ecf20Sopenharmony_ci 22868c2ecf20Sopenharmony_ci bh = head = page_buffers(page); 22878c2ecf20Sopenharmony_ci do { 22888c2ecf20Sopenharmony_ci if (lblk < mpd->map.m_lblk) 22898c2ecf20Sopenharmony_ci continue; 22908c2ecf20Sopenharmony_ci if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { 22918c2ecf20Sopenharmony_ci /* 22928c2ecf20Sopenharmony_ci * Buffer after end of mapped extent. 22938c2ecf20Sopenharmony_ci * Find next buffer in the page to map. 
22948c2ecf20Sopenharmony_ci */ 22958c2ecf20Sopenharmony_ci mpd->map.m_len = 0; 22968c2ecf20Sopenharmony_ci mpd->map.m_flags = 0; 22978c2ecf20Sopenharmony_ci io_end_vec->size += io_end_size; 22988c2ecf20Sopenharmony_ci io_end_size = 0; 22998c2ecf20Sopenharmony_ci 23008c2ecf20Sopenharmony_ci err = mpage_process_page_bufs(mpd, head, bh, lblk); 23018c2ecf20Sopenharmony_ci if (err > 0) 23028c2ecf20Sopenharmony_ci err = 0; 23038c2ecf20Sopenharmony_ci if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) { 23048c2ecf20Sopenharmony_ci io_end_vec = ext4_alloc_io_end_vec(io_end); 23058c2ecf20Sopenharmony_ci if (IS_ERR(io_end_vec)) { 23068c2ecf20Sopenharmony_ci err = PTR_ERR(io_end_vec); 23078c2ecf20Sopenharmony_ci goto out; 23088c2ecf20Sopenharmony_ci } 23098c2ecf20Sopenharmony_ci io_end_vec->offset = (loff_t)mpd->map.m_lblk << blkbits; 23108c2ecf20Sopenharmony_ci } 23118c2ecf20Sopenharmony_ci *map_bh = true; 23128c2ecf20Sopenharmony_ci goto out; 23138c2ecf20Sopenharmony_ci } 23148c2ecf20Sopenharmony_ci if (buffer_delay(bh)) { 23158c2ecf20Sopenharmony_ci clear_buffer_delay(bh); 23168c2ecf20Sopenharmony_ci bh->b_blocknr = pblock++; 23178c2ecf20Sopenharmony_ci } 23188c2ecf20Sopenharmony_ci clear_buffer_unwritten(bh); 23198c2ecf20Sopenharmony_ci io_end_size += (1 << blkbits); 23208c2ecf20Sopenharmony_ci } while (lblk++, (bh = bh->b_this_page) != head); 23218c2ecf20Sopenharmony_ci 23228c2ecf20Sopenharmony_ci io_end_vec->size += io_end_size; 23238c2ecf20Sopenharmony_ci io_end_size = 0; 23248c2ecf20Sopenharmony_ci *map_bh = false; 23258c2ecf20Sopenharmony_ciout: 23268c2ecf20Sopenharmony_ci *m_lblk = lblk; 23278c2ecf20Sopenharmony_ci *m_pblk = pblock; 23288c2ecf20Sopenharmony_ci return err; 23298c2ecf20Sopenharmony_ci} 23308c2ecf20Sopenharmony_ci 23318c2ecf20Sopenharmony_ci/* 23328c2ecf20Sopenharmony_ci * mpage_map_buffers - update buffers corresponding to changed extent and 23338c2ecf20Sopenharmony_ci * submit fully mapped pages for IO 23348c2ecf20Sopenharmony_ci * 
23358c2ecf20Sopenharmony_ci * @mpd - description of extent to map, on return next extent to map 23368c2ecf20Sopenharmony_ci * 23378c2ecf20Sopenharmony_ci * Scan buffers corresponding to changed extent (we expect corresponding pages 23388c2ecf20Sopenharmony_ci * to be already locked) and update buffer state according to new extent state. 23398c2ecf20Sopenharmony_ci * We map delalloc buffers to their physical location, clear unwritten bits, 23408c2ecf20Sopenharmony_ci * and mark buffers as uninit when we perform writes to unwritten extents 23418c2ecf20Sopenharmony_ci * and do extent conversion after IO is finished. If the last page is not fully 23428c2ecf20Sopenharmony_ci * mapped, we update @map to the next extent in the last page that needs 23438c2ecf20Sopenharmony_ci * mapping. Otherwise we submit the page for IO. 23448c2ecf20Sopenharmony_ci */ 23458c2ecf20Sopenharmony_cistatic int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) 23468c2ecf20Sopenharmony_ci{ 23478c2ecf20Sopenharmony_ci struct pagevec pvec; 23488c2ecf20Sopenharmony_ci int nr_pages, i; 23498c2ecf20Sopenharmony_ci struct inode *inode = mpd->inode; 23508c2ecf20Sopenharmony_ci int bpp_bits = PAGE_SHIFT - inode->i_blkbits; 23518c2ecf20Sopenharmony_ci pgoff_t start, end; 23528c2ecf20Sopenharmony_ci ext4_lblk_t lblk; 23538c2ecf20Sopenharmony_ci ext4_fsblk_t pblock; 23548c2ecf20Sopenharmony_ci int err; 23558c2ecf20Sopenharmony_ci bool map_bh = false; 23568c2ecf20Sopenharmony_ci 23578c2ecf20Sopenharmony_ci start = mpd->map.m_lblk >> bpp_bits; 23588c2ecf20Sopenharmony_ci end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; 23598c2ecf20Sopenharmony_ci lblk = start << bpp_bits; 23608c2ecf20Sopenharmony_ci pblock = mpd->map.m_pblk; 23618c2ecf20Sopenharmony_ci 23628c2ecf20Sopenharmony_ci pagevec_init(&pvec); 23638c2ecf20Sopenharmony_ci while (start <= end) { 23648c2ecf20Sopenharmony_ci nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, 23658c2ecf20Sopenharmony_ci &start, end); 
23668c2ecf20Sopenharmony_ci if (nr_pages == 0) 23678c2ecf20Sopenharmony_ci break; 23688c2ecf20Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 23698c2ecf20Sopenharmony_ci struct page *page = pvec.pages[i]; 23708c2ecf20Sopenharmony_ci 23718c2ecf20Sopenharmony_ci err = mpage_process_page(mpd, page, &lblk, &pblock, 23728c2ecf20Sopenharmony_ci &map_bh); 23738c2ecf20Sopenharmony_ci /* 23748c2ecf20Sopenharmony_ci * If map_bh is true, means page may require further bh 23758c2ecf20Sopenharmony_ci * mapping, or maybe the page was submitted for IO. 23768c2ecf20Sopenharmony_ci * So we return to call further extent mapping. 23778c2ecf20Sopenharmony_ci */ 23788c2ecf20Sopenharmony_ci if (err < 0 || map_bh) 23798c2ecf20Sopenharmony_ci goto out; 23808c2ecf20Sopenharmony_ci /* Page fully mapped - let IO run! */ 23818c2ecf20Sopenharmony_ci err = mpage_submit_page(mpd, page); 23828c2ecf20Sopenharmony_ci if (err < 0) 23838c2ecf20Sopenharmony_ci goto out; 23848c2ecf20Sopenharmony_ci } 23858c2ecf20Sopenharmony_ci pagevec_release(&pvec); 23868c2ecf20Sopenharmony_ci } 23878c2ecf20Sopenharmony_ci /* Extent fully mapped and matches with page boundary. We are done. 
*/ 23888c2ecf20Sopenharmony_ci mpd->map.m_len = 0; 23898c2ecf20Sopenharmony_ci mpd->map.m_flags = 0; 23908c2ecf20Sopenharmony_ci return 0; 23918c2ecf20Sopenharmony_ciout: 23928c2ecf20Sopenharmony_ci pagevec_release(&pvec); 23938c2ecf20Sopenharmony_ci return err; 23948c2ecf20Sopenharmony_ci} 23958c2ecf20Sopenharmony_ci 23968c2ecf20Sopenharmony_cistatic int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) 23978c2ecf20Sopenharmony_ci{ 23988c2ecf20Sopenharmony_ci struct inode *inode = mpd->inode; 23998c2ecf20Sopenharmony_ci struct ext4_map_blocks *map = &mpd->map; 24008c2ecf20Sopenharmony_ci int get_blocks_flags; 24018c2ecf20Sopenharmony_ci int err, dioread_nolock; 24028c2ecf20Sopenharmony_ci 24038c2ecf20Sopenharmony_ci trace_ext4_da_write_pages_extent(inode, map); 24048c2ecf20Sopenharmony_ci /* 24058c2ecf20Sopenharmony_ci * Call ext4_map_blocks() to allocate any delayed allocation blocks, or 24068c2ecf20Sopenharmony_ci * to convert an unwritten extent to be initialized (in the case 24078c2ecf20Sopenharmony_ci * where we have written into one or more preallocated blocks). It is 24088c2ecf20Sopenharmony_ci * possible that we're going to need more metadata blocks than 24098c2ecf20Sopenharmony_ci * previously reserved. However we must not fail because we're in 24108c2ecf20Sopenharmony_ci * writeback and there is nothing we can do about it so it might result 24118c2ecf20Sopenharmony_ci * in data loss. So use reserved blocks to allocate metadata if 24128c2ecf20Sopenharmony_ci * possible. 24138c2ecf20Sopenharmony_ci * 24148c2ecf20Sopenharmony_ci * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if 24158c2ecf20Sopenharmony_ci * the blocks in question are delalloc blocks. This indicates 24168c2ecf20Sopenharmony_ci * that the blocks and quotas has already been checked when 24178c2ecf20Sopenharmony_ci * the data was copied into the page cache. 
24188c2ecf20Sopenharmony_ci */ 24198c2ecf20Sopenharmony_ci get_blocks_flags = EXT4_GET_BLOCKS_CREATE | 24208c2ecf20Sopenharmony_ci EXT4_GET_BLOCKS_METADATA_NOFAIL | 24218c2ecf20Sopenharmony_ci EXT4_GET_BLOCKS_IO_SUBMIT; 24228c2ecf20Sopenharmony_ci dioread_nolock = ext4_should_dioread_nolock(inode); 24238c2ecf20Sopenharmony_ci if (dioread_nolock) 24248c2ecf20Sopenharmony_ci get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; 24258c2ecf20Sopenharmony_ci if (map->m_flags & BIT(BH_Delay)) 24268c2ecf20Sopenharmony_ci get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 24278c2ecf20Sopenharmony_ci 24288c2ecf20Sopenharmony_ci err = ext4_map_blocks(handle, inode, map, get_blocks_flags); 24298c2ecf20Sopenharmony_ci if (err < 0) 24308c2ecf20Sopenharmony_ci return err; 24318c2ecf20Sopenharmony_ci if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) { 24328c2ecf20Sopenharmony_ci if (!mpd->io_submit.io_end->handle && 24338c2ecf20Sopenharmony_ci ext4_handle_valid(handle)) { 24348c2ecf20Sopenharmony_ci mpd->io_submit.io_end->handle = handle->h_rsv_handle; 24358c2ecf20Sopenharmony_ci handle->h_rsv_handle = NULL; 24368c2ecf20Sopenharmony_ci } 24378c2ecf20Sopenharmony_ci ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); 24388c2ecf20Sopenharmony_ci } 24398c2ecf20Sopenharmony_ci 24408c2ecf20Sopenharmony_ci BUG_ON(map->m_len == 0); 24418c2ecf20Sopenharmony_ci return 0; 24428c2ecf20Sopenharmony_ci} 24438c2ecf20Sopenharmony_ci 24448c2ecf20Sopenharmony_ci/* 24458c2ecf20Sopenharmony_ci * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length 24468c2ecf20Sopenharmony_ci * mpd->len and submit pages underlying it for IO 24478c2ecf20Sopenharmony_ci * 24488c2ecf20Sopenharmony_ci * @handle - handle for journal operations 24498c2ecf20Sopenharmony_ci * @mpd - extent to map 24508c2ecf20Sopenharmony_ci * @give_up_on_write - we set this to true iff there is a fatal error and there 24518c2ecf20Sopenharmony_ci * is no hope of writing the data. 
The caller should discard 24528c2ecf20Sopenharmony_ci * dirty pages to avoid infinite loops. 24538c2ecf20Sopenharmony_ci * 24548c2ecf20Sopenharmony_ci * The function maps extent starting at mpd->lblk of length mpd->len. If it is 24558c2ecf20Sopenharmony_ci * delayed, blocks are allocated, if it is unwritten, we may need to convert 24568c2ecf20Sopenharmony_ci * them to initialized or split the described range from larger unwritten 24578c2ecf20Sopenharmony_ci * extent. Note that we need not map all the described range since allocation 24588c2ecf20Sopenharmony_ci * can return less blocks or the range is covered by more unwritten extents. We 24598c2ecf20Sopenharmony_ci * cannot map more because we are limited by reserved transaction credits. On 24608c2ecf20Sopenharmony_ci * the other hand we always make sure that the last touched page is fully 24618c2ecf20Sopenharmony_ci * mapped so that it can be written out (and thus forward progress is 24628c2ecf20Sopenharmony_ci * guaranteed). After mapping we submit all mapped pages for IO. 
24638c2ecf20Sopenharmony_ci */ 24648c2ecf20Sopenharmony_cistatic int mpage_map_and_submit_extent(handle_t *handle, 24658c2ecf20Sopenharmony_ci struct mpage_da_data *mpd, 24668c2ecf20Sopenharmony_ci bool *give_up_on_write) 24678c2ecf20Sopenharmony_ci{ 24688c2ecf20Sopenharmony_ci struct inode *inode = mpd->inode; 24698c2ecf20Sopenharmony_ci struct ext4_map_blocks *map = &mpd->map; 24708c2ecf20Sopenharmony_ci int err; 24718c2ecf20Sopenharmony_ci loff_t disksize; 24728c2ecf20Sopenharmony_ci int progress = 0; 24738c2ecf20Sopenharmony_ci ext4_io_end_t *io_end = mpd->io_submit.io_end; 24748c2ecf20Sopenharmony_ci struct ext4_io_end_vec *io_end_vec; 24758c2ecf20Sopenharmony_ci 24768c2ecf20Sopenharmony_ci io_end_vec = ext4_alloc_io_end_vec(io_end); 24778c2ecf20Sopenharmony_ci if (IS_ERR(io_end_vec)) 24788c2ecf20Sopenharmony_ci return PTR_ERR(io_end_vec); 24798c2ecf20Sopenharmony_ci io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits; 24808c2ecf20Sopenharmony_ci do { 24818c2ecf20Sopenharmony_ci err = mpage_map_one_extent(handle, mpd); 24828c2ecf20Sopenharmony_ci if (err < 0) { 24838c2ecf20Sopenharmony_ci struct super_block *sb = inode->i_sb; 24848c2ecf20Sopenharmony_ci 24858c2ecf20Sopenharmony_ci if (ext4_forced_shutdown(EXT4_SB(sb)) || 24868c2ecf20Sopenharmony_ci ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) 24878c2ecf20Sopenharmony_ci goto invalidate_dirty_pages; 24888c2ecf20Sopenharmony_ci /* 24898c2ecf20Sopenharmony_ci * Let the uper layers retry transient errors. 24908c2ecf20Sopenharmony_ci * In the case of ENOSPC, if ext4_count_free_blocks() 24918c2ecf20Sopenharmony_ci * is non-zero, a commit should free up blocks. 
24928c2ecf20Sopenharmony_ci */ 24938c2ecf20Sopenharmony_ci if ((err == -ENOMEM) || 24948c2ecf20Sopenharmony_ci (err == -ENOSPC && ext4_count_free_clusters(sb))) { 24958c2ecf20Sopenharmony_ci if (progress) 24968c2ecf20Sopenharmony_ci goto update_disksize; 24978c2ecf20Sopenharmony_ci return err; 24988c2ecf20Sopenharmony_ci } 24998c2ecf20Sopenharmony_ci ext4_msg(sb, KERN_CRIT, 25008c2ecf20Sopenharmony_ci "Delayed block allocation failed for " 25018c2ecf20Sopenharmony_ci "inode %lu at logical offset %llu with" 25028c2ecf20Sopenharmony_ci " max blocks %u with error %d", 25038c2ecf20Sopenharmony_ci inode->i_ino, 25048c2ecf20Sopenharmony_ci (unsigned long long)map->m_lblk, 25058c2ecf20Sopenharmony_ci (unsigned)map->m_len, -err); 25068c2ecf20Sopenharmony_ci ext4_msg(sb, KERN_CRIT, 25078c2ecf20Sopenharmony_ci "This should not happen!! Data will " 25088c2ecf20Sopenharmony_ci "be lost\n"); 25098c2ecf20Sopenharmony_ci if (err == -ENOSPC) 25108c2ecf20Sopenharmony_ci ext4_print_free_blocks(inode); 25118c2ecf20Sopenharmony_ci invalidate_dirty_pages: 25128c2ecf20Sopenharmony_ci *give_up_on_write = true; 25138c2ecf20Sopenharmony_ci return err; 25148c2ecf20Sopenharmony_ci } 25158c2ecf20Sopenharmony_ci progress = 1; 25168c2ecf20Sopenharmony_ci /* 25178c2ecf20Sopenharmony_ci * Update buffer state, submit mapped pages, and get us new 25188c2ecf20Sopenharmony_ci * extent to map 25198c2ecf20Sopenharmony_ci */ 25208c2ecf20Sopenharmony_ci err = mpage_map_and_submit_buffers(mpd); 25218c2ecf20Sopenharmony_ci if (err < 0) 25228c2ecf20Sopenharmony_ci goto update_disksize; 25238c2ecf20Sopenharmony_ci } while (map->m_len); 25248c2ecf20Sopenharmony_ci 25258c2ecf20Sopenharmony_ciupdate_disksize: 25268c2ecf20Sopenharmony_ci /* 25278c2ecf20Sopenharmony_ci * Update on-disk size after IO is submitted. Races with 25288c2ecf20Sopenharmony_ci * truncate are avoided by checking i_size under i_data_sem. 
25298c2ecf20Sopenharmony_ci */ 25308c2ecf20Sopenharmony_ci disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; 25318c2ecf20Sopenharmony_ci if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { 25328c2ecf20Sopenharmony_ci int err2; 25338c2ecf20Sopenharmony_ci loff_t i_size; 25348c2ecf20Sopenharmony_ci 25358c2ecf20Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 25368c2ecf20Sopenharmony_ci i_size = i_size_read(inode); 25378c2ecf20Sopenharmony_ci if (disksize > i_size) 25388c2ecf20Sopenharmony_ci disksize = i_size; 25398c2ecf20Sopenharmony_ci if (disksize > EXT4_I(inode)->i_disksize) 25408c2ecf20Sopenharmony_ci EXT4_I(inode)->i_disksize = disksize; 25418c2ecf20Sopenharmony_ci up_write(&EXT4_I(inode)->i_data_sem); 25428c2ecf20Sopenharmony_ci err2 = ext4_mark_inode_dirty(handle, inode); 25438c2ecf20Sopenharmony_ci if (err2) { 25448c2ecf20Sopenharmony_ci ext4_error_err(inode->i_sb, -err2, 25458c2ecf20Sopenharmony_ci "Failed to mark inode %lu dirty", 25468c2ecf20Sopenharmony_ci inode->i_ino); 25478c2ecf20Sopenharmony_ci } 25488c2ecf20Sopenharmony_ci if (!err) 25498c2ecf20Sopenharmony_ci err = err2; 25508c2ecf20Sopenharmony_ci } 25518c2ecf20Sopenharmony_ci return err; 25528c2ecf20Sopenharmony_ci} 25538c2ecf20Sopenharmony_ci 25548c2ecf20Sopenharmony_ci/* 25558c2ecf20Sopenharmony_ci * Calculate the total number of credits to reserve for one writepages 25568c2ecf20Sopenharmony_ci * iteration. This is called from ext4_writepages(). We map an extent of 25578c2ecf20Sopenharmony_ci * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping 25588c2ecf20Sopenharmony_ci * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + 25598c2ecf20Sopenharmony_ci * bpp - 1 blocks in bpp different extents. 
25608c2ecf20Sopenharmony_ci */ 25618c2ecf20Sopenharmony_cistatic int ext4_da_writepages_trans_blocks(struct inode *inode) 25628c2ecf20Sopenharmony_ci{ 25638c2ecf20Sopenharmony_ci int bpp = ext4_journal_blocks_per_page(inode); 25648c2ecf20Sopenharmony_ci 25658c2ecf20Sopenharmony_ci return ext4_meta_trans_blocks(inode, 25668c2ecf20Sopenharmony_ci MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp); 25678c2ecf20Sopenharmony_ci} 25688c2ecf20Sopenharmony_ci 25698c2ecf20Sopenharmony_ci/* 25708c2ecf20Sopenharmony_ci * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages 25718c2ecf20Sopenharmony_ci * and underlying extent to map 25728c2ecf20Sopenharmony_ci * 25738c2ecf20Sopenharmony_ci * @mpd - where to look for pages 25748c2ecf20Sopenharmony_ci * 25758c2ecf20Sopenharmony_ci * Walk dirty pages in the mapping. If they are fully mapped, submit them for 25768c2ecf20Sopenharmony_ci * IO immediately. When we find a page which isn't mapped we start accumulating 25778c2ecf20Sopenharmony_ci * extent of buffers underlying these pages that needs mapping (formed by 25788c2ecf20Sopenharmony_ci * either delayed or unwritten buffers). We also lock the pages containing 25798c2ecf20Sopenharmony_ci * these buffers. The extent found is returned in @mpd structure (starting at 25808c2ecf20Sopenharmony_ci * mpd->lblk with length mpd->len blocks). 25818c2ecf20Sopenharmony_ci * 25828c2ecf20Sopenharmony_ci * Note that this function can attach bios to one io_end structure which are 25838c2ecf20Sopenharmony_ci * neither logically nor physically contiguous. Although it may seem as an 25848c2ecf20Sopenharmony_ci * unnecessary complication, it is actually inevitable in blocksize < pagesize 25858c2ecf20Sopenharmony_ci * case as we need to track IO to all buffers underlying a page in one io_end. 
25868c2ecf20Sopenharmony_ci */ 25878c2ecf20Sopenharmony_cistatic int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) 25888c2ecf20Sopenharmony_ci{ 25898c2ecf20Sopenharmony_ci struct address_space *mapping = mpd->inode->i_mapping; 25908c2ecf20Sopenharmony_ci struct pagevec pvec; 25918c2ecf20Sopenharmony_ci unsigned int nr_pages; 25928c2ecf20Sopenharmony_ci long left = mpd->wbc->nr_to_write; 25938c2ecf20Sopenharmony_ci pgoff_t index = mpd->first_page; 25948c2ecf20Sopenharmony_ci pgoff_t end = mpd->last_page; 25958c2ecf20Sopenharmony_ci xa_mark_t tag; 25968c2ecf20Sopenharmony_ci int i, err = 0; 25978c2ecf20Sopenharmony_ci int blkbits = mpd->inode->i_blkbits; 25988c2ecf20Sopenharmony_ci ext4_lblk_t lblk; 25998c2ecf20Sopenharmony_ci struct buffer_head *head; 26008c2ecf20Sopenharmony_ci 26018c2ecf20Sopenharmony_ci if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) 26028c2ecf20Sopenharmony_ci tag = PAGECACHE_TAG_TOWRITE; 26038c2ecf20Sopenharmony_ci else 26048c2ecf20Sopenharmony_ci tag = PAGECACHE_TAG_DIRTY; 26058c2ecf20Sopenharmony_ci 26068c2ecf20Sopenharmony_ci pagevec_init(&pvec); 26078c2ecf20Sopenharmony_ci mpd->map.m_len = 0; 26088c2ecf20Sopenharmony_ci mpd->next_page = index; 26098c2ecf20Sopenharmony_ci while (index <= end) { 26108c2ecf20Sopenharmony_ci nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, 26118c2ecf20Sopenharmony_ci tag); 26128c2ecf20Sopenharmony_ci if (nr_pages == 0) 26138c2ecf20Sopenharmony_ci break; 26148c2ecf20Sopenharmony_ci 26158c2ecf20Sopenharmony_ci for (i = 0; i < nr_pages; i++) { 26168c2ecf20Sopenharmony_ci struct page *page = pvec.pages[i]; 26178c2ecf20Sopenharmony_ci 26188c2ecf20Sopenharmony_ci /* 26198c2ecf20Sopenharmony_ci * Accumulated enough dirty pages? This doesn't apply 26208c2ecf20Sopenharmony_ci * to WB_SYNC_ALL mode. 
For integrity sync we have to 26218c2ecf20Sopenharmony_ci * keep going because someone may be concurrently 26228c2ecf20Sopenharmony_ci * dirtying pages, and we might have synced a lot of 26238c2ecf20Sopenharmony_ci * newly appeared dirty pages, but have not synced all 26248c2ecf20Sopenharmony_ci * of the old dirty pages. 26258c2ecf20Sopenharmony_ci */ 26268c2ecf20Sopenharmony_ci if (mpd->wbc->sync_mode == WB_SYNC_NONE && left <= 0) 26278c2ecf20Sopenharmony_ci goto out; 26288c2ecf20Sopenharmony_ci 26298c2ecf20Sopenharmony_ci /* If we can't merge this page, we are done. */ 26308c2ecf20Sopenharmony_ci if (mpd->map.m_len > 0 && mpd->next_page != page->index) 26318c2ecf20Sopenharmony_ci goto out; 26328c2ecf20Sopenharmony_ci 26338c2ecf20Sopenharmony_ci lock_page(page); 26348c2ecf20Sopenharmony_ci /* 26358c2ecf20Sopenharmony_ci * If the page is no longer dirty, or its mapping no 26368c2ecf20Sopenharmony_ci * longer corresponds to inode we are writing (which 26378c2ecf20Sopenharmony_ci * means it has been truncated or invalidated), or the 26388c2ecf20Sopenharmony_ci * page is already under writeback and we are not doing 26398c2ecf20Sopenharmony_ci * a data integrity writeback, skip the page 26408c2ecf20Sopenharmony_ci */ 26418c2ecf20Sopenharmony_ci if (!PageDirty(page) || 26428c2ecf20Sopenharmony_ci (PageWriteback(page) && 26438c2ecf20Sopenharmony_ci (mpd->wbc->sync_mode == WB_SYNC_NONE)) || 26448c2ecf20Sopenharmony_ci unlikely(page->mapping != mapping)) { 26458c2ecf20Sopenharmony_ci unlock_page(page); 26468c2ecf20Sopenharmony_ci continue; 26478c2ecf20Sopenharmony_ci } 26488c2ecf20Sopenharmony_ci 26498c2ecf20Sopenharmony_ci if (WARN_ON(!page_has_buffers(page))) { 26508c2ecf20Sopenharmony_ci cancel_page_dirty_status(page); 26518c2ecf20Sopenharmony_ci unlock_page(page); 26528c2ecf20Sopenharmony_ci continue; 26538c2ecf20Sopenharmony_ci } 26548c2ecf20Sopenharmony_ci 26558c2ecf20Sopenharmony_ci wait_on_page_writeback(page); 26568c2ecf20Sopenharmony_ci 
BUG_ON(PageWriteback(page)); 26578c2ecf20Sopenharmony_ci 26588c2ecf20Sopenharmony_ci /* 26598c2ecf20Sopenharmony_ci * Should never happen but for buggy code in 26608c2ecf20Sopenharmony_ci * other subsystems that call 26618c2ecf20Sopenharmony_ci * set_page_dirty() without properly warning 26628c2ecf20Sopenharmony_ci * the file system first. See [1] for more 26638c2ecf20Sopenharmony_ci * information. 26648c2ecf20Sopenharmony_ci * 26658c2ecf20Sopenharmony_ci * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz 26668c2ecf20Sopenharmony_ci */ 26678c2ecf20Sopenharmony_ci if (!page_has_buffers(page)) { 26688c2ecf20Sopenharmony_ci ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); 26698c2ecf20Sopenharmony_ci ClearPageDirty(page); 26708c2ecf20Sopenharmony_ci unlock_page(page); 26718c2ecf20Sopenharmony_ci continue; 26728c2ecf20Sopenharmony_ci } 26738c2ecf20Sopenharmony_ci 26748c2ecf20Sopenharmony_ci if (mpd->map.m_len == 0) 26758c2ecf20Sopenharmony_ci mpd->first_page = page->index; 26768c2ecf20Sopenharmony_ci mpd->next_page = page->index + 1; 26778c2ecf20Sopenharmony_ci /* Add all dirty buffers to mpd */ 26788c2ecf20Sopenharmony_ci lblk = ((ext4_lblk_t)page->index) << 26798c2ecf20Sopenharmony_ci (PAGE_SHIFT - blkbits); 26808c2ecf20Sopenharmony_ci head = page_buffers(page); 26818c2ecf20Sopenharmony_ci err = mpage_process_page_bufs(mpd, head, head, lblk); 26828c2ecf20Sopenharmony_ci if (err <= 0) 26838c2ecf20Sopenharmony_ci goto out; 26848c2ecf20Sopenharmony_ci err = 0; 26858c2ecf20Sopenharmony_ci left--; 26868c2ecf20Sopenharmony_ci } 26878c2ecf20Sopenharmony_ci pagevec_release(&pvec); 26888c2ecf20Sopenharmony_ci cond_resched(); 26898c2ecf20Sopenharmony_ci } 26908c2ecf20Sopenharmony_ci mpd->scanned_until_end = 1; 26918c2ecf20Sopenharmony_ci return 0; 26928c2ecf20Sopenharmony_ciout: 26938c2ecf20Sopenharmony_ci pagevec_release(&pvec); 26948c2ecf20Sopenharmony_ci return err; 26958c2ecf20Sopenharmony_ci} 
26968c2ecf20Sopenharmony_ci 26978c2ecf20Sopenharmony_cistatic int ext4_writepages(struct address_space *mapping, 26988c2ecf20Sopenharmony_ci struct writeback_control *wbc) 26998c2ecf20Sopenharmony_ci{ 27008c2ecf20Sopenharmony_ci pgoff_t writeback_index = 0; 27018c2ecf20Sopenharmony_ci long nr_to_write = wbc->nr_to_write; 27028c2ecf20Sopenharmony_ci int range_whole = 0; 27038c2ecf20Sopenharmony_ci int cycled = 1; 27048c2ecf20Sopenharmony_ci handle_t *handle = NULL; 27058c2ecf20Sopenharmony_ci struct mpage_da_data mpd; 27068c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 27078c2ecf20Sopenharmony_ci int needed_blocks, rsv_blocks = 0, ret = 0; 27088c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 27098c2ecf20Sopenharmony_ci struct blk_plug plug; 27108c2ecf20Sopenharmony_ci bool give_up_on_write = false; 27118c2ecf20Sopenharmony_ci 27128c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 27138c2ecf20Sopenharmony_ci return -EIO; 27148c2ecf20Sopenharmony_ci 27158c2ecf20Sopenharmony_ci percpu_down_read(&sbi->s_writepages_rwsem); 27168c2ecf20Sopenharmony_ci trace_ext4_writepages(inode, wbc); 27178c2ecf20Sopenharmony_ci 27188c2ecf20Sopenharmony_ci /* 27198c2ecf20Sopenharmony_ci * No pages to write? 
This is mainly a kludge to avoid starting 27208c2ecf20Sopenharmony_ci * a transaction for special inodes like journal inode on last iput() 27218c2ecf20Sopenharmony_ci * because that could violate lock ordering on umount 27228c2ecf20Sopenharmony_ci */ 27238c2ecf20Sopenharmony_ci if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) 27248c2ecf20Sopenharmony_ci goto out_writepages; 27258c2ecf20Sopenharmony_ci 27268c2ecf20Sopenharmony_ci if (ext4_should_journal_data(inode)) { 27278c2ecf20Sopenharmony_ci ret = generic_writepages(mapping, wbc); 27288c2ecf20Sopenharmony_ci goto out_writepages; 27298c2ecf20Sopenharmony_ci } 27308c2ecf20Sopenharmony_ci 27318c2ecf20Sopenharmony_ci /* 27328c2ecf20Sopenharmony_ci * If the filesystem has aborted, it is read-only, so return 27338c2ecf20Sopenharmony_ci * right away instead of dumping stack traces later on that 27348c2ecf20Sopenharmony_ci * will obscure the real source of the problem. We test 27358c2ecf20Sopenharmony_ci * EXT4_MF_FS_ABORTED instead of sb->s_flag's SB_RDONLY because 27368c2ecf20Sopenharmony_ci * the latter could be true if the filesystem is mounted 27378c2ecf20Sopenharmony_ci * read-only, and in that case, ext4_writepages should 27388c2ecf20Sopenharmony_ci * *never* be called, so if that ever happens, we would want 27398c2ecf20Sopenharmony_ci * the stack trace. 27408c2ecf20Sopenharmony_ci */ 27418c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(mapping->host->i_sb)) || 27428c2ecf20Sopenharmony_ci ext4_test_mount_flag(inode->i_sb, EXT4_MF_FS_ABORTED))) { 27438c2ecf20Sopenharmony_ci ret = -EROFS; 27448c2ecf20Sopenharmony_ci goto out_writepages; 27458c2ecf20Sopenharmony_ci } 27468c2ecf20Sopenharmony_ci 27478c2ecf20Sopenharmony_ci /* 27488c2ecf20Sopenharmony_ci * If we have inline data and arrive here, it means that 27498c2ecf20Sopenharmony_ci * we will soon create the block for the 1st page, so 27508c2ecf20Sopenharmony_ci * we'd better clear the inline data here. 
27518c2ecf20Sopenharmony_ci */ 27528c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) { 27538c2ecf20Sopenharmony_ci /* Just inode will be modified... */ 27548c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); 27558c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 27568c2ecf20Sopenharmony_ci ret = PTR_ERR(handle); 27578c2ecf20Sopenharmony_ci goto out_writepages; 27588c2ecf20Sopenharmony_ci } 27598c2ecf20Sopenharmony_ci BUG_ON(ext4_test_inode_state(inode, 27608c2ecf20Sopenharmony_ci EXT4_STATE_MAY_INLINE_DATA)); 27618c2ecf20Sopenharmony_ci ext4_destroy_inline_data(handle, inode); 27628c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 27638c2ecf20Sopenharmony_ci } 27648c2ecf20Sopenharmony_ci 27658c2ecf20Sopenharmony_ci if (ext4_should_dioread_nolock(inode)) { 27668c2ecf20Sopenharmony_ci /* 27678c2ecf20Sopenharmony_ci * We may need to convert up to one extent per block in 27688c2ecf20Sopenharmony_ci * the page and we may dirty the inode. 27698c2ecf20Sopenharmony_ci */ 27708c2ecf20Sopenharmony_ci rsv_blocks = 1 + ext4_chunk_trans_blocks(inode, 27718c2ecf20Sopenharmony_ci PAGE_SIZE >> inode->i_blkbits); 27728c2ecf20Sopenharmony_ci } 27738c2ecf20Sopenharmony_ci 27748c2ecf20Sopenharmony_ci if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 27758c2ecf20Sopenharmony_ci range_whole = 1; 27768c2ecf20Sopenharmony_ci 27778c2ecf20Sopenharmony_ci if (wbc->range_cyclic) { 27788c2ecf20Sopenharmony_ci writeback_index = mapping->writeback_index; 27798c2ecf20Sopenharmony_ci if (writeback_index) 27808c2ecf20Sopenharmony_ci cycled = 0; 27818c2ecf20Sopenharmony_ci mpd.first_page = writeback_index; 27828c2ecf20Sopenharmony_ci mpd.last_page = -1; 27838c2ecf20Sopenharmony_ci } else { 27848c2ecf20Sopenharmony_ci mpd.first_page = wbc->range_start >> PAGE_SHIFT; 27858c2ecf20Sopenharmony_ci mpd.last_page = wbc->range_end >> PAGE_SHIFT; 27868c2ecf20Sopenharmony_ci } 27878c2ecf20Sopenharmony_ci 27888c2ecf20Sopenharmony_ci mpd.inode = inode; 
27898c2ecf20Sopenharmony_ci mpd.wbc = wbc; 27908c2ecf20Sopenharmony_ci ext4_io_submit_init(&mpd.io_submit, wbc); 27918c2ecf20Sopenharmony_ciretry: 27928c2ecf20Sopenharmony_ci if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 27938c2ecf20Sopenharmony_ci tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page); 27948c2ecf20Sopenharmony_ci blk_start_plug(&plug); 27958c2ecf20Sopenharmony_ci 27968c2ecf20Sopenharmony_ci /* 27978c2ecf20Sopenharmony_ci * First writeback pages that don't need mapping - we can avoid 27988c2ecf20Sopenharmony_ci * starting a transaction unnecessarily and also avoid being blocked 27998c2ecf20Sopenharmony_ci * in the block layer on device congestion while having transaction 28008c2ecf20Sopenharmony_ci * started. 28018c2ecf20Sopenharmony_ci */ 28028c2ecf20Sopenharmony_ci mpd.do_map = 0; 28038c2ecf20Sopenharmony_ci mpd.scanned_until_end = 0; 28048c2ecf20Sopenharmony_ci mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); 28058c2ecf20Sopenharmony_ci if (!mpd.io_submit.io_end) { 28068c2ecf20Sopenharmony_ci ret = -ENOMEM; 28078c2ecf20Sopenharmony_ci goto unplug; 28088c2ecf20Sopenharmony_ci } 28098c2ecf20Sopenharmony_ci ret = mpage_prepare_extent_to_map(&mpd); 28108c2ecf20Sopenharmony_ci /* Unlock pages we didn't use */ 28118c2ecf20Sopenharmony_ci mpage_release_unused_pages(&mpd, false); 28128c2ecf20Sopenharmony_ci /* Submit prepared bio */ 28138c2ecf20Sopenharmony_ci ext4_io_submit(&mpd.io_submit); 28148c2ecf20Sopenharmony_ci ext4_put_io_end_defer(mpd.io_submit.io_end); 28158c2ecf20Sopenharmony_ci mpd.io_submit.io_end = NULL; 28168c2ecf20Sopenharmony_ci if (ret < 0) 28178c2ecf20Sopenharmony_ci goto unplug; 28188c2ecf20Sopenharmony_ci 28198c2ecf20Sopenharmony_ci while (!mpd.scanned_until_end && wbc->nr_to_write > 0) { 28208c2ecf20Sopenharmony_ci /* For each extent of pages we use new io_end */ 28218c2ecf20Sopenharmony_ci mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); 28228c2ecf20Sopenharmony_ci if 
(!mpd.io_submit.io_end) { 28238c2ecf20Sopenharmony_ci ret = -ENOMEM; 28248c2ecf20Sopenharmony_ci break; 28258c2ecf20Sopenharmony_ci } 28268c2ecf20Sopenharmony_ci 28278c2ecf20Sopenharmony_ci /* 28288c2ecf20Sopenharmony_ci * We have two constraints: We find one extent to map and we 28298c2ecf20Sopenharmony_ci * must always write out whole page (makes a difference when 28308c2ecf20Sopenharmony_ci * blocksize < pagesize) so that we don't block on IO when we 28318c2ecf20Sopenharmony_ci * try to write out the rest of the page. Journalled mode is 28328c2ecf20Sopenharmony_ci * not supported by delalloc. 28338c2ecf20Sopenharmony_ci */ 28348c2ecf20Sopenharmony_ci BUG_ON(ext4_should_journal_data(inode)); 28358c2ecf20Sopenharmony_ci needed_blocks = ext4_da_writepages_trans_blocks(inode); 28368c2ecf20Sopenharmony_ci 28378c2ecf20Sopenharmony_ci /* start a new transaction */ 28388c2ecf20Sopenharmony_ci handle = ext4_journal_start_with_reserve(inode, 28398c2ecf20Sopenharmony_ci EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks); 28408c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 28418c2ecf20Sopenharmony_ci ret = PTR_ERR(handle); 28428c2ecf20Sopenharmony_ci ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " 28438c2ecf20Sopenharmony_ci "%ld pages, ino %lu; err %d", __func__, 28448c2ecf20Sopenharmony_ci wbc->nr_to_write, inode->i_ino, ret); 28458c2ecf20Sopenharmony_ci /* Release allocated io_end */ 28468c2ecf20Sopenharmony_ci ext4_put_io_end(mpd.io_submit.io_end); 28478c2ecf20Sopenharmony_ci mpd.io_submit.io_end = NULL; 28488c2ecf20Sopenharmony_ci break; 28498c2ecf20Sopenharmony_ci } 28508c2ecf20Sopenharmony_ci mpd.do_map = 1; 28518c2ecf20Sopenharmony_ci 28528c2ecf20Sopenharmony_ci trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc); 28538c2ecf20Sopenharmony_ci ret = mpage_prepare_extent_to_map(&mpd); 28548c2ecf20Sopenharmony_ci if (!ret && mpd.map.m_len) 28558c2ecf20Sopenharmony_ci ret = mpage_map_and_submit_extent(handle, &mpd, 28568c2ecf20Sopenharmony_ci &give_up_on_write); 
28578c2ecf20Sopenharmony_ci /* 28588c2ecf20Sopenharmony_ci * Caution: If the handle is synchronous, 28598c2ecf20Sopenharmony_ci * ext4_journal_stop() can wait for transaction commit 28608c2ecf20Sopenharmony_ci * to finish which may depend on writeback of pages to 28618c2ecf20Sopenharmony_ci * complete or on page lock to be released. In that 28628c2ecf20Sopenharmony_ci * case, we have to wait until after we have 28638c2ecf20Sopenharmony_ci * submitted all the IO, released page locks we hold, 28648c2ecf20Sopenharmony_ci * and dropped io_end reference (for extent conversion 28658c2ecf20Sopenharmony_ci * to be able to complete) before stopping the handle. 28668c2ecf20Sopenharmony_ci */ 28678c2ecf20Sopenharmony_ci if (!ext4_handle_valid(handle) || handle->h_sync == 0) { 28688c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 28698c2ecf20Sopenharmony_ci handle = NULL; 28708c2ecf20Sopenharmony_ci mpd.do_map = 0; 28718c2ecf20Sopenharmony_ci } 28728c2ecf20Sopenharmony_ci /* Unlock pages we didn't use */ 28738c2ecf20Sopenharmony_ci mpage_release_unused_pages(&mpd, give_up_on_write); 28748c2ecf20Sopenharmony_ci /* Submit prepared bio */ 28758c2ecf20Sopenharmony_ci ext4_io_submit(&mpd.io_submit); 28768c2ecf20Sopenharmony_ci 28778c2ecf20Sopenharmony_ci /* 28788c2ecf20Sopenharmony_ci * Drop our io_end reference we got from init. We have 28798c2ecf20Sopenharmony_ci * to be careful and use deferred io_end finishing if 28808c2ecf20Sopenharmony_ci * we are still holding the transaction as we can 28818c2ecf20Sopenharmony_ci * release the last reference to io_end which may end 28828c2ecf20Sopenharmony_ci * up doing unwritten extent conversion. 
28838c2ecf20Sopenharmony_ci */ 28848c2ecf20Sopenharmony_ci if (handle) { 28858c2ecf20Sopenharmony_ci ext4_put_io_end_defer(mpd.io_submit.io_end); 28868c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 28878c2ecf20Sopenharmony_ci } else 28888c2ecf20Sopenharmony_ci ext4_put_io_end(mpd.io_submit.io_end); 28898c2ecf20Sopenharmony_ci mpd.io_submit.io_end = NULL; 28908c2ecf20Sopenharmony_ci 28918c2ecf20Sopenharmony_ci if (ret == -ENOSPC && sbi->s_journal) { 28928c2ecf20Sopenharmony_ci /* 28938c2ecf20Sopenharmony_ci * Commit the transaction which would 28948c2ecf20Sopenharmony_ci * free blocks released in the transaction 28958c2ecf20Sopenharmony_ci * and try again 28968c2ecf20Sopenharmony_ci */ 28978c2ecf20Sopenharmony_ci jbd2_journal_force_commit_nested(sbi->s_journal); 28988c2ecf20Sopenharmony_ci ret = 0; 28998c2ecf20Sopenharmony_ci continue; 29008c2ecf20Sopenharmony_ci } 29018c2ecf20Sopenharmony_ci /* Fatal error - ENOMEM, EIO... */ 29028c2ecf20Sopenharmony_ci if (ret) 29038c2ecf20Sopenharmony_ci break; 29048c2ecf20Sopenharmony_ci } 29058c2ecf20Sopenharmony_ciunplug: 29068c2ecf20Sopenharmony_ci blk_finish_plug(&plug); 29078c2ecf20Sopenharmony_ci if (!ret && !cycled && wbc->nr_to_write > 0) { 29088c2ecf20Sopenharmony_ci cycled = 1; 29098c2ecf20Sopenharmony_ci mpd.last_page = writeback_index - 1; 29108c2ecf20Sopenharmony_ci mpd.first_page = 0; 29118c2ecf20Sopenharmony_ci goto retry; 29128c2ecf20Sopenharmony_ci } 29138c2ecf20Sopenharmony_ci 29148c2ecf20Sopenharmony_ci /* Update index */ 29158c2ecf20Sopenharmony_ci if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 29168c2ecf20Sopenharmony_ci /* 29178c2ecf20Sopenharmony_ci * Set the writeback_index so that range_cyclic 29188c2ecf20Sopenharmony_ci * mode will write it back later 29198c2ecf20Sopenharmony_ci */ 29208c2ecf20Sopenharmony_ci mapping->writeback_index = mpd.first_page; 29218c2ecf20Sopenharmony_ci 29228c2ecf20Sopenharmony_ciout_writepages: 29238c2ecf20Sopenharmony_ci trace_ext4_writepages_result(inode, 
wbc, ret, 29248c2ecf20Sopenharmony_ci nr_to_write - wbc->nr_to_write); 29258c2ecf20Sopenharmony_ci percpu_up_read(&sbi->s_writepages_rwsem); 29268c2ecf20Sopenharmony_ci return ret; 29278c2ecf20Sopenharmony_ci} 29288c2ecf20Sopenharmony_ci 29298c2ecf20Sopenharmony_cistatic int ext4_dax_writepages(struct address_space *mapping, 29308c2ecf20Sopenharmony_ci struct writeback_control *wbc) 29318c2ecf20Sopenharmony_ci{ 29328c2ecf20Sopenharmony_ci int ret; 29338c2ecf20Sopenharmony_ci long nr_to_write = wbc->nr_to_write; 29348c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 29358c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 29368c2ecf20Sopenharmony_ci 29378c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 29388c2ecf20Sopenharmony_ci return -EIO; 29398c2ecf20Sopenharmony_ci 29408c2ecf20Sopenharmony_ci percpu_down_read(&sbi->s_writepages_rwsem); 29418c2ecf20Sopenharmony_ci trace_ext4_writepages(inode, wbc); 29428c2ecf20Sopenharmony_ci 29438c2ecf20Sopenharmony_ci ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc); 29448c2ecf20Sopenharmony_ci trace_ext4_writepages_result(inode, wbc, ret, 29458c2ecf20Sopenharmony_ci nr_to_write - wbc->nr_to_write); 29468c2ecf20Sopenharmony_ci percpu_up_read(&sbi->s_writepages_rwsem); 29478c2ecf20Sopenharmony_ci return ret; 29488c2ecf20Sopenharmony_ci} 29498c2ecf20Sopenharmony_ci 29508c2ecf20Sopenharmony_cistatic int ext4_nonda_switch(struct super_block *sb) 29518c2ecf20Sopenharmony_ci{ 29528c2ecf20Sopenharmony_ci s64 free_clusters, dirty_clusters; 29538c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 29548c2ecf20Sopenharmony_ci 29558c2ecf20Sopenharmony_ci /* 29568c2ecf20Sopenharmony_ci * switch to non delalloc mode if we are running low 29578c2ecf20Sopenharmony_ci * on free block. 
The free block accounting via percpu 29588c2ecf20Sopenharmony_ci * counters can get slightly wrong with percpu_counter_batch getting 29598c2ecf20Sopenharmony_ci * accumulated on each CPU without updating global counters 29608c2ecf20Sopenharmony_ci * Delalloc need an accurate free block accounting. So switch 29618c2ecf20Sopenharmony_ci * to non delalloc when we are near to error range. 29628c2ecf20Sopenharmony_ci */ 29638c2ecf20Sopenharmony_ci free_clusters = 29648c2ecf20Sopenharmony_ci percpu_counter_read_positive(&sbi->s_freeclusters_counter); 29658c2ecf20Sopenharmony_ci dirty_clusters = 29668c2ecf20Sopenharmony_ci percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); 29678c2ecf20Sopenharmony_ci /* 29688c2ecf20Sopenharmony_ci * Start pushing delalloc when 1/2 of free blocks are dirty. 29698c2ecf20Sopenharmony_ci */ 29708c2ecf20Sopenharmony_ci if (dirty_clusters && (free_clusters < 2 * dirty_clusters)) 29718c2ecf20Sopenharmony_ci try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); 29728c2ecf20Sopenharmony_ci 29738c2ecf20Sopenharmony_ci if (2 * free_clusters < 3 * dirty_clusters || 29748c2ecf20Sopenharmony_ci free_clusters < (dirty_clusters + EXT4_FREECLUSTERS_WATERMARK)) { 29758c2ecf20Sopenharmony_ci /* 29768c2ecf20Sopenharmony_ci * free block count is less than 150% of dirty blocks 29778c2ecf20Sopenharmony_ci * or free blocks is less than watermark 29788c2ecf20Sopenharmony_ci */ 29798c2ecf20Sopenharmony_ci return 1; 29808c2ecf20Sopenharmony_ci } 29818c2ecf20Sopenharmony_ci return 0; 29828c2ecf20Sopenharmony_ci} 29838c2ecf20Sopenharmony_ci 29848c2ecf20Sopenharmony_cistatic int ext4_da_write_begin(struct file *file, struct address_space *mapping, 29858c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned flags, 29868c2ecf20Sopenharmony_ci struct page **pagep, void **fsdata) 29878c2ecf20Sopenharmony_ci{ 29888c2ecf20Sopenharmony_ci int ret, retries = 0; 29898c2ecf20Sopenharmony_ci struct page *page; 29908c2ecf20Sopenharmony_ci pgoff_t index; 
29918c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 29928c2ecf20Sopenharmony_ci 29938c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 29948c2ecf20Sopenharmony_ci return -EIO; 29958c2ecf20Sopenharmony_ci 29968c2ecf20Sopenharmony_ci index = pos >> PAGE_SHIFT; 29978c2ecf20Sopenharmony_ci 29988c2ecf20Sopenharmony_ci if (ext4_nonda_switch(inode->i_sb) || S_ISLNK(inode->i_mode) || 29998c2ecf20Sopenharmony_ci ext4_verity_in_progress(inode)) { 30008c2ecf20Sopenharmony_ci *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; 30018c2ecf20Sopenharmony_ci return ext4_write_begin(file, mapping, pos, 30028c2ecf20Sopenharmony_ci len, flags, pagep, fsdata); 30038c2ecf20Sopenharmony_ci } 30048c2ecf20Sopenharmony_ci *fsdata = (void *)0; 30058c2ecf20Sopenharmony_ci trace_ext4_da_write_begin(inode, pos, len, flags); 30068c2ecf20Sopenharmony_ci 30078c2ecf20Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 30088c2ecf20Sopenharmony_ci ret = ext4_da_write_inline_data_begin(mapping, inode, 30098c2ecf20Sopenharmony_ci pos, len, flags, 30108c2ecf20Sopenharmony_ci pagep, fsdata); 30118c2ecf20Sopenharmony_ci if (ret < 0) 30128c2ecf20Sopenharmony_ci return ret; 30138c2ecf20Sopenharmony_ci if (ret == 1) 30148c2ecf20Sopenharmony_ci return 0; 30158c2ecf20Sopenharmony_ci } 30168c2ecf20Sopenharmony_ci 30178c2ecf20Sopenharmony_ciretry: 30188c2ecf20Sopenharmony_ci page = grab_cache_page_write_begin(mapping, index, flags); 30198c2ecf20Sopenharmony_ci if (!page) 30208c2ecf20Sopenharmony_ci return -ENOMEM; 30218c2ecf20Sopenharmony_ci 30228c2ecf20Sopenharmony_ci /* In case writeback began while the page was unlocked */ 30238c2ecf20Sopenharmony_ci wait_for_stable_page(page); 30248c2ecf20Sopenharmony_ci 30258c2ecf20Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION 30268c2ecf20Sopenharmony_ci ret = ext4_block_write_begin(page, pos, len, 30278c2ecf20Sopenharmony_ci ext4_da_get_block_prep); 30288c2ecf20Sopenharmony_ci#else 30298c2ecf20Sopenharmony_ci 
ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); 30308c2ecf20Sopenharmony_ci#endif 30318c2ecf20Sopenharmony_ci if (ret < 0) { 30328c2ecf20Sopenharmony_ci unlock_page(page); 30338c2ecf20Sopenharmony_ci put_page(page); 30348c2ecf20Sopenharmony_ci /* 30358c2ecf20Sopenharmony_ci * block_write_begin may have instantiated a few blocks 30368c2ecf20Sopenharmony_ci * outside i_size. Trim these off again. Don't need 30378c2ecf20Sopenharmony_ci * i_size_read because we hold inode lock. 30388c2ecf20Sopenharmony_ci */ 30398c2ecf20Sopenharmony_ci if (pos + len > inode->i_size) 30408c2ecf20Sopenharmony_ci ext4_truncate_failed_write(inode); 30418c2ecf20Sopenharmony_ci 30428c2ecf20Sopenharmony_ci if (ret == -ENOSPC && 30438c2ecf20Sopenharmony_ci ext4_should_retry_alloc(inode->i_sb, &retries)) 30448c2ecf20Sopenharmony_ci goto retry; 30458c2ecf20Sopenharmony_ci return ret; 30468c2ecf20Sopenharmony_ci } 30478c2ecf20Sopenharmony_ci 30488c2ecf20Sopenharmony_ci *pagep = page; 30498c2ecf20Sopenharmony_ci return ret; 30508c2ecf20Sopenharmony_ci} 30518c2ecf20Sopenharmony_ci 30528c2ecf20Sopenharmony_ci/* 30538c2ecf20Sopenharmony_ci * Check if we should update i_disksize 30548c2ecf20Sopenharmony_ci * when write to the end of file but not require block allocation 30558c2ecf20Sopenharmony_ci */ 30568c2ecf20Sopenharmony_cistatic int ext4_da_should_update_i_disksize(struct page *page, 30578c2ecf20Sopenharmony_ci unsigned long offset) 30588c2ecf20Sopenharmony_ci{ 30598c2ecf20Sopenharmony_ci struct buffer_head *bh; 30608c2ecf20Sopenharmony_ci struct inode *inode = page->mapping->host; 30618c2ecf20Sopenharmony_ci unsigned int idx; 30628c2ecf20Sopenharmony_ci int i; 30638c2ecf20Sopenharmony_ci 30648c2ecf20Sopenharmony_ci bh = page_buffers(page); 30658c2ecf20Sopenharmony_ci idx = offset >> inode->i_blkbits; 30668c2ecf20Sopenharmony_ci 30678c2ecf20Sopenharmony_ci for (i = 0; i < idx; i++) 30688c2ecf20Sopenharmony_ci bh = bh->b_this_page; 30698c2ecf20Sopenharmony_ci 
30708c2ecf20Sopenharmony_ci if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) 30718c2ecf20Sopenharmony_ci return 0; 30728c2ecf20Sopenharmony_ci return 1; 30738c2ecf20Sopenharmony_ci} 30748c2ecf20Sopenharmony_ci 30758c2ecf20Sopenharmony_cistatic int ext4_da_write_end(struct file *file, 30768c2ecf20Sopenharmony_ci struct address_space *mapping, 30778c2ecf20Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 30788c2ecf20Sopenharmony_ci struct page *page, void *fsdata) 30798c2ecf20Sopenharmony_ci{ 30808c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 30818c2ecf20Sopenharmony_ci loff_t new_i_size; 30828c2ecf20Sopenharmony_ci unsigned long start, end; 30838c2ecf20Sopenharmony_ci int write_mode = (int)(unsigned long)fsdata; 30848c2ecf20Sopenharmony_ci 30858c2ecf20Sopenharmony_ci if (write_mode == FALL_BACK_TO_NONDELALLOC) 30868c2ecf20Sopenharmony_ci return ext4_write_end(file, mapping, pos, 30878c2ecf20Sopenharmony_ci len, copied, page, fsdata); 30888c2ecf20Sopenharmony_ci 30898c2ecf20Sopenharmony_ci trace_ext4_da_write_end(inode, pos, len, copied); 30908c2ecf20Sopenharmony_ci 30918c2ecf20Sopenharmony_ci if (write_mode != CONVERT_INLINE_DATA && 30928c2ecf20Sopenharmony_ci ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && 30938c2ecf20Sopenharmony_ci ext4_has_inline_data(inode)) 30948c2ecf20Sopenharmony_ci return ext4_write_inline_data_end(inode, pos, len, copied, page); 30958c2ecf20Sopenharmony_ci 30968c2ecf20Sopenharmony_ci start = pos & (PAGE_SIZE - 1); 30978c2ecf20Sopenharmony_ci end = start + copied - 1; 30988c2ecf20Sopenharmony_ci 30998c2ecf20Sopenharmony_ci /* 31008c2ecf20Sopenharmony_ci * Since we are holding inode lock, we are sure i_disksize <= 31018c2ecf20Sopenharmony_ci * i_size. We also know that if i_disksize < i_size, there are 31028c2ecf20Sopenharmony_ci * delalloc writes pending in the range upto i_size. 
If the end of 31038c2ecf20Sopenharmony_ci * the current write is <= i_size, there's no need to touch 31048c2ecf20Sopenharmony_ci * i_disksize since writeback will push i_disksize upto i_size 31058c2ecf20Sopenharmony_ci * eventually. If the end of the current write is > i_size and 31068c2ecf20Sopenharmony_ci * inside an allocated block (ext4_da_should_update_i_disksize() 31078c2ecf20Sopenharmony_ci * check), we need to update i_disksize here as neither 31088c2ecf20Sopenharmony_ci * ext4_writepage() nor certain ext4_writepages() paths not 31098c2ecf20Sopenharmony_ci * allocating blocks update i_disksize. 31108c2ecf20Sopenharmony_ci * 31118c2ecf20Sopenharmony_ci * Note that we defer inode dirtying to generic_write_end() / 31128c2ecf20Sopenharmony_ci * ext4_da_write_inline_data_end(). 31138c2ecf20Sopenharmony_ci */ 31148c2ecf20Sopenharmony_ci new_i_size = pos + copied; 31158c2ecf20Sopenharmony_ci if (copied && new_i_size > inode->i_size && 31168c2ecf20Sopenharmony_ci ext4_da_should_update_i_disksize(page, end)) 31178c2ecf20Sopenharmony_ci ext4_update_i_disksize(inode, new_i_size); 31188c2ecf20Sopenharmony_ci 31198c2ecf20Sopenharmony_ci return generic_write_end(file, mapping, pos, len, copied, page, fsdata); 31208c2ecf20Sopenharmony_ci} 31218c2ecf20Sopenharmony_ci 31228c2ecf20Sopenharmony_ci/* 31238c2ecf20Sopenharmony_ci * Force all delayed allocation blocks to be allocated for a given inode. 31248c2ecf20Sopenharmony_ci */ 31258c2ecf20Sopenharmony_ciint ext4_alloc_da_blocks(struct inode *inode) 31268c2ecf20Sopenharmony_ci{ 31278c2ecf20Sopenharmony_ci trace_ext4_alloc_da_blocks(inode); 31288c2ecf20Sopenharmony_ci 31298c2ecf20Sopenharmony_ci if (!EXT4_I(inode)->i_reserved_data_blocks) 31308c2ecf20Sopenharmony_ci return 0; 31318c2ecf20Sopenharmony_ci 31328c2ecf20Sopenharmony_ci /* 31338c2ecf20Sopenharmony_ci * We do something simple for now. 
The filemap_flush() will 31348c2ecf20Sopenharmony_ci * also start triggering a write of the data blocks, which is 31358c2ecf20Sopenharmony_ci * not strictly speaking necessary (and for users of 31368c2ecf20Sopenharmony_ci * laptop_mode, not even desirable). However, to do otherwise 31378c2ecf20Sopenharmony_ci * would require replicating code paths in: 31388c2ecf20Sopenharmony_ci * 31398c2ecf20Sopenharmony_ci * ext4_writepages() -> 31408c2ecf20Sopenharmony_ci * write_cache_pages() ---> (via passed in callback function) 31418c2ecf20Sopenharmony_ci * __mpage_da_writepage() --> 31428c2ecf20Sopenharmony_ci * mpage_add_bh_to_extent() 31438c2ecf20Sopenharmony_ci * mpage_da_map_blocks() 31448c2ecf20Sopenharmony_ci * 31458c2ecf20Sopenharmony_ci * The problem is that write_cache_pages(), located in 31468c2ecf20Sopenharmony_ci * mm/page-writeback.c, marks pages clean in preparation for 31478c2ecf20Sopenharmony_ci * doing I/O, which is not desirable if we're not planning on 31488c2ecf20Sopenharmony_ci * doing I/O at all. 31498c2ecf20Sopenharmony_ci * 31508c2ecf20Sopenharmony_ci * We could call write_cache_pages(), and then redirty all of 31518c2ecf20Sopenharmony_ci * the pages by calling redirty_page_for_writepage() but that 31528c2ecf20Sopenharmony_ci * would be ugly in the extreme. So instead we would need to 31538c2ecf20Sopenharmony_ci * replicate parts of the code in the above functions, 31548c2ecf20Sopenharmony_ci * simplifying them because we wouldn't actually intend to 31558c2ecf20Sopenharmony_ci * write out the pages, but rather only collect contiguous 31568c2ecf20Sopenharmony_ci * logical block extents, call the multi-block allocator, and 31578c2ecf20Sopenharmony_ci * then update the buffer heads with the block allocations. 
31588c2ecf20Sopenharmony_ci * 31598c2ecf20Sopenharmony_ci * For now, though, we'll cheat by calling filemap_flush(), 31608c2ecf20Sopenharmony_ci * which will map the blocks, and start the I/O, but not 31618c2ecf20Sopenharmony_ci * actually wait for the I/O to complete. 31628c2ecf20Sopenharmony_ci */ 31638c2ecf20Sopenharmony_ci return filemap_flush(inode->i_mapping); 31648c2ecf20Sopenharmony_ci} 31658c2ecf20Sopenharmony_ci 31668c2ecf20Sopenharmony_ci/* 31678c2ecf20Sopenharmony_ci * bmap() is special. It gets used by applications such as lilo and by 31688c2ecf20Sopenharmony_ci * the swapper to find the on-disk block of a specific piece of data. 31698c2ecf20Sopenharmony_ci * 31708c2ecf20Sopenharmony_ci * Naturally, this is dangerous if the block concerned is still in the 31718c2ecf20Sopenharmony_ci * journal. If somebody makes a swapfile on an ext4 data-journaling 31728c2ecf20Sopenharmony_ci * filesystem and enables swap, then they may get a nasty shock when the 31738c2ecf20Sopenharmony_ci * data getting swapped to that swapfile suddenly gets overwritten by 31748c2ecf20Sopenharmony_ci * the original zero's written out previously to the journal and 31758c2ecf20Sopenharmony_ci * awaiting writeback in the kernel's buffer cache. 31768c2ecf20Sopenharmony_ci * 31778c2ecf20Sopenharmony_ci * So, if we see any bmap calls here on a modified, data-journaled file, 31788c2ecf20Sopenharmony_ci * take extra steps to flush any blocks which might be in the cache. 
31798c2ecf20Sopenharmony_ci */ 31808c2ecf20Sopenharmony_cistatic sector_t ext4_bmap(struct address_space *mapping, sector_t block) 31818c2ecf20Sopenharmony_ci{ 31828c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 31838c2ecf20Sopenharmony_ci journal_t *journal; 31848c2ecf20Sopenharmony_ci sector_t ret = 0; 31858c2ecf20Sopenharmony_ci int err; 31868c2ecf20Sopenharmony_ci 31878c2ecf20Sopenharmony_ci inode_lock_shared(inode); 31888c2ecf20Sopenharmony_ci /* 31898c2ecf20Sopenharmony_ci * We can get here for an inline file via the FIBMAP ioctl 31908c2ecf20Sopenharmony_ci */ 31918c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) 31928c2ecf20Sopenharmony_ci goto out; 31938c2ecf20Sopenharmony_ci 31948c2ecf20Sopenharmony_ci if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && 31958c2ecf20Sopenharmony_ci test_opt(inode->i_sb, DELALLOC)) { 31968c2ecf20Sopenharmony_ci /* 31978c2ecf20Sopenharmony_ci * With delalloc we want to sync the file 31988c2ecf20Sopenharmony_ci * so that we can make sure we allocate 31998c2ecf20Sopenharmony_ci * blocks for file 32008c2ecf20Sopenharmony_ci */ 32018c2ecf20Sopenharmony_ci filemap_write_and_wait(mapping); 32028c2ecf20Sopenharmony_ci } 32038c2ecf20Sopenharmony_ci 32048c2ecf20Sopenharmony_ci if (EXT4_JOURNAL(inode) && 32058c2ecf20Sopenharmony_ci ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { 32068c2ecf20Sopenharmony_ci /* 32078c2ecf20Sopenharmony_ci * This is a REALLY heavyweight approach, but the use of 32088c2ecf20Sopenharmony_ci * bmap on dirty files is expected to be extremely rare: 32098c2ecf20Sopenharmony_ci * only if we run lilo or swapon on a freshly made file 32108c2ecf20Sopenharmony_ci * do we expect this to happen. 
32118c2ecf20Sopenharmony_ci * 32128c2ecf20Sopenharmony_ci * (bmap requires CAP_SYS_RAWIO so this does not 32138c2ecf20Sopenharmony_ci * represent an unprivileged user DOS attack --- we'd be 32148c2ecf20Sopenharmony_ci * in trouble if mortal users could trigger this path at 32158c2ecf20Sopenharmony_ci * will.) 32168c2ecf20Sopenharmony_ci * 32178c2ecf20Sopenharmony_ci * NB. EXT4_STATE_JDATA is not set on files other than 32188c2ecf20Sopenharmony_ci * regular files. If somebody wants to bmap a directory 32198c2ecf20Sopenharmony_ci * or symlink and gets confused because the buffer 32208c2ecf20Sopenharmony_ci * hasn't yet been flushed to disk, they deserve 32218c2ecf20Sopenharmony_ci * everything they get. 32228c2ecf20Sopenharmony_ci */ 32238c2ecf20Sopenharmony_ci 32248c2ecf20Sopenharmony_ci ext4_clear_inode_state(inode, EXT4_STATE_JDATA); 32258c2ecf20Sopenharmony_ci journal = EXT4_JOURNAL(inode); 32268c2ecf20Sopenharmony_ci jbd2_journal_lock_updates(journal); 32278c2ecf20Sopenharmony_ci err = jbd2_journal_flush(journal); 32288c2ecf20Sopenharmony_ci jbd2_journal_unlock_updates(journal); 32298c2ecf20Sopenharmony_ci 32308c2ecf20Sopenharmony_ci if (err) 32318c2ecf20Sopenharmony_ci goto out; 32328c2ecf20Sopenharmony_ci } 32338c2ecf20Sopenharmony_ci 32348c2ecf20Sopenharmony_ci ret = iomap_bmap(mapping, block, &ext4_iomap_ops); 32358c2ecf20Sopenharmony_ci 32368c2ecf20Sopenharmony_ciout: 32378c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 32388c2ecf20Sopenharmony_ci return ret; 32398c2ecf20Sopenharmony_ci} 32408c2ecf20Sopenharmony_ci 32418c2ecf20Sopenharmony_cistatic int ext4_readpage(struct file *file, struct page *page) 32428c2ecf20Sopenharmony_ci{ 32438c2ecf20Sopenharmony_ci int ret = -EAGAIN; 32448c2ecf20Sopenharmony_ci struct inode *inode = page->mapping->host; 32458c2ecf20Sopenharmony_ci 32468c2ecf20Sopenharmony_ci trace_ext4_readpage(page); 32478c2ecf20Sopenharmony_ci 32488c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) 32498c2ecf20Sopenharmony_ci ret = 
ext4_readpage_inline(inode, page); 32508c2ecf20Sopenharmony_ci 32518c2ecf20Sopenharmony_ci if (ret == -EAGAIN) 32528c2ecf20Sopenharmony_ci return ext4_mpage_readpages(inode, NULL, page); 32538c2ecf20Sopenharmony_ci 32548c2ecf20Sopenharmony_ci return ret; 32558c2ecf20Sopenharmony_ci} 32568c2ecf20Sopenharmony_ci 32578c2ecf20Sopenharmony_cistatic void ext4_readahead(struct readahead_control *rac) 32588c2ecf20Sopenharmony_ci{ 32598c2ecf20Sopenharmony_ci struct inode *inode = rac->mapping->host; 32608c2ecf20Sopenharmony_ci 32618c2ecf20Sopenharmony_ci /* If the file has inline data, no need to do readahead. */ 32628c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) 32638c2ecf20Sopenharmony_ci return; 32648c2ecf20Sopenharmony_ci 32658c2ecf20Sopenharmony_ci ext4_mpage_readpages(inode, rac, NULL); 32668c2ecf20Sopenharmony_ci} 32678c2ecf20Sopenharmony_ci 32688c2ecf20Sopenharmony_cistatic void ext4_invalidatepage(struct page *page, unsigned int offset, 32698c2ecf20Sopenharmony_ci unsigned int length) 32708c2ecf20Sopenharmony_ci{ 32718c2ecf20Sopenharmony_ci trace_ext4_invalidatepage(page, offset, length); 32728c2ecf20Sopenharmony_ci 32738c2ecf20Sopenharmony_ci /* No journalling happens on data buffers when this function is used */ 32748c2ecf20Sopenharmony_ci WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); 32758c2ecf20Sopenharmony_ci 32768c2ecf20Sopenharmony_ci block_invalidatepage(page, offset, length); 32778c2ecf20Sopenharmony_ci} 32788c2ecf20Sopenharmony_ci 32798c2ecf20Sopenharmony_cistatic int __ext4_journalled_invalidatepage(struct page *page, 32808c2ecf20Sopenharmony_ci unsigned int offset, 32818c2ecf20Sopenharmony_ci unsigned int length) 32828c2ecf20Sopenharmony_ci{ 32838c2ecf20Sopenharmony_ci journal_t *journal = EXT4_JOURNAL(page->mapping->host); 32848c2ecf20Sopenharmony_ci 32858c2ecf20Sopenharmony_ci trace_ext4_journalled_invalidatepage(page, offset, length); 32868c2ecf20Sopenharmony_ci 32878c2ecf20Sopenharmony_ci /* 32888c2ecf20Sopenharmony_ci 
* If it's a full truncate we just forget about the pending dirtying 32898c2ecf20Sopenharmony_ci */ 32908c2ecf20Sopenharmony_ci if (offset == 0 && length == PAGE_SIZE) 32918c2ecf20Sopenharmony_ci ClearPageChecked(page); 32928c2ecf20Sopenharmony_ci 32938c2ecf20Sopenharmony_ci return jbd2_journal_invalidatepage(journal, page, offset, length); 32948c2ecf20Sopenharmony_ci} 32958c2ecf20Sopenharmony_ci 32968c2ecf20Sopenharmony_ci/* Wrapper for aops... */ 32978c2ecf20Sopenharmony_cistatic void ext4_journalled_invalidatepage(struct page *page, 32988c2ecf20Sopenharmony_ci unsigned int offset, 32998c2ecf20Sopenharmony_ci unsigned int length) 33008c2ecf20Sopenharmony_ci{ 33018c2ecf20Sopenharmony_ci WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0); 33028c2ecf20Sopenharmony_ci} 33038c2ecf20Sopenharmony_ci 33048c2ecf20Sopenharmony_cistatic int ext4_releasepage(struct page *page, gfp_t wait) 33058c2ecf20Sopenharmony_ci{ 33068c2ecf20Sopenharmony_ci journal_t *journal = EXT4_JOURNAL(page->mapping->host); 33078c2ecf20Sopenharmony_ci 33088c2ecf20Sopenharmony_ci trace_ext4_releasepage(page); 33098c2ecf20Sopenharmony_ci 33108c2ecf20Sopenharmony_ci /* Page has dirty journalled data -> cannot release */ 33118c2ecf20Sopenharmony_ci if (PageChecked(page)) 33128c2ecf20Sopenharmony_ci return 0; 33138c2ecf20Sopenharmony_ci if (journal) 33148c2ecf20Sopenharmony_ci return jbd2_journal_try_to_free_buffers(journal, page); 33158c2ecf20Sopenharmony_ci else 33168c2ecf20Sopenharmony_ci return try_to_free_buffers(page); 33178c2ecf20Sopenharmony_ci} 33188c2ecf20Sopenharmony_ci 33198c2ecf20Sopenharmony_cistatic bool ext4_inode_datasync_dirty(struct inode *inode) 33208c2ecf20Sopenharmony_ci{ 33218c2ecf20Sopenharmony_ci journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 33228c2ecf20Sopenharmony_ci 33238c2ecf20Sopenharmony_ci if (journal) { 33248c2ecf20Sopenharmony_ci if (jbd2_transaction_committed(journal, 33258c2ecf20Sopenharmony_ci EXT4_I(inode)->i_datasync_tid)) 
33268c2ecf20Sopenharmony_ci return false; 33278c2ecf20Sopenharmony_ci if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT)) 33288c2ecf20Sopenharmony_ci return !list_empty(&EXT4_I(inode)->i_fc_list); 33298c2ecf20Sopenharmony_ci return true; 33308c2ecf20Sopenharmony_ci } 33318c2ecf20Sopenharmony_ci 33328c2ecf20Sopenharmony_ci /* Any metadata buffers to write? */ 33338c2ecf20Sopenharmony_ci if (!list_empty(&inode->i_mapping->private_list)) 33348c2ecf20Sopenharmony_ci return true; 33358c2ecf20Sopenharmony_ci return inode->i_state & I_DIRTY_DATASYNC; 33368c2ecf20Sopenharmony_ci} 33378c2ecf20Sopenharmony_ci 33388c2ecf20Sopenharmony_cistatic void ext4_set_iomap(struct inode *inode, struct iomap *iomap, 33398c2ecf20Sopenharmony_ci struct ext4_map_blocks *map, loff_t offset, 33408c2ecf20Sopenharmony_ci loff_t length) 33418c2ecf20Sopenharmony_ci{ 33428c2ecf20Sopenharmony_ci u8 blkbits = inode->i_blkbits; 33438c2ecf20Sopenharmony_ci 33448c2ecf20Sopenharmony_ci /* 33458c2ecf20Sopenharmony_ci * Writes that span EOF might trigger an I/O size update on completion, 33468c2ecf20Sopenharmony_ci * so consider them to be dirty for the purpose of O_DSYNC, even if 33478c2ecf20Sopenharmony_ci * there is no other metadata changes being made or are pending. 
33488c2ecf20Sopenharmony_ci */ 33498c2ecf20Sopenharmony_ci iomap->flags = 0; 33508c2ecf20Sopenharmony_ci if (ext4_inode_datasync_dirty(inode) || 33518c2ecf20Sopenharmony_ci offset + length > i_size_read(inode)) 33528c2ecf20Sopenharmony_ci iomap->flags |= IOMAP_F_DIRTY; 33538c2ecf20Sopenharmony_ci 33548c2ecf20Sopenharmony_ci if (map->m_flags & EXT4_MAP_NEW) 33558c2ecf20Sopenharmony_ci iomap->flags |= IOMAP_F_NEW; 33568c2ecf20Sopenharmony_ci 33578c2ecf20Sopenharmony_ci iomap->bdev = inode->i_sb->s_bdev; 33588c2ecf20Sopenharmony_ci iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; 33598c2ecf20Sopenharmony_ci iomap->offset = (u64) map->m_lblk << blkbits; 33608c2ecf20Sopenharmony_ci iomap->length = (u64) map->m_len << blkbits; 33618c2ecf20Sopenharmony_ci 33628c2ecf20Sopenharmony_ci if ((map->m_flags & EXT4_MAP_MAPPED) && 33638c2ecf20Sopenharmony_ci !ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 33648c2ecf20Sopenharmony_ci iomap->flags |= IOMAP_F_MERGED; 33658c2ecf20Sopenharmony_ci 33668c2ecf20Sopenharmony_ci /* 33678c2ecf20Sopenharmony_ci * Flags passed to ext4_map_blocks() for direct I/O writes can result 33688c2ecf20Sopenharmony_ci * in m_flags having both EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits 33698c2ecf20Sopenharmony_ci * set. In order for any allocated unwritten extents to be converted 33708c2ecf20Sopenharmony_ci * into written extents correctly within the ->end_io() handler, we 33718c2ecf20Sopenharmony_ci * need to ensure that the iomap->type is set appropriately. Hence, the 33728c2ecf20Sopenharmony_ci * reason why we need to check whether the EXT4_MAP_UNWRITTEN bit has 33738c2ecf20Sopenharmony_ci * been set first. 
33748c2ecf20Sopenharmony_ci */ 33758c2ecf20Sopenharmony_ci if (map->m_flags & EXT4_MAP_UNWRITTEN) { 33768c2ecf20Sopenharmony_ci iomap->type = IOMAP_UNWRITTEN; 33778c2ecf20Sopenharmony_ci iomap->addr = (u64) map->m_pblk << blkbits; 33788c2ecf20Sopenharmony_ci } else if (map->m_flags & EXT4_MAP_MAPPED) { 33798c2ecf20Sopenharmony_ci iomap->type = IOMAP_MAPPED; 33808c2ecf20Sopenharmony_ci iomap->addr = (u64) map->m_pblk << blkbits; 33818c2ecf20Sopenharmony_ci } else { 33828c2ecf20Sopenharmony_ci iomap->type = IOMAP_HOLE; 33838c2ecf20Sopenharmony_ci iomap->addr = IOMAP_NULL_ADDR; 33848c2ecf20Sopenharmony_ci } 33858c2ecf20Sopenharmony_ci} 33868c2ecf20Sopenharmony_ci 33878c2ecf20Sopenharmony_cistatic int ext4_iomap_alloc(struct inode *inode, struct ext4_map_blocks *map, 33888c2ecf20Sopenharmony_ci unsigned int flags) 33898c2ecf20Sopenharmony_ci{ 33908c2ecf20Sopenharmony_ci handle_t *handle; 33918c2ecf20Sopenharmony_ci u8 blkbits = inode->i_blkbits; 33928c2ecf20Sopenharmony_ci int ret, dio_credits, m_flags = 0, retries = 0; 33938c2ecf20Sopenharmony_ci 33948c2ecf20Sopenharmony_ci /* 33958c2ecf20Sopenharmony_ci * Trim the mapping request to the maximum value that we can map at 33968c2ecf20Sopenharmony_ci * once for direct I/O. 33978c2ecf20Sopenharmony_ci */ 33988c2ecf20Sopenharmony_ci if (map->m_len > DIO_MAX_BLOCKS) 33998c2ecf20Sopenharmony_ci map->m_len = DIO_MAX_BLOCKS; 34008c2ecf20Sopenharmony_ci dio_credits = ext4_chunk_trans_blocks(inode, map->m_len); 34018c2ecf20Sopenharmony_ci 34028c2ecf20Sopenharmony_ciretry: 34038c2ecf20Sopenharmony_ci /* 34048c2ecf20Sopenharmony_ci * Either we allocate blocks and then don't get an unwritten extent, so 34058c2ecf20Sopenharmony_ci * in that case we have reserved enough credits. Or, the blocks are 34068c2ecf20Sopenharmony_ci * already allocated and unwritten. In that case, the extent conversion 34078c2ecf20Sopenharmony_ci * fits into the credits as well. 
34088c2ecf20Sopenharmony_ci */ 34098c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits); 34108c2ecf20Sopenharmony_ci if (IS_ERR(handle)) 34118c2ecf20Sopenharmony_ci return PTR_ERR(handle); 34128c2ecf20Sopenharmony_ci 34138c2ecf20Sopenharmony_ci /* 34148c2ecf20Sopenharmony_ci * DAX and direct I/O are the only two operations that are currently 34158c2ecf20Sopenharmony_ci * supported with IOMAP_WRITE. 34168c2ecf20Sopenharmony_ci */ 34178c2ecf20Sopenharmony_ci WARN_ON(!IS_DAX(inode) && !(flags & IOMAP_DIRECT)); 34188c2ecf20Sopenharmony_ci if (IS_DAX(inode)) 34198c2ecf20Sopenharmony_ci m_flags = EXT4_GET_BLOCKS_CREATE_ZERO; 34208c2ecf20Sopenharmony_ci /* 34218c2ecf20Sopenharmony_ci * We use i_size instead of i_disksize here because delalloc writeback 34228c2ecf20Sopenharmony_ci * can complete at any point during the I/O and subsequently push the 34238c2ecf20Sopenharmony_ci * i_disksize out to i_size. This could be beyond where direct I/O is 34248c2ecf20Sopenharmony_ci * happening and thus expose allocated blocks to direct I/O reads. 34258c2ecf20Sopenharmony_ci */ 34268c2ecf20Sopenharmony_ci else if (((loff_t)map->m_lblk << blkbits) >= i_size_read(inode)) 34278c2ecf20Sopenharmony_ci m_flags = EXT4_GET_BLOCKS_CREATE; 34288c2ecf20Sopenharmony_ci else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 34298c2ecf20Sopenharmony_ci m_flags = EXT4_GET_BLOCKS_IO_CREATE_EXT; 34308c2ecf20Sopenharmony_ci 34318c2ecf20Sopenharmony_ci ret = ext4_map_blocks(handle, inode, map, m_flags); 34328c2ecf20Sopenharmony_ci 34338c2ecf20Sopenharmony_ci /* 34348c2ecf20Sopenharmony_ci * We cannot fill holes in indirect tree based inodes as that could 34358c2ecf20Sopenharmony_ci * expose stale data in the case of a crash. Use the magic error code 34368c2ecf20Sopenharmony_ci * to fallback to buffered I/O. 
34378c2ecf20Sopenharmony_ci */ 34388c2ecf20Sopenharmony_ci if (!m_flags && !ret) 34398c2ecf20Sopenharmony_ci ret = -ENOTBLK; 34408c2ecf20Sopenharmony_ci 34418c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 34428c2ecf20Sopenharmony_ci if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 34438c2ecf20Sopenharmony_ci goto retry; 34448c2ecf20Sopenharmony_ci 34458c2ecf20Sopenharmony_ci return ret; 34468c2ecf20Sopenharmony_ci} 34478c2ecf20Sopenharmony_ci 34488c2ecf20Sopenharmony_ci 34498c2ecf20Sopenharmony_cistatic int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 34508c2ecf20Sopenharmony_ci unsigned flags, struct iomap *iomap, struct iomap *srcmap) 34518c2ecf20Sopenharmony_ci{ 34528c2ecf20Sopenharmony_ci int ret; 34538c2ecf20Sopenharmony_ci struct ext4_map_blocks map; 34548c2ecf20Sopenharmony_ci u8 blkbits = inode->i_blkbits; 34558c2ecf20Sopenharmony_ci 34568c2ecf20Sopenharmony_ci if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) 34578c2ecf20Sopenharmony_ci return -EINVAL; 34588c2ecf20Sopenharmony_ci 34598c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(ext4_has_inline_data(inode))) 34608c2ecf20Sopenharmony_ci return -ERANGE; 34618c2ecf20Sopenharmony_ci 34628c2ecf20Sopenharmony_ci /* 34638c2ecf20Sopenharmony_ci * Calculate the first and last logical blocks respectively. 34648c2ecf20Sopenharmony_ci */ 34658c2ecf20Sopenharmony_ci map.m_lblk = offset >> blkbits; 34668c2ecf20Sopenharmony_ci map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits, 34678c2ecf20Sopenharmony_ci EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1; 34688c2ecf20Sopenharmony_ci 34698c2ecf20Sopenharmony_ci if (flags & IOMAP_WRITE) { 34708c2ecf20Sopenharmony_ci /* 34718c2ecf20Sopenharmony_ci * We check here if the blocks are already allocated, then we 34728c2ecf20Sopenharmony_ci * don't need to start a journal txn and we can directly return 34738c2ecf20Sopenharmony_ci * the mapping information. 
This could boost performance 34748c2ecf20Sopenharmony_ci * especially in multi-threaded overwrite requests. 34758c2ecf20Sopenharmony_ci */ 34768c2ecf20Sopenharmony_ci if (offset + length <= i_size_read(inode)) { 34778c2ecf20Sopenharmony_ci ret = ext4_map_blocks(NULL, inode, &map, 0); 34788c2ecf20Sopenharmony_ci if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED)) 34798c2ecf20Sopenharmony_ci goto out; 34808c2ecf20Sopenharmony_ci } 34818c2ecf20Sopenharmony_ci ret = ext4_iomap_alloc(inode, &map, flags); 34828c2ecf20Sopenharmony_ci } else { 34838c2ecf20Sopenharmony_ci ret = ext4_map_blocks(NULL, inode, &map, 0); 34848c2ecf20Sopenharmony_ci } 34858c2ecf20Sopenharmony_ci 34868c2ecf20Sopenharmony_ci if (ret < 0) 34878c2ecf20Sopenharmony_ci return ret; 34888c2ecf20Sopenharmony_ciout: 34898c2ecf20Sopenharmony_ci ext4_set_iomap(inode, iomap, &map, offset, length); 34908c2ecf20Sopenharmony_ci 34918c2ecf20Sopenharmony_ci return 0; 34928c2ecf20Sopenharmony_ci} 34938c2ecf20Sopenharmony_ci 34948c2ecf20Sopenharmony_cistatic int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset, 34958c2ecf20Sopenharmony_ci loff_t length, unsigned flags, struct iomap *iomap, 34968c2ecf20Sopenharmony_ci struct iomap *srcmap) 34978c2ecf20Sopenharmony_ci{ 34988c2ecf20Sopenharmony_ci int ret; 34998c2ecf20Sopenharmony_ci 35008c2ecf20Sopenharmony_ci /* 35018c2ecf20Sopenharmony_ci * Even for writes we don't need to allocate blocks, so just pretend 35028c2ecf20Sopenharmony_ci * we are reading to save overhead of starting a transaction. 
35038c2ecf20Sopenharmony_ci */ 35048c2ecf20Sopenharmony_ci flags &= ~IOMAP_WRITE; 35058c2ecf20Sopenharmony_ci ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap); 35068c2ecf20Sopenharmony_ci WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED); 35078c2ecf20Sopenharmony_ci return ret; 35088c2ecf20Sopenharmony_ci} 35098c2ecf20Sopenharmony_ci 35108c2ecf20Sopenharmony_cistatic int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, 35118c2ecf20Sopenharmony_ci ssize_t written, unsigned flags, struct iomap *iomap) 35128c2ecf20Sopenharmony_ci{ 35138c2ecf20Sopenharmony_ci /* 35148c2ecf20Sopenharmony_ci * Check to see whether an error occurred while writing out the data to 35158c2ecf20Sopenharmony_ci * the allocated blocks. If so, return the magic error code so that we 35168c2ecf20Sopenharmony_ci * fallback to buffered I/O and attempt to complete the remainder of 35178c2ecf20Sopenharmony_ci * the I/O. Any blocks that may have been allocated in preparation for 35188c2ecf20Sopenharmony_ci * the direct I/O will be reused during buffered I/O. 
35198c2ecf20Sopenharmony_ci */ 35208c2ecf20Sopenharmony_ci if (flags & (IOMAP_WRITE | IOMAP_DIRECT) && written == 0) 35218c2ecf20Sopenharmony_ci return -ENOTBLK; 35228c2ecf20Sopenharmony_ci 35238c2ecf20Sopenharmony_ci return 0; 35248c2ecf20Sopenharmony_ci} 35258c2ecf20Sopenharmony_ci 35268c2ecf20Sopenharmony_ciconst struct iomap_ops ext4_iomap_ops = { 35278c2ecf20Sopenharmony_ci .iomap_begin = ext4_iomap_begin, 35288c2ecf20Sopenharmony_ci .iomap_end = ext4_iomap_end, 35298c2ecf20Sopenharmony_ci}; 35308c2ecf20Sopenharmony_ci 35318c2ecf20Sopenharmony_ciconst struct iomap_ops ext4_iomap_overwrite_ops = { 35328c2ecf20Sopenharmony_ci .iomap_begin = ext4_iomap_overwrite_begin, 35338c2ecf20Sopenharmony_ci .iomap_end = ext4_iomap_end, 35348c2ecf20Sopenharmony_ci}; 35358c2ecf20Sopenharmony_ci 35368c2ecf20Sopenharmony_cistatic bool ext4_iomap_is_delalloc(struct inode *inode, 35378c2ecf20Sopenharmony_ci struct ext4_map_blocks *map) 35388c2ecf20Sopenharmony_ci{ 35398c2ecf20Sopenharmony_ci struct extent_status es; 35408c2ecf20Sopenharmony_ci ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1; 35418c2ecf20Sopenharmony_ci 35428c2ecf20Sopenharmony_ci ext4_es_find_extent_range(inode, &ext4_es_is_delayed, 35438c2ecf20Sopenharmony_ci map->m_lblk, end, &es); 35448c2ecf20Sopenharmony_ci 35458c2ecf20Sopenharmony_ci if (!es.es_len || es.es_lblk > end) 35468c2ecf20Sopenharmony_ci return false; 35478c2ecf20Sopenharmony_ci 35488c2ecf20Sopenharmony_ci if (es.es_lblk > map->m_lblk) { 35498c2ecf20Sopenharmony_ci map->m_len = es.es_lblk - map->m_lblk; 35508c2ecf20Sopenharmony_ci return false; 35518c2ecf20Sopenharmony_ci } 35528c2ecf20Sopenharmony_ci 35538c2ecf20Sopenharmony_ci offset = map->m_lblk - es.es_lblk; 35548c2ecf20Sopenharmony_ci map->m_len = es.es_len - offset; 35558c2ecf20Sopenharmony_ci 35568c2ecf20Sopenharmony_ci return true; 35578c2ecf20Sopenharmony_ci} 35588c2ecf20Sopenharmony_ci 35598c2ecf20Sopenharmony_cistatic int ext4_iomap_begin_report(struct inode *inode, loff_t 
offset, 35608c2ecf20Sopenharmony_ci loff_t length, unsigned int flags, 35618c2ecf20Sopenharmony_ci struct iomap *iomap, struct iomap *srcmap) 35628c2ecf20Sopenharmony_ci{ 35638c2ecf20Sopenharmony_ci int ret; 35648c2ecf20Sopenharmony_ci bool delalloc = false; 35658c2ecf20Sopenharmony_ci struct ext4_map_blocks map; 35668c2ecf20Sopenharmony_ci u8 blkbits = inode->i_blkbits; 35678c2ecf20Sopenharmony_ci 35688c2ecf20Sopenharmony_ci if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) 35698c2ecf20Sopenharmony_ci return -EINVAL; 35708c2ecf20Sopenharmony_ci 35718c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) { 35728c2ecf20Sopenharmony_ci ret = ext4_inline_data_iomap(inode, iomap); 35738c2ecf20Sopenharmony_ci if (ret != -EAGAIN) { 35748c2ecf20Sopenharmony_ci if (ret == 0 && offset >= iomap->length) 35758c2ecf20Sopenharmony_ci ret = -ENOENT; 35768c2ecf20Sopenharmony_ci return ret; 35778c2ecf20Sopenharmony_ci } 35788c2ecf20Sopenharmony_ci } 35798c2ecf20Sopenharmony_ci 35808c2ecf20Sopenharmony_ci /* 35818c2ecf20Sopenharmony_ci * Calculate the first and last logical block respectively. 35828c2ecf20Sopenharmony_ci */ 35838c2ecf20Sopenharmony_ci map.m_lblk = offset >> blkbits; 35848c2ecf20Sopenharmony_ci map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits, 35858c2ecf20Sopenharmony_ci EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1; 35868c2ecf20Sopenharmony_ci 35878c2ecf20Sopenharmony_ci /* 35888c2ecf20Sopenharmony_ci * Fiemap callers may call for offset beyond s_bitmap_maxbytes. 35898c2ecf20Sopenharmony_ci * So handle it here itself instead of querying ext4_map_blocks(). 35908c2ecf20Sopenharmony_ci * Since ext4_map_blocks() will warn about it and will return 35918c2ecf20Sopenharmony_ci * -EIO error. 
35928c2ecf20Sopenharmony_ci */ 35938c2ecf20Sopenharmony_ci if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 35948c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 35958c2ecf20Sopenharmony_ci 35968c2ecf20Sopenharmony_ci if (offset >= sbi->s_bitmap_maxbytes) { 35978c2ecf20Sopenharmony_ci map.m_flags = 0; 35988c2ecf20Sopenharmony_ci goto set_iomap; 35998c2ecf20Sopenharmony_ci } 36008c2ecf20Sopenharmony_ci } 36018c2ecf20Sopenharmony_ci 36028c2ecf20Sopenharmony_ci ret = ext4_map_blocks(NULL, inode, &map, 0); 36038c2ecf20Sopenharmony_ci if (ret < 0) 36048c2ecf20Sopenharmony_ci return ret; 36058c2ecf20Sopenharmony_ci if (ret == 0) 36068c2ecf20Sopenharmony_ci delalloc = ext4_iomap_is_delalloc(inode, &map); 36078c2ecf20Sopenharmony_ci 36088c2ecf20Sopenharmony_ciset_iomap: 36098c2ecf20Sopenharmony_ci ext4_set_iomap(inode, iomap, &map, offset, length); 36108c2ecf20Sopenharmony_ci if (delalloc && iomap->type == IOMAP_HOLE) 36118c2ecf20Sopenharmony_ci iomap->type = IOMAP_DELALLOC; 36128c2ecf20Sopenharmony_ci 36138c2ecf20Sopenharmony_ci return 0; 36148c2ecf20Sopenharmony_ci} 36158c2ecf20Sopenharmony_ci 36168c2ecf20Sopenharmony_ciconst struct iomap_ops ext4_iomap_report_ops = { 36178c2ecf20Sopenharmony_ci .iomap_begin = ext4_iomap_begin_report, 36188c2ecf20Sopenharmony_ci}; 36198c2ecf20Sopenharmony_ci 36208c2ecf20Sopenharmony_ci/* 36218c2ecf20Sopenharmony_ci * Pages can be marked dirty completely asynchronously from ext4's journalling 36228c2ecf20Sopenharmony_ci * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do 36238c2ecf20Sopenharmony_ci * much here because ->set_page_dirty is called under VFS locks. The page is 36248c2ecf20Sopenharmony_ci * not necessarily locked. 36258c2ecf20Sopenharmony_ci * 36268c2ecf20Sopenharmony_ci * We cannot just dirty the page and leave attached buffers clean, because the 36278c2ecf20Sopenharmony_ci * buffers' dirty state is "definitive". 
We cannot just set the buffers dirty
 * or jbddirty because all the journalling code will explode.
 *
 * So what we do is to mark the page "pending dirty" and next time writepage
 * is called, propagate that into the buffers appropriately.
 */
static int ext4_journalled_set_page_dirty(struct page *page)
{
	/* PageChecked serves as the "pending dirty" marker. */
	SetPageChecked(page);
	return __set_page_dirty_nobuffers(page);
}

/*
 * ->set_page_dirty for the non-journalled aops tables (ext4_aops and
 * ext4_da_aops).  Warns once if the page is neither locked nor already
 * dirty, or if it has no buffers attached, then delegates to
 * __set_page_dirty_buffers().
 */
static int ext4_set_page_dirty(struct page *page)
{
	WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page));
	WARN_ON_ONCE(!page_has_buffers(page));
	return __set_page_dirty_buffers(page);
}

/*
 * ->swap_activate for every ext4 aops table: hands the request to
 * iomap_swapfile_activate() together with ext4_iomap_report_ops.
 */
static int ext4_iomap_swap_activate(struct swap_info_struct *sis,
				    struct file *file, sector_t *span)
{
	return iomap_swapfile_activate(sis, file, span,
				       &ext4_iomap_report_ops);
}

/*
 * Default address space operations; ext4_set_aops() installs them when the
 * inode is neither in journalled data mode nor DAX and the DELALLOC mount
 * option is off.
 */
static const struct address_space_operations ext4_aops = {
	.readpage		= ext4_readpage,
	.readahead		= ext4_readahead,
	.writepage		= ext4_writepage,
	.writepages		= ext4_writepages,
	.write_begin		= ext4_write_begin,
	.write_end		= ext4_write_end,
	.set_page_dirty		= ext4_set_page_dirty,
	.bmap			= ext4_bmap,
	.invalidatepage		= ext4_invalidatepage,
	.releasepage		= ext4_releasepage,
	.direct_IO		= noop_direct_IO,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate	= block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
	.swap_activate		= ext4_iomap_swap_activate,
};

/*
 * Address space operations for inodes in journalled data mode.  Compared
 * with ext4_aops this table uses the journalled ->write_end,
 * ->set_page_dirty and ->invalidatepage variants and provides no
 * ->migratepage.
 */
static const struct address_space_operations ext4_journalled_aops = {
	.readpage		= ext4_readpage,
	.readahead		= ext4_readahead,
	.writepage		= ext4_writepage,
	.writepages		= ext4_writepages,
	.write_begin		= ext4_write_begin,
	.write_end		= ext4_journalled_write_end,
	.set_page_dirty		= ext4_journalled_set_page_dirty,
	.bmap			= ext4_bmap,
	.invalidatepage		= ext4_journalled_invalidatepage,
	.releasepage		= ext4_releasepage,
	.direct_IO		= noop_direct_IO,
	.is_partially_uptodate	= block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
	.swap_activate		= ext4_iomap_swap_activate,
};

/*
 * Address space operations used when the DELALLOC mount option is set (and
 * the inode is neither journalled-data nor DAX).  Differs from ext4_aops
 * only in ->write_begin and ->write_end.
 */
static const struct address_space_operations ext4_da_aops = {
	.readpage		= ext4_readpage,
	.readahead		= ext4_readahead,
	.writepage		= ext4_writepage,
	.writepages		= ext4_writepages,
	.write_begin		= ext4_da_write_begin,
	.write_end		= ext4_da_write_end,
	.set_page_dirty		= ext4_set_page_dirty,
	.bmap			= ext4_bmap,
	.invalidatepage		= ext4_invalidatepage,
	.releasepage		= ext4_releasepage,
	.direct_IO		= noop_direct_IO,
	.migratepage		= buffer_migrate_page,
	.is_partially_uptodate	= block_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
	.swap_activate		= ext4_iomap_swap_activate,
};

/* Address space operations for DAX inodes; mostly no-op helpers. */
static const struct address_space_operations ext4_dax_aops = {
	.writepages		= ext4_dax_writepages,
	.direct_IO		= noop_direct_IO,
	.set_page_dirty		= noop_set_page_dirty,
	.bmap			= ext4_bmap,
	.invalidatepage		= noop_invalidatepage,
	.swap_activate		= ext4_iomap_swap_activate,
};

/*
 * Install the address space operations that match the inode's data mode.
 *
 * Journalled data mode always gets ext4_journalled_aops.  For ordered and
 * writeback mode the choice is, in order: ext4_dax_aops for DAX inodes,
 * ext4_da_aops when the DELALLOC mount option is set, ext4_aops otherwise.
 * Any other journal mode is a BUG().
 */
void ext4_set_aops(struct inode *inode)
{
	switch (ext4_inode_journal_mode(inode)) {
	case EXT4_INODE_ORDERED_DATA_MODE:
	case EXT4_INODE_WRITEBACK_DATA_MODE:
		/* Decided below, after the switch. */
		break;
	case EXT4_INODE_JOURNAL_DATA_MODE:
		inode->i_mapping->a_ops = &ext4_journalled_aops;
		return;
	default:
		BUG();
	}
	if (IS_DAX(inode))
		inode->i_mapping->a_ops = &ext4_dax_aops;
	else if (test_opt(inode->i_sb, DELALLOC))
		inode->i_mapping->a_ops = &ext4_da_aops;
	else
		inode->i_mapping->a_ops = &ext4_aops;
}

/*
 * Zero 'length' bytes starting at file offset 'from' in the page cache page
 * that covers 'from', and get the affected buffer written out.
 *
 * @handle:  journal handle passed on to the jbd2 helpers
 * @mapping: address space of the file
 * @from:    file offset of the first byte to zero
 * @length:  number of bytes to zero
 *
 * NOTE(review): nothing in this function clamps 'length' to the buffer that
 * contains 'from'; the caller visible in this file,
 * ext4_block_zero_page_range(), does that before calling in.
 *
 * Returns 0 on success, and also when the buffer is marked freed or turns
 * out to be an unmapped hole (nothing to zero).  Returns -ENOMEM when the
 * page cannot be obtained, or the error from reading, decrypting or
 * journalling the buffer.
 */
static int __ext4_block_zero_page_range(handle_t *handle,
		struct address_space *mapping, loff_t from, loff_t length)
{
	/* Page index of 'from' and byte offset of 'from' within that page. */
	ext4_fsblk_t index = from >> PAGE_SHIFT;
	unsigned offset = from & (PAGE_SIZE-1);
	unsigned blocksize, pos;
	ext4_lblk_t iblock;
	struct inode *inode = mapping->host;
	struct buffer_head *bh;
	struct page *page;
	int err = 0;

	/* __GFP_FS is masked out of the mapping's allocation flags here. */
	page = find_or_create_page(mapping, from >> PAGE_SHIFT,
				   mapping_gfp_constraint(mapping, ~__GFP_FS));
	if (!page)
		return -ENOMEM;

	blocksize = inode->i_sb->s_blocksize;

	/* Logical block number of the first block in this page. */
	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}
	if (buffer_freed(bh)) {
		BUFFER_TRACE(bh, "freed: skip");
		goto unlock;
	}
	if (!buffer_mapped(bh)) {
		BUFFER_TRACE(bh, "unmapped");
		/*
		 * Look the block up without creating it (last argument 0).
		 * The return value is not checked; only the resulting mapped
		 * state of the buffer is.
		 */
		ext4_get_block(inode, iblock, bh, 0);
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh)) {
			BUFFER_TRACE(bh, "still unmapped");
			goto unlock;
		}
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		/* presumably reads the block in and waits for it -- confirm */
		err = ext4_read_bh_lock(bh, 0, true);
		if (err)
			goto unlock;
		if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
			/* We expect the key to be set. */
			BUG_ON(!fscrypt_has_encryption_key(inode));
			err = fscrypt_decrypt_pagecache_blocks(page, blocksize,
							       bh_offset(bh));
			if (err) {
				/* Do not leave undecrypted data marked valid. */
				clear_buffer_uptodate(bh);
				goto unlock;
			}
		}
	}
	if (ext4_should_journal_data(inode)) {
		BUFFER_TRACE(bh, "get write access");
		err = ext4_journal_get_write_access(handle, bh);
		if (err)
			goto unlock;
	}
	zero_user(page, offset, length);
	BUFFER_TRACE(bh, "zeroed end of block");

	if (ext4_should_journal_data(inode)) {
		/* Data journalling: the buffer goes through the journal. */
		err = ext4_handle_dirty_metadata(handle, inode, bh);
	} else {
		err = 0;
		mark_buffer_dirty(bh);
		/* Ordered mode: register the zeroed range with jbd2. */
		if (ext4_should_order_data(inode))
			err = ext4_jbd2_inode_add_write(handle, inode, from,
					length);
	}

unlock:
	/* Common exit: unlock the page and drop our reference to it. */
	unlock_page(page);
	put_page(page);
	return err;
}

/*
 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
 * starting from file offset 'from'.  The range to be zero'd must
 * be contained within one block.
If the specified range exceeds 38298c2ecf20Sopenharmony_ci * the end of the block it will be shortened to end of the block 38308c2ecf20Sopenharmony_ci * that corresponds to 'from' 38318c2ecf20Sopenharmony_ci */ 38328c2ecf20Sopenharmony_cistatic int ext4_block_zero_page_range(handle_t *handle, 38338c2ecf20Sopenharmony_ci struct address_space *mapping, loff_t from, loff_t length) 38348c2ecf20Sopenharmony_ci{ 38358c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 38368c2ecf20Sopenharmony_ci unsigned offset = from & (PAGE_SIZE-1); 38378c2ecf20Sopenharmony_ci unsigned blocksize = inode->i_sb->s_blocksize; 38388c2ecf20Sopenharmony_ci unsigned max = blocksize - (offset & (blocksize - 1)); 38398c2ecf20Sopenharmony_ci 38408c2ecf20Sopenharmony_ci /* 38418c2ecf20Sopenharmony_ci * correct length if it does not fall between 38428c2ecf20Sopenharmony_ci * 'from' and the end of the block 38438c2ecf20Sopenharmony_ci */ 38448c2ecf20Sopenharmony_ci if (length > max || length < 0) 38458c2ecf20Sopenharmony_ci length = max; 38468c2ecf20Sopenharmony_ci 38478c2ecf20Sopenharmony_ci if (IS_DAX(inode)) { 38488c2ecf20Sopenharmony_ci return iomap_zero_range(inode, from, length, NULL, 38498c2ecf20Sopenharmony_ci &ext4_iomap_ops); 38508c2ecf20Sopenharmony_ci } 38518c2ecf20Sopenharmony_ci return __ext4_block_zero_page_range(handle, mapping, from, length); 38528c2ecf20Sopenharmony_ci} 38538c2ecf20Sopenharmony_ci 38548c2ecf20Sopenharmony_ci/* 38558c2ecf20Sopenharmony_ci * ext4_block_truncate_page() zeroes out a mapping from file offset `from' 38568c2ecf20Sopenharmony_ci * up to the end of the block which corresponds to `from'. 38578c2ecf20Sopenharmony_ci * This required during truncate. We need to physically zero the tail end 38588c2ecf20Sopenharmony_ci * of that block so it doesn't yield old data if the file is later grown. 
38598c2ecf20Sopenharmony_ci */ 38608c2ecf20Sopenharmony_cistatic int ext4_block_truncate_page(handle_t *handle, 38618c2ecf20Sopenharmony_ci struct address_space *mapping, loff_t from) 38628c2ecf20Sopenharmony_ci{ 38638c2ecf20Sopenharmony_ci unsigned offset = from & (PAGE_SIZE-1); 38648c2ecf20Sopenharmony_ci unsigned length; 38658c2ecf20Sopenharmony_ci unsigned blocksize; 38668c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 38678c2ecf20Sopenharmony_ci 38688c2ecf20Sopenharmony_ci /* If we are processing an encrypted inode during orphan list handling */ 38698c2ecf20Sopenharmony_ci if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode)) 38708c2ecf20Sopenharmony_ci return 0; 38718c2ecf20Sopenharmony_ci 38728c2ecf20Sopenharmony_ci blocksize = inode->i_sb->s_blocksize; 38738c2ecf20Sopenharmony_ci length = blocksize - (offset & (blocksize - 1)); 38748c2ecf20Sopenharmony_ci 38758c2ecf20Sopenharmony_ci return ext4_block_zero_page_range(handle, mapping, from, length); 38768c2ecf20Sopenharmony_ci} 38778c2ecf20Sopenharmony_ci 38788c2ecf20Sopenharmony_ciint ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, 38798c2ecf20Sopenharmony_ci loff_t lstart, loff_t length) 38808c2ecf20Sopenharmony_ci{ 38818c2ecf20Sopenharmony_ci struct super_block *sb = inode->i_sb; 38828c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 38838c2ecf20Sopenharmony_ci unsigned partial_start, partial_end; 38848c2ecf20Sopenharmony_ci ext4_fsblk_t start, end; 38858c2ecf20Sopenharmony_ci loff_t byte_end = (lstart + length - 1); 38868c2ecf20Sopenharmony_ci int err = 0; 38878c2ecf20Sopenharmony_ci 38888c2ecf20Sopenharmony_ci partial_start = lstart & (sb->s_blocksize - 1); 38898c2ecf20Sopenharmony_ci partial_end = byte_end & (sb->s_blocksize - 1); 38908c2ecf20Sopenharmony_ci 38918c2ecf20Sopenharmony_ci start = lstart >> sb->s_blocksize_bits; 38928c2ecf20Sopenharmony_ci end = byte_end >> sb->s_blocksize_bits; 38938c2ecf20Sopenharmony_ci 
38948c2ecf20Sopenharmony_ci /* Handle partial zero within the single block */ 38958c2ecf20Sopenharmony_ci if (start == end && 38968c2ecf20Sopenharmony_ci (partial_start || (partial_end != sb->s_blocksize - 1))) { 38978c2ecf20Sopenharmony_ci err = ext4_block_zero_page_range(handle, mapping, 38988c2ecf20Sopenharmony_ci lstart, length); 38998c2ecf20Sopenharmony_ci return err; 39008c2ecf20Sopenharmony_ci } 39018c2ecf20Sopenharmony_ci /* Handle partial zero out on the start of the range */ 39028c2ecf20Sopenharmony_ci if (partial_start) { 39038c2ecf20Sopenharmony_ci err = ext4_block_zero_page_range(handle, mapping, 39048c2ecf20Sopenharmony_ci lstart, sb->s_blocksize); 39058c2ecf20Sopenharmony_ci if (err) 39068c2ecf20Sopenharmony_ci return err; 39078c2ecf20Sopenharmony_ci } 39088c2ecf20Sopenharmony_ci /* Handle partial zero out on the end of the range */ 39098c2ecf20Sopenharmony_ci if (partial_end != sb->s_blocksize - 1) 39108c2ecf20Sopenharmony_ci err = ext4_block_zero_page_range(handle, mapping, 39118c2ecf20Sopenharmony_ci byte_end - partial_end, 39128c2ecf20Sopenharmony_ci partial_end + 1); 39138c2ecf20Sopenharmony_ci return err; 39148c2ecf20Sopenharmony_ci} 39158c2ecf20Sopenharmony_ci 39168c2ecf20Sopenharmony_ciint ext4_can_truncate(struct inode *inode) 39178c2ecf20Sopenharmony_ci{ 39188c2ecf20Sopenharmony_ci if (S_ISREG(inode->i_mode)) 39198c2ecf20Sopenharmony_ci return 1; 39208c2ecf20Sopenharmony_ci if (S_ISDIR(inode->i_mode)) 39218c2ecf20Sopenharmony_ci return 1; 39228c2ecf20Sopenharmony_ci if (S_ISLNK(inode->i_mode)) 39238c2ecf20Sopenharmony_ci return !ext4_inode_is_fast_symlink(inode); 39248c2ecf20Sopenharmony_ci return 0; 39258c2ecf20Sopenharmony_ci} 39268c2ecf20Sopenharmony_ci 39278c2ecf20Sopenharmony_ci/* 39288c2ecf20Sopenharmony_ci * We have to make sure i_disksize gets properly updated before we truncate 39298c2ecf20Sopenharmony_ci * page cache due to hole punching or zero range. 
Otherwise i_disksize update
 * can get lost as it may have been postponed to submission of writeback but
 * that will never happen after we truncate page cache.
 *
 * Nothing is done (and 0 is returned) unless the range
 * [offset, offset + len] contains i_size and i_disksize is still smaller
 * than i_size.  Otherwise i_disksize is raised to i_size inside a
 * one-credit EXT4_HT_MISC handle and the inode is marked dirty.
 *
 * Returns 0, the error from starting the handle, or the result of
 * ext4_mark_inode_dirty().  Warns if the inode is not locked.
 */
int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
				      loff_t len)
{
	handle_t *handle;
	int ret;

	loff_t size = i_size_read(inode);

	WARN_ON(!inode_is_locked(inode));
	/* The range does not reach i_size: no postponed update to save. */
	if (offset > size || offset + len < size)
		return 0;

	/* i_disksize has already caught up. */
	if (EXT4_I(inode)->i_disksize >= size)
		return 0;

	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ext4_update_i_disksize(inode, size);
	ret = ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);

	return ret;
}

/*
 * Sleep step used by ext4_break_layouts(): drop i_mmap_sem, call
 * schedule(), then take i_mmap_sem for writing again.
 */
static void ext4_wait_dax_page(struct ext4_inode_info *ei)
{
	up_write(&ei->i_mmap_sem);
	schedule();
	down_write(&ei->i_mmap_sem);
}

/*
 * Wait until dax_layout_busy_page() reports no busy page for the inode's
 * mapping.
 *
 * i_mmap_sem must be held on entry (otherwise a one-time warning is issued
 * and -EINVAL is returned); it is dropped and retaken around each sleep by
 * ext4_wait_dax_page().  The wait is TASK_INTERRUPTIBLE.
 *
 * Returns 0 once no busy page is left, or the non-zero result of the wait.
 */
int ext4_break_layouts(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct page *page;
	int error;

	if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem)))
		return -EINVAL;

	do {
		page = dax_layout_busy_page(inode->i_mapping);
		if (!page)
			return 0;

		/* Wait for the page's reference count to drop to 1. */
		error = ___wait_var_event(&page->_refcount,
				atomic_read(&page->_refcount) == 1,
				TASK_INTERRUPTIBLE, 0, 0,
				ext4_wait_dax_page(ei));
	} while (error == 0);

	return error;
}

/*
 * ext4_punch_hole: punches a hole in a file by releasing the blocks
 * associated with the given offset and length
 *
 * @file:   File whose inode the hole is punched in
 * @offset: The offset where the hole will begin
 * @length: The length of the hole
 *
 * Locks are taken in the order inode lock, i_mmap_sem, journal handle,
 * i_data_sem and released in reverse through the out_* labels.
 *
 * Returns: 0 on success or negative on failure
 */

int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	ext4_lblk_t first_block, stop_block;
	struct address_space *mapping = inode->i_mapping;
	loff_t first_block_offset, last_block_offset, max_length;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	handle_t *handle;
	unsigned int credits;
	int ret = 0, ret2 = 0;

	trace_ext4_punch_hole(inode, offset, length, 0);

	/*
	 * Write out all dirty pages to avoid race conditions
	 * Then release them.
	 */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
		ret = filemap_write_and_wait_range(mapping, offset,
						   offset + length - 1);
		if (ret)
			return ret;
	}

	inode_lock(inode);

	/* No need to punch hole beyond i_size */
	if (offset >= inode->i_size)
		goto out_mutex;

	/*
	 * If the hole extends beyond i_size, set the hole
	 * to end after the page that contains i_size
	 */
	if (offset + length > inode->i_size) {
		length = inode->i_size +
		   PAGE_SIZE - (inode->i_size & (PAGE_SIZE - 1)) -
		   offset;
	}

	/*
	 * For punch hole the length + offset needs to be within one block
	 * before last range. Adjust the length if it goes beyond that limit.
	 */
	max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize;
	if (offset + length > max_length)
		length = max_length - offset;

	/* Either end of the hole is not block aligned. */
	if (offset & (sb->s_blocksize - 1) ||
	    (offset + length) & (sb->s_blocksize - 1)) {
		/*
		 * Attach jinode to inode for jbd2 if we do any zeroing of
		 * partial block
		 */
		ret = ext4_inode_attach_jinode(inode);
		if (ret < 0)
			goto out_mutex;

	}

	/* Wait all existing dio workers, newcomers will block on i_mutex */
	inode_dio_wait(inode);

	ret = file_modified(file);
	if (ret)
		goto out_mutex;

	/*
	 * Prevent page faults from reinstantiating pages we have released from
	 * page cache.
	 */
	down_write(&EXT4_I(inode)->i_mmap_sem);

	ret = ext4_break_layouts(inode);
	if (ret)
		goto out_dio;

	/* Byte range of the blocks that lie completely inside the hole. */
	first_block_offset = round_up(offset, sb->s_blocksize);
	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;

	/* Now release the pages and zero block aligned part of pages*/
	if (last_block_offset > first_block_offset) {
		ret = ext4_update_disksize_before_punch(inode, offset, length);
		if (ret)
			goto out_dio;
		truncate_pagecache_range(inode, first_block_offset,
					 last_block_offset);
	}

	/* The credit estimate depends on the block mapping scheme. */
	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		credits = ext4_writepage_trans_blocks(inode);
	else
		credits = ext4_blocks_for_truncate(inode);
	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		ext4_std_error(sb, ret);
		goto out_dio;
	}

	ret = ext4_zero_partial_blocks(handle, inode, offset,
				       length);
	if (ret)
		goto out_stop;

	/* First block fully inside the hole and first block past it. */
	first_block = (offset + sb->s_blocksize - 1) >>
		EXT4_BLOCK_SIZE_BITS(sb);
	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);

	/* If there are blocks to remove, do it */
	if (stop_block > first_block) {

		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_discard_preallocations(inode, 0);

		ret = ext4_es_remove_extent(inode, first_block,
					    stop_block - first_block);
		if (ret) {
			up_write(&EXT4_I(inode)->i_data_sem);
			goto out_stop;
		}

		/*
		 * Note the different end arguments: stop_block - 1 for the
		 * extent helper, stop_block for the indirect-block helper.
		 */
		if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
			ret = ext4_ext_remove_space(inode, first_block,
						    stop_block - 1);
		else
			ret = ext4_ind_remove_space(handle, inode, first_block,
						    stop_block);

		up_write(&EXT4_I(inode)->i_data_sem);
	}
	ext4_fc_track_range(handle, inode, first_block, stop_block);
	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

	inode->i_mtime = inode->i_ctime = current_time(inode);
	ret2 = ext4_mark_inode_dirty(handle, inode);
	/* A failure to dirty the inode replaces the removal result. */
	if (unlikely(ret2))
		ret = ret2;
	if (ret >= 0)
		ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
	ext4_journal_stop(handle);
out_dio:
	up_write(&EXT4_I(inode)->i_mmap_sem);
out_mutex:
	inode_unlock(inode);
	return ret;
}

/*
 * Make sure the inode has a jbd2_inode attached (ei->jinode).
 *
 * Does nothing when one is already attached or when the filesystem has no
 * journal.  The allocation is done with GFP_KERNEL before i_lock is taken
 * and ei->jinode is checked again under the lock; if another task attached
 * one in the meantime, the spare allocation is freed.
 *
 * Returns 0 on success, -ENOMEM if the allocation failed and no jinode had
 * been attached by the time the lock was taken.
 */
int ext4_inode_attach_jinode(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct jbd2_inode *jinode;

	if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal)
		return 0;

	jinode = jbd2_alloc_inode(GFP_KERNEL);
	spin_lock(&inode->i_lock);
	if (!ei->jinode) {
		if (!jinode) {
			spin_unlock(&inode->i_lock);
			return -ENOMEM;
		}
		ei->jinode = jinode;
		jbd2_journal_init_jbd_inode(ei->jinode, inode);
		/* Ownership moved to the inode; nothing left to free. */
		jinode = NULL;
	}
	spin_unlock(&inode->i_lock);
	/* Still set only if another task attached a jinode first. */
	if (unlikely(jinode != NULL))
		jbd2_free_inode(jinode);
	return 0;
}

/*
 * ext4_truncate()
 *
 * We block out ext4_get_block() block instantiations across the entire
 * transaction, and VFS/VM ensures that ext4_truncate() cannot run
 * simultaneously on behalf of the same inode.
 *
 * As we work through the truncate and commit bits of it to the journal there
 * is one core, guiding principle: the file's tree must always be consistent on
 * disk.  We must be able to restart the truncate after a crash.
 *
 * The file's tree may be transiently inconsistent in memory (although it
 * probably isn't), but whenever we close off and commit a journal transaction,
 * the contents of (the filesystem + the journal) must be consistent and
 * restartable.  It's pretty simple, really: bottom up, right to left (although
 * left-to-right works OK too).
 *
 * Note that at recovery time, journal replay occurs *before* the restart of
 * truncate against the orphan inode list.
 *
 * The committed inode has the new, desired i_size (which is the same as
 * i_disksize in this case).  After a crash, ext4_orphan_cleanup() will see
 * that this inode's truncate did not complete and it will again call
 * ext4_truncate() to have another go.  So there will be instantiated blocks
 * to the right of the truncation point in a crashed ext4 filesystem.  But
 * that's fine - as long as they are linked from the inode, the post-crash
 * ext4_truncate() run will find them and release them.
42028c2ecf20Sopenharmony_ci */ 42038c2ecf20Sopenharmony_ciint ext4_truncate(struct inode *inode) 42048c2ecf20Sopenharmony_ci{ 42058c2ecf20Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 42068c2ecf20Sopenharmony_ci unsigned int credits; 42078c2ecf20Sopenharmony_ci int err = 0, err2; 42088c2ecf20Sopenharmony_ci handle_t *handle; 42098c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 42108c2ecf20Sopenharmony_ci 42118c2ecf20Sopenharmony_ci /* 42128c2ecf20Sopenharmony_ci * There is a possibility that we're either freeing the inode 42138c2ecf20Sopenharmony_ci * or it's a completely new inode. In those cases we might not 42148c2ecf20Sopenharmony_ci * have i_mutex locked because it's not necessary. 42158c2ecf20Sopenharmony_ci */ 42168c2ecf20Sopenharmony_ci if (!(inode->i_state & (I_NEW|I_FREEING))) 42178c2ecf20Sopenharmony_ci WARN_ON(!inode_is_locked(inode)); 42188c2ecf20Sopenharmony_ci trace_ext4_truncate_enter(inode); 42198c2ecf20Sopenharmony_ci 42208c2ecf20Sopenharmony_ci if (!ext4_can_truncate(inode)) 42218c2ecf20Sopenharmony_ci goto out_trace; 42228c2ecf20Sopenharmony_ci 42238c2ecf20Sopenharmony_ci if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) 42248c2ecf20Sopenharmony_ci ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); 42258c2ecf20Sopenharmony_ci 42268c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) { 42278c2ecf20Sopenharmony_ci int has_inline = 1; 42288c2ecf20Sopenharmony_ci 42298c2ecf20Sopenharmony_ci err = ext4_inline_data_truncate(inode, &has_inline); 42308c2ecf20Sopenharmony_ci if (err || has_inline) 42318c2ecf20Sopenharmony_ci goto out_trace; 42328c2ecf20Sopenharmony_ci } 42338c2ecf20Sopenharmony_ci 42348c2ecf20Sopenharmony_ci /* If we zero-out tail of the page, we have to create jinode for jbd2 */ 42358c2ecf20Sopenharmony_ci if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { 42368c2ecf20Sopenharmony_ci err = ext4_inode_attach_jinode(inode); 42378c2ecf20Sopenharmony_ci if (err) 
42388c2ecf20Sopenharmony_ci goto out_trace; 42398c2ecf20Sopenharmony_ci } 42408c2ecf20Sopenharmony_ci 42418c2ecf20Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 42428c2ecf20Sopenharmony_ci credits = ext4_writepage_trans_blocks(inode); 42438c2ecf20Sopenharmony_ci else 42448c2ecf20Sopenharmony_ci credits = ext4_blocks_for_truncate(inode); 42458c2ecf20Sopenharmony_ci 42468c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); 42478c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 42488c2ecf20Sopenharmony_ci err = PTR_ERR(handle); 42498c2ecf20Sopenharmony_ci goto out_trace; 42508c2ecf20Sopenharmony_ci } 42518c2ecf20Sopenharmony_ci 42528c2ecf20Sopenharmony_ci if (inode->i_size & (inode->i_sb->s_blocksize - 1)) 42538c2ecf20Sopenharmony_ci ext4_block_truncate_page(handle, mapping, inode->i_size); 42548c2ecf20Sopenharmony_ci 42558c2ecf20Sopenharmony_ci /* 42568c2ecf20Sopenharmony_ci * We add the inode to the orphan list, so that if this 42578c2ecf20Sopenharmony_ci * truncate spans multiple transactions, and we crash, we will 42588c2ecf20Sopenharmony_ci * resume the truncate when the filesystem recovers. It also 42598c2ecf20Sopenharmony_ci * marks the inode dirty, to catch the new size. 42608c2ecf20Sopenharmony_ci * 42618c2ecf20Sopenharmony_ci * Implication: the file must always be in a sane, consistent 42628c2ecf20Sopenharmony_ci * truncatable state while each transaction commits. 
42638c2ecf20Sopenharmony_ci */ 42648c2ecf20Sopenharmony_ci err = ext4_orphan_add(handle, inode); 42658c2ecf20Sopenharmony_ci if (err) 42668c2ecf20Sopenharmony_ci goto out_stop; 42678c2ecf20Sopenharmony_ci 42688c2ecf20Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 42698c2ecf20Sopenharmony_ci 42708c2ecf20Sopenharmony_ci ext4_discard_preallocations(inode, 0); 42718c2ecf20Sopenharmony_ci 42728c2ecf20Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 42738c2ecf20Sopenharmony_ci err = ext4_ext_truncate(handle, inode); 42748c2ecf20Sopenharmony_ci else 42758c2ecf20Sopenharmony_ci ext4_ind_truncate(handle, inode); 42768c2ecf20Sopenharmony_ci 42778c2ecf20Sopenharmony_ci up_write(&ei->i_data_sem); 42788c2ecf20Sopenharmony_ci if (err) 42798c2ecf20Sopenharmony_ci goto out_stop; 42808c2ecf20Sopenharmony_ci 42818c2ecf20Sopenharmony_ci if (IS_SYNC(inode)) 42828c2ecf20Sopenharmony_ci ext4_handle_sync(handle); 42838c2ecf20Sopenharmony_ci 42848c2ecf20Sopenharmony_ciout_stop: 42858c2ecf20Sopenharmony_ci /* 42868c2ecf20Sopenharmony_ci * If this was a simple ftruncate() and the file will remain alive, 42878c2ecf20Sopenharmony_ci * then we need to clear up the orphan record which we created above. 42888c2ecf20Sopenharmony_ci * However, if this was a real unlink then we were called by 42898c2ecf20Sopenharmony_ci * ext4_evict_inode(), and we allow that function to clean up the 42908c2ecf20Sopenharmony_ci * orphan info for us. 
42918c2ecf20Sopenharmony_ci */ 42928c2ecf20Sopenharmony_ci if (inode->i_nlink) 42938c2ecf20Sopenharmony_ci ext4_orphan_del(handle, inode); 42948c2ecf20Sopenharmony_ci 42958c2ecf20Sopenharmony_ci inode->i_mtime = inode->i_ctime = current_time(inode); 42968c2ecf20Sopenharmony_ci err2 = ext4_mark_inode_dirty(handle, inode); 42978c2ecf20Sopenharmony_ci if (unlikely(err2 && !err)) 42988c2ecf20Sopenharmony_ci err = err2; 42998c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 43008c2ecf20Sopenharmony_ci 43018c2ecf20Sopenharmony_ciout_trace: 43028c2ecf20Sopenharmony_ci trace_ext4_truncate_exit(inode); 43038c2ecf20Sopenharmony_ci return err; 43048c2ecf20Sopenharmony_ci} 43058c2ecf20Sopenharmony_ci 43068c2ecf20Sopenharmony_cistatic inline u64 ext4_inode_peek_iversion(const struct inode *inode) 43078c2ecf20Sopenharmony_ci{ 43088c2ecf20Sopenharmony_ci if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) 43098c2ecf20Sopenharmony_ci return inode_peek_iversion_raw(inode); 43108c2ecf20Sopenharmony_ci else 43118c2ecf20Sopenharmony_ci return inode_peek_iversion(inode); 43128c2ecf20Sopenharmony_ci} 43138c2ecf20Sopenharmony_ci 43148c2ecf20Sopenharmony_cistatic int ext4_inode_blocks_set(struct ext4_inode *raw_inode, 43158c2ecf20Sopenharmony_ci struct ext4_inode_info *ei) 43168c2ecf20Sopenharmony_ci{ 43178c2ecf20Sopenharmony_ci struct inode *inode = &(ei->vfs_inode); 43188c2ecf20Sopenharmony_ci u64 i_blocks = READ_ONCE(inode->i_blocks); 43198c2ecf20Sopenharmony_ci struct super_block *sb = inode->i_sb; 43208c2ecf20Sopenharmony_ci 43218c2ecf20Sopenharmony_ci if (i_blocks <= ~0U) { 43228c2ecf20Sopenharmony_ci /* 43238c2ecf20Sopenharmony_ci * i_blocks can be represented in a 32 bit variable 43248c2ecf20Sopenharmony_ci * as multiple of 512 bytes 43258c2ecf20Sopenharmony_ci */ 43268c2ecf20Sopenharmony_ci raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 43278c2ecf20Sopenharmony_ci raw_inode->i_blocks_high = 0; 43288c2ecf20Sopenharmony_ci ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); 
43298c2ecf20Sopenharmony_ci return 0; 43308c2ecf20Sopenharmony_ci } 43318c2ecf20Sopenharmony_ci 43328c2ecf20Sopenharmony_ci /* 43338c2ecf20Sopenharmony_ci * This should never happen since sb->s_maxbytes should not have 43348c2ecf20Sopenharmony_ci * allowed this, sb->s_maxbytes was set according to the huge_file 43358c2ecf20Sopenharmony_ci * feature in ext4_fill_super(). 43368c2ecf20Sopenharmony_ci */ 43378c2ecf20Sopenharmony_ci if (!ext4_has_feature_huge_file(sb)) 43388c2ecf20Sopenharmony_ci return -EFSCORRUPTED; 43398c2ecf20Sopenharmony_ci 43408c2ecf20Sopenharmony_ci if (i_blocks <= 0xffffffffffffULL) { 43418c2ecf20Sopenharmony_ci /* 43428c2ecf20Sopenharmony_ci * i_blocks can be represented in a 48 bit variable 43438c2ecf20Sopenharmony_ci * as multiple of 512 bytes 43448c2ecf20Sopenharmony_ci */ 43458c2ecf20Sopenharmony_ci raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 43468c2ecf20Sopenharmony_ci raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 43478c2ecf20Sopenharmony_ci ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); 43488c2ecf20Sopenharmony_ci } else { 43498c2ecf20Sopenharmony_ci ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE); 43508c2ecf20Sopenharmony_ci /* i_block is stored in file system block size */ 43518c2ecf20Sopenharmony_ci i_blocks = i_blocks >> (inode->i_blkbits - 9); 43528c2ecf20Sopenharmony_ci raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 43538c2ecf20Sopenharmony_ci raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 43548c2ecf20Sopenharmony_ci } 43558c2ecf20Sopenharmony_ci return 0; 43568c2ecf20Sopenharmony_ci} 43578c2ecf20Sopenharmony_ci 43588c2ecf20Sopenharmony_cistatic int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode) 43598c2ecf20Sopenharmony_ci{ 43608c2ecf20Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 43618c2ecf20Sopenharmony_ci uid_t i_uid; 43628c2ecf20Sopenharmony_ci gid_t i_gid; 43638c2ecf20Sopenharmony_ci projid_t i_projid; 43648c2ecf20Sopenharmony_ci int block; 
43658c2ecf20Sopenharmony_ci int err; 43668c2ecf20Sopenharmony_ci 43678c2ecf20Sopenharmony_ci err = ext4_inode_blocks_set(raw_inode, ei); 43688c2ecf20Sopenharmony_ci 43698c2ecf20Sopenharmony_ci raw_inode->i_mode = cpu_to_le16(inode->i_mode); 43708c2ecf20Sopenharmony_ci i_uid = i_uid_read(inode); 43718c2ecf20Sopenharmony_ci i_gid = i_gid_read(inode); 43728c2ecf20Sopenharmony_ci i_projid = from_kprojid(&init_user_ns, ei->i_projid); 43738c2ecf20Sopenharmony_ci if (!(test_opt(inode->i_sb, NO_UID32))) { 43748c2ecf20Sopenharmony_ci raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); 43758c2ecf20Sopenharmony_ci raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); 43768c2ecf20Sopenharmony_ci /* 43778c2ecf20Sopenharmony_ci * Fix up interoperability with old kernels. Otherwise, 43788c2ecf20Sopenharmony_ci * old inodes get re-used with the upper 16 bits of the 43798c2ecf20Sopenharmony_ci * uid/gid intact. 43808c2ecf20Sopenharmony_ci */ 43818c2ecf20Sopenharmony_ci if (ei->i_dtime && list_empty(&ei->i_orphan)) { 43828c2ecf20Sopenharmony_ci raw_inode->i_uid_high = 0; 43838c2ecf20Sopenharmony_ci raw_inode->i_gid_high = 0; 43848c2ecf20Sopenharmony_ci } else { 43858c2ecf20Sopenharmony_ci raw_inode->i_uid_high = 43868c2ecf20Sopenharmony_ci cpu_to_le16(high_16_bits(i_uid)); 43878c2ecf20Sopenharmony_ci raw_inode->i_gid_high = 43888c2ecf20Sopenharmony_ci cpu_to_le16(high_16_bits(i_gid)); 43898c2ecf20Sopenharmony_ci } 43908c2ecf20Sopenharmony_ci } else { 43918c2ecf20Sopenharmony_ci raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid)); 43928c2ecf20Sopenharmony_ci raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid)); 43938c2ecf20Sopenharmony_ci raw_inode->i_uid_high = 0; 43948c2ecf20Sopenharmony_ci raw_inode->i_gid_high = 0; 43958c2ecf20Sopenharmony_ci } 43968c2ecf20Sopenharmony_ci raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); 43978c2ecf20Sopenharmony_ci 43988c2ecf20Sopenharmony_ci EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode); 43998c2ecf20Sopenharmony_ci 
EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); 44008c2ecf20Sopenharmony_ci EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); 44018c2ecf20Sopenharmony_ci EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); 44028c2ecf20Sopenharmony_ci 44038c2ecf20Sopenharmony_ci raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 44048c2ecf20Sopenharmony_ci raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); 44058c2ecf20Sopenharmony_ci if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) 44068c2ecf20Sopenharmony_ci raw_inode->i_file_acl_high = 44078c2ecf20Sopenharmony_ci cpu_to_le16(ei->i_file_acl >> 32); 44088c2ecf20Sopenharmony_ci raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); 44098c2ecf20Sopenharmony_ci ext4_isize_set(raw_inode, ei->i_disksize); 44108c2ecf20Sopenharmony_ci 44118c2ecf20Sopenharmony_ci raw_inode->i_generation = cpu_to_le32(inode->i_generation); 44128c2ecf20Sopenharmony_ci if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 44138c2ecf20Sopenharmony_ci if (old_valid_dev(inode->i_rdev)) { 44148c2ecf20Sopenharmony_ci raw_inode->i_block[0] = 44158c2ecf20Sopenharmony_ci cpu_to_le32(old_encode_dev(inode->i_rdev)); 44168c2ecf20Sopenharmony_ci raw_inode->i_block[1] = 0; 44178c2ecf20Sopenharmony_ci } else { 44188c2ecf20Sopenharmony_ci raw_inode->i_block[0] = 0; 44198c2ecf20Sopenharmony_ci raw_inode->i_block[1] = 44208c2ecf20Sopenharmony_ci cpu_to_le32(new_encode_dev(inode->i_rdev)); 44218c2ecf20Sopenharmony_ci raw_inode->i_block[2] = 0; 44228c2ecf20Sopenharmony_ci } 44238c2ecf20Sopenharmony_ci } else if (!ext4_has_inline_data(inode)) { 44248c2ecf20Sopenharmony_ci for (block = 0; block < EXT4_N_BLOCKS; block++) 44258c2ecf20Sopenharmony_ci raw_inode->i_block[block] = ei->i_data[block]; 44268c2ecf20Sopenharmony_ci } 44278c2ecf20Sopenharmony_ci 44288c2ecf20Sopenharmony_ci if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { 44298c2ecf20Sopenharmony_ci u64 ivers = ext4_inode_peek_iversion(inode); 44308c2ecf20Sopenharmony_ci 44318c2ecf20Sopenharmony_ci 
raw_inode->i_disk_version = cpu_to_le32(ivers); 44328c2ecf20Sopenharmony_ci if (ei->i_extra_isize) { 44338c2ecf20Sopenharmony_ci if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 44348c2ecf20Sopenharmony_ci raw_inode->i_version_hi = 44358c2ecf20Sopenharmony_ci cpu_to_le32(ivers >> 32); 44368c2ecf20Sopenharmony_ci raw_inode->i_extra_isize = 44378c2ecf20Sopenharmony_ci cpu_to_le16(ei->i_extra_isize); 44388c2ecf20Sopenharmony_ci } 44398c2ecf20Sopenharmony_ci } 44408c2ecf20Sopenharmony_ci 44418c2ecf20Sopenharmony_ci if (i_projid != EXT4_DEF_PROJID && 44428c2ecf20Sopenharmony_ci !ext4_has_feature_project(inode->i_sb)) 44438c2ecf20Sopenharmony_ci err = err ?: -EFSCORRUPTED; 44448c2ecf20Sopenharmony_ci 44458c2ecf20Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 44468c2ecf20Sopenharmony_ci EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) 44478c2ecf20Sopenharmony_ci raw_inode->i_projid = cpu_to_le32(i_projid); 44488c2ecf20Sopenharmony_ci 44498c2ecf20Sopenharmony_ci ext4_inode_csum_set(inode, raw_inode, ei); 44508c2ecf20Sopenharmony_ci return err; 44518c2ecf20Sopenharmony_ci} 44528c2ecf20Sopenharmony_ci 44538c2ecf20Sopenharmony_ci/* 44548c2ecf20Sopenharmony_ci * ext4_get_inode_loc returns with an extra refcount against the inode's 44558c2ecf20Sopenharmony_ci * underlying buffer_head on success. If we pass 'inode' and it does not 44568c2ecf20Sopenharmony_ci * have in-inode xattr, we have all inode data in memory that is needed 44578c2ecf20Sopenharmony_ci * to recreate the on-disk version of this inode. 
44588c2ecf20Sopenharmony_ci */ 44598c2ecf20Sopenharmony_cistatic int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino, 44608c2ecf20Sopenharmony_ci struct inode *inode, struct ext4_iloc *iloc, 44618c2ecf20Sopenharmony_ci ext4_fsblk_t *ret_block) 44628c2ecf20Sopenharmony_ci{ 44638c2ecf20Sopenharmony_ci struct ext4_group_desc *gdp; 44648c2ecf20Sopenharmony_ci struct buffer_head *bh; 44658c2ecf20Sopenharmony_ci ext4_fsblk_t block; 44668c2ecf20Sopenharmony_ci struct blk_plug plug; 44678c2ecf20Sopenharmony_ci int inodes_per_block, inode_offset; 44688c2ecf20Sopenharmony_ci 44698c2ecf20Sopenharmony_ci iloc->bh = NULL; 44708c2ecf20Sopenharmony_ci if (ino < EXT4_ROOT_INO || 44718c2ecf20Sopenharmony_ci ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) 44728c2ecf20Sopenharmony_ci return -EFSCORRUPTED; 44738c2ecf20Sopenharmony_ci 44748c2ecf20Sopenharmony_ci iloc->block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 44758c2ecf20Sopenharmony_ci gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); 44768c2ecf20Sopenharmony_ci if (!gdp) 44778c2ecf20Sopenharmony_ci return -EIO; 44788c2ecf20Sopenharmony_ci 44798c2ecf20Sopenharmony_ci /* 44808c2ecf20Sopenharmony_ci * Figure out the offset within the block group inode table 44818c2ecf20Sopenharmony_ci */ 44828c2ecf20Sopenharmony_ci inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; 44838c2ecf20Sopenharmony_ci inode_offset = ((ino - 1) % 44848c2ecf20Sopenharmony_ci EXT4_INODES_PER_GROUP(sb)); 44858c2ecf20Sopenharmony_ci iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); 44868c2ecf20Sopenharmony_ci 44878c2ecf20Sopenharmony_ci block = ext4_inode_table(sb, gdp); 44888c2ecf20Sopenharmony_ci if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) || 44898c2ecf20Sopenharmony_ci (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) { 44908c2ecf20Sopenharmony_ci ext4_error(sb, "Invalid inode table block %llu in " 44918c2ecf20Sopenharmony_ci "block_group %u", block, iloc->block_group); 
44928c2ecf20Sopenharmony_ci return -EFSCORRUPTED; 44938c2ecf20Sopenharmony_ci } 44948c2ecf20Sopenharmony_ci block += (inode_offset / inodes_per_block); 44958c2ecf20Sopenharmony_ci 44968c2ecf20Sopenharmony_ci bh = sb_getblk(sb, block); 44978c2ecf20Sopenharmony_ci if (unlikely(!bh)) 44988c2ecf20Sopenharmony_ci return -ENOMEM; 44998c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh)) { 45008c2ecf20Sopenharmony_ci lock_buffer(bh); 45018c2ecf20Sopenharmony_ci 45028c2ecf20Sopenharmony_ci if (ext4_buffer_uptodate(bh)) { 45038c2ecf20Sopenharmony_ci /* someone brought it uptodate while we waited */ 45048c2ecf20Sopenharmony_ci unlock_buffer(bh); 45058c2ecf20Sopenharmony_ci goto has_buffer; 45068c2ecf20Sopenharmony_ci } 45078c2ecf20Sopenharmony_ci 45088c2ecf20Sopenharmony_ci /* 45098c2ecf20Sopenharmony_ci * If we have all information of the inode in memory and this 45108c2ecf20Sopenharmony_ci * is the only valid inode in the block, we need not read the 45118c2ecf20Sopenharmony_ci * block. 45128c2ecf20Sopenharmony_ci */ 45138c2ecf20Sopenharmony_ci if (inode && !ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 45148c2ecf20Sopenharmony_ci struct buffer_head *bitmap_bh; 45158c2ecf20Sopenharmony_ci int i, start; 45168c2ecf20Sopenharmony_ci 45178c2ecf20Sopenharmony_ci start = inode_offset & ~(inodes_per_block - 1); 45188c2ecf20Sopenharmony_ci 45198c2ecf20Sopenharmony_ci /* Is the inode bitmap in cache? */ 45208c2ecf20Sopenharmony_ci bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); 45218c2ecf20Sopenharmony_ci if (unlikely(!bitmap_bh)) 45228c2ecf20Sopenharmony_ci goto make_io; 45238c2ecf20Sopenharmony_ci 45248c2ecf20Sopenharmony_ci /* 45258c2ecf20Sopenharmony_ci * If the inode bitmap isn't in cache then the 45268c2ecf20Sopenharmony_ci * optimisation may end up performing two reads instead 45278c2ecf20Sopenharmony_ci * of one, so skip it. 
45288c2ecf20Sopenharmony_ci */ 45298c2ecf20Sopenharmony_ci if (!buffer_uptodate(bitmap_bh)) { 45308c2ecf20Sopenharmony_ci brelse(bitmap_bh); 45318c2ecf20Sopenharmony_ci goto make_io; 45328c2ecf20Sopenharmony_ci } 45338c2ecf20Sopenharmony_ci for (i = start; i < start + inodes_per_block; i++) { 45348c2ecf20Sopenharmony_ci if (i == inode_offset) 45358c2ecf20Sopenharmony_ci continue; 45368c2ecf20Sopenharmony_ci if (ext4_test_bit(i, bitmap_bh->b_data)) 45378c2ecf20Sopenharmony_ci break; 45388c2ecf20Sopenharmony_ci } 45398c2ecf20Sopenharmony_ci brelse(bitmap_bh); 45408c2ecf20Sopenharmony_ci if (i == start + inodes_per_block) { 45418c2ecf20Sopenharmony_ci struct ext4_inode *raw_inode = 45428c2ecf20Sopenharmony_ci (struct ext4_inode *) (bh->b_data + iloc->offset); 45438c2ecf20Sopenharmony_ci 45448c2ecf20Sopenharmony_ci /* all other inodes are free, so skip I/O */ 45458c2ecf20Sopenharmony_ci memset(bh->b_data, 0, bh->b_size); 45468c2ecf20Sopenharmony_ci if (!ext4_test_inode_state(inode, EXT4_STATE_NEW)) 45478c2ecf20Sopenharmony_ci ext4_fill_raw_inode(inode, raw_inode); 45488c2ecf20Sopenharmony_ci set_buffer_uptodate(bh); 45498c2ecf20Sopenharmony_ci unlock_buffer(bh); 45508c2ecf20Sopenharmony_ci goto has_buffer; 45518c2ecf20Sopenharmony_ci } 45528c2ecf20Sopenharmony_ci } 45538c2ecf20Sopenharmony_ci 45548c2ecf20Sopenharmony_cimake_io: 45558c2ecf20Sopenharmony_ci /* 45568c2ecf20Sopenharmony_ci * If we need to do any I/O, try to pre-readahead extra 45578c2ecf20Sopenharmony_ci * blocks from the inode table. 
45588c2ecf20Sopenharmony_ci */ 45598c2ecf20Sopenharmony_ci blk_start_plug(&plug); 45608c2ecf20Sopenharmony_ci if (EXT4_SB(sb)->s_inode_readahead_blks) { 45618c2ecf20Sopenharmony_ci ext4_fsblk_t b, end, table; 45628c2ecf20Sopenharmony_ci unsigned num; 45638c2ecf20Sopenharmony_ci __u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks; 45648c2ecf20Sopenharmony_ci 45658c2ecf20Sopenharmony_ci table = ext4_inode_table(sb, gdp); 45668c2ecf20Sopenharmony_ci /* s_inode_readahead_blks is always a power of 2 */ 45678c2ecf20Sopenharmony_ci b = block & ~((ext4_fsblk_t) ra_blks - 1); 45688c2ecf20Sopenharmony_ci if (table > b) 45698c2ecf20Sopenharmony_ci b = table; 45708c2ecf20Sopenharmony_ci end = b + ra_blks; 45718c2ecf20Sopenharmony_ci num = EXT4_INODES_PER_GROUP(sb); 45728c2ecf20Sopenharmony_ci if (ext4_has_group_desc_csum(sb)) 45738c2ecf20Sopenharmony_ci num -= ext4_itable_unused_count(sb, gdp); 45748c2ecf20Sopenharmony_ci table += num / inodes_per_block; 45758c2ecf20Sopenharmony_ci if (end > table) 45768c2ecf20Sopenharmony_ci end = table; 45778c2ecf20Sopenharmony_ci while (b <= end) 45788c2ecf20Sopenharmony_ci ext4_sb_breadahead_unmovable(sb, b++); 45798c2ecf20Sopenharmony_ci } 45808c2ecf20Sopenharmony_ci 45818c2ecf20Sopenharmony_ci /* 45828c2ecf20Sopenharmony_ci * There are other valid inodes in the buffer, this inode 45838c2ecf20Sopenharmony_ci * has in-inode xattrs, or we don't have this inode in memory. 45848c2ecf20Sopenharmony_ci * Read the block from disk. 
45858c2ecf20Sopenharmony_ci */ 45868c2ecf20Sopenharmony_ci trace_ext4_load_inode(sb, ino); 45878c2ecf20Sopenharmony_ci ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL); 45888c2ecf20Sopenharmony_ci blk_finish_plug(&plug); 45898c2ecf20Sopenharmony_ci wait_on_buffer(bh); 45908c2ecf20Sopenharmony_ci ext4_simulate_fail_bh(sb, bh, EXT4_SIM_INODE_EIO); 45918c2ecf20Sopenharmony_ci if (!buffer_uptodate(bh)) { 45928c2ecf20Sopenharmony_ci if (ret_block) 45938c2ecf20Sopenharmony_ci *ret_block = block; 45948c2ecf20Sopenharmony_ci brelse(bh); 45958c2ecf20Sopenharmony_ci return -EIO; 45968c2ecf20Sopenharmony_ci } 45978c2ecf20Sopenharmony_ci } 45988c2ecf20Sopenharmony_cihas_buffer: 45998c2ecf20Sopenharmony_ci iloc->bh = bh; 46008c2ecf20Sopenharmony_ci return 0; 46018c2ecf20Sopenharmony_ci} 46028c2ecf20Sopenharmony_ci 46038c2ecf20Sopenharmony_cistatic int __ext4_get_inode_loc_noinmem(struct inode *inode, 46048c2ecf20Sopenharmony_ci struct ext4_iloc *iloc) 46058c2ecf20Sopenharmony_ci{ 46068c2ecf20Sopenharmony_ci ext4_fsblk_t err_blk = 0; 46078c2ecf20Sopenharmony_ci int ret; 46088c2ecf20Sopenharmony_ci 46098c2ecf20Sopenharmony_ci ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc, 46108c2ecf20Sopenharmony_ci &err_blk); 46118c2ecf20Sopenharmony_ci 46128c2ecf20Sopenharmony_ci if (ret == -EIO) 46138c2ecf20Sopenharmony_ci ext4_error_inode_block(inode, err_blk, EIO, 46148c2ecf20Sopenharmony_ci "unable to read itable block"); 46158c2ecf20Sopenharmony_ci 46168c2ecf20Sopenharmony_ci return ret; 46178c2ecf20Sopenharmony_ci} 46188c2ecf20Sopenharmony_ci 46198c2ecf20Sopenharmony_ciint ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) 46208c2ecf20Sopenharmony_ci{ 46218c2ecf20Sopenharmony_ci ext4_fsblk_t err_blk = 0; 46228c2ecf20Sopenharmony_ci int ret; 46238c2ecf20Sopenharmony_ci 46248c2ecf20Sopenharmony_ci ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc, 46258c2ecf20Sopenharmony_ci &err_blk); 46268c2ecf20Sopenharmony_ci 46278c2ecf20Sopenharmony_ci 
if (ret == -EIO) 46288c2ecf20Sopenharmony_ci ext4_error_inode_block(inode, err_blk, EIO, 46298c2ecf20Sopenharmony_ci "unable to read itable block"); 46308c2ecf20Sopenharmony_ci 46318c2ecf20Sopenharmony_ci return ret; 46328c2ecf20Sopenharmony_ci} 46338c2ecf20Sopenharmony_ci 46348c2ecf20Sopenharmony_ci 46358c2ecf20Sopenharmony_ciint ext4_get_fc_inode_loc(struct super_block *sb, unsigned long ino, 46368c2ecf20Sopenharmony_ci struct ext4_iloc *iloc) 46378c2ecf20Sopenharmony_ci{ 46388c2ecf20Sopenharmony_ci return __ext4_get_inode_loc(sb, ino, NULL, iloc, NULL); 46398c2ecf20Sopenharmony_ci} 46408c2ecf20Sopenharmony_ci 46418c2ecf20Sopenharmony_cistatic bool ext4_should_enable_dax(struct inode *inode) 46428c2ecf20Sopenharmony_ci{ 46438c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 46448c2ecf20Sopenharmony_ci 46458c2ecf20Sopenharmony_ci if (test_opt2(inode->i_sb, DAX_NEVER)) 46468c2ecf20Sopenharmony_ci return false; 46478c2ecf20Sopenharmony_ci if (!S_ISREG(inode->i_mode)) 46488c2ecf20Sopenharmony_ci return false; 46498c2ecf20Sopenharmony_ci if (ext4_should_journal_data(inode)) 46508c2ecf20Sopenharmony_ci return false; 46518c2ecf20Sopenharmony_ci if (ext4_has_inline_data(inode)) 46528c2ecf20Sopenharmony_ci return false; 46538c2ecf20Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT)) 46548c2ecf20Sopenharmony_ci return false; 46558c2ecf20Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY)) 46568c2ecf20Sopenharmony_ci return false; 46578c2ecf20Sopenharmony_ci if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) 46588c2ecf20Sopenharmony_ci return false; 46598c2ecf20Sopenharmony_ci if (test_opt(inode->i_sb, DAX_ALWAYS)) 46608c2ecf20Sopenharmony_ci return true; 46618c2ecf20Sopenharmony_ci 46628c2ecf20Sopenharmony_ci return ext4_test_inode_flag(inode, EXT4_INODE_DAX); 46638c2ecf20Sopenharmony_ci} 46648c2ecf20Sopenharmony_ci 46658c2ecf20Sopenharmony_civoid ext4_set_inode_flags(struct inode *inode, bool init) 
46668c2ecf20Sopenharmony_ci{ 46678c2ecf20Sopenharmony_ci unsigned int flags = EXT4_I(inode)->i_flags; 46688c2ecf20Sopenharmony_ci unsigned int new_fl = 0; 46698c2ecf20Sopenharmony_ci 46708c2ecf20Sopenharmony_ci WARN_ON_ONCE(IS_DAX(inode) && init); 46718c2ecf20Sopenharmony_ci 46728c2ecf20Sopenharmony_ci if (flags & EXT4_SYNC_FL) 46738c2ecf20Sopenharmony_ci new_fl |= S_SYNC; 46748c2ecf20Sopenharmony_ci if (flags & EXT4_APPEND_FL) 46758c2ecf20Sopenharmony_ci new_fl |= S_APPEND; 46768c2ecf20Sopenharmony_ci if (flags & EXT4_IMMUTABLE_FL) 46778c2ecf20Sopenharmony_ci new_fl |= S_IMMUTABLE; 46788c2ecf20Sopenharmony_ci if (flags & EXT4_NOATIME_FL) 46798c2ecf20Sopenharmony_ci new_fl |= S_NOATIME; 46808c2ecf20Sopenharmony_ci if (flags & EXT4_DIRSYNC_FL) 46818c2ecf20Sopenharmony_ci new_fl |= S_DIRSYNC; 46828c2ecf20Sopenharmony_ci 46838c2ecf20Sopenharmony_ci /* Because of the way inode_set_flags() works we must preserve S_DAX 46848c2ecf20Sopenharmony_ci * here if already set. */ 46858c2ecf20Sopenharmony_ci new_fl |= (inode->i_flags & S_DAX); 46868c2ecf20Sopenharmony_ci if (init && ext4_should_enable_dax(inode)) 46878c2ecf20Sopenharmony_ci new_fl |= S_DAX; 46888c2ecf20Sopenharmony_ci 46898c2ecf20Sopenharmony_ci if (flags & EXT4_ENCRYPT_FL) 46908c2ecf20Sopenharmony_ci new_fl |= S_ENCRYPTED; 46918c2ecf20Sopenharmony_ci if (flags & EXT4_CASEFOLD_FL) 46928c2ecf20Sopenharmony_ci new_fl |= S_CASEFOLD; 46938c2ecf20Sopenharmony_ci if (flags & EXT4_VERITY_FL) 46948c2ecf20Sopenharmony_ci new_fl |= S_VERITY; 46958c2ecf20Sopenharmony_ci inode_set_flags(inode, new_fl, 46968c2ecf20Sopenharmony_ci S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX| 46978c2ecf20Sopenharmony_ci S_ENCRYPTED|S_CASEFOLD|S_VERITY); 46988c2ecf20Sopenharmony_ci} 46998c2ecf20Sopenharmony_ci 47008c2ecf20Sopenharmony_cistatic blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, 47018c2ecf20Sopenharmony_ci struct ext4_inode_info *ei) 47028c2ecf20Sopenharmony_ci{ 47038c2ecf20Sopenharmony_ci blkcnt_t i_blocks ; 
47048c2ecf20Sopenharmony_ci struct inode *inode = &(ei->vfs_inode); 47058c2ecf20Sopenharmony_ci struct super_block *sb = inode->i_sb; 47068c2ecf20Sopenharmony_ci 47078c2ecf20Sopenharmony_ci if (ext4_has_feature_huge_file(sb)) { 47088c2ecf20Sopenharmony_ci /* we are using combined 48 bit field */ 47098c2ecf20Sopenharmony_ci i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 | 47108c2ecf20Sopenharmony_ci le32_to_cpu(raw_inode->i_blocks_lo); 47118c2ecf20Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) { 47128c2ecf20Sopenharmony_ci /* i_blocks represent file system block size */ 47138c2ecf20Sopenharmony_ci return i_blocks << (inode->i_blkbits - 9); 47148c2ecf20Sopenharmony_ci } else { 47158c2ecf20Sopenharmony_ci return i_blocks; 47168c2ecf20Sopenharmony_ci } 47178c2ecf20Sopenharmony_ci } else { 47188c2ecf20Sopenharmony_ci return le32_to_cpu(raw_inode->i_blocks_lo); 47198c2ecf20Sopenharmony_ci } 47208c2ecf20Sopenharmony_ci} 47218c2ecf20Sopenharmony_ci 47228c2ecf20Sopenharmony_cistatic inline int ext4_iget_extra_inode(struct inode *inode, 47238c2ecf20Sopenharmony_ci struct ext4_inode *raw_inode, 47248c2ecf20Sopenharmony_ci struct ext4_inode_info *ei) 47258c2ecf20Sopenharmony_ci{ 47268c2ecf20Sopenharmony_ci __le32 *magic = (void *)raw_inode + 47278c2ecf20Sopenharmony_ci EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; 47288c2ecf20Sopenharmony_ci 47298c2ecf20Sopenharmony_ci if (EXT4_INODE_HAS_XATTR_SPACE(inode) && 47308c2ecf20Sopenharmony_ci *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { 47318c2ecf20Sopenharmony_ci int err; 47328c2ecf20Sopenharmony_ci 47338c2ecf20Sopenharmony_ci ext4_set_inode_state(inode, EXT4_STATE_XATTR); 47348c2ecf20Sopenharmony_ci err = ext4_find_inline_data_nolock(inode); 47358c2ecf20Sopenharmony_ci if (!err && ext4_has_inline_data(inode)) 47368c2ecf20Sopenharmony_ci ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 47378c2ecf20Sopenharmony_ci return err; 47388c2ecf20Sopenharmony_ci } else 47398c2ecf20Sopenharmony_ci 
EXT4_I(inode)->i_inline_off = 0; 47408c2ecf20Sopenharmony_ci return 0; 47418c2ecf20Sopenharmony_ci} 47428c2ecf20Sopenharmony_ci 47438c2ecf20Sopenharmony_ciint ext4_get_projid(struct inode *inode, kprojid_t *projid) 47448c2ecf20Sopenharmony_ci{ 47458c2ecf20Sopenharmony_ci if (!ext4_has_feature_project(inode->i_sb)) 47468c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 47478c2ecf20Sopenharmony_ci *projid = EXT4_I(inode)->i_projid; 47488c2ecf20Sopenharmony_ci return 0; 47498c2ecf20Sopenharmony_ci} 47508c2ecf20Sopenharmony_ci 47518c2ecf20Sopenharmony_ci/* 47528c2ecf20Sopenharmony_ci * ext4 has self-managed i_version for ea inodes, it stores the lower 32bit of 47538c2ecf20Sopenharmony_ci * refcount in i_version, so use raw values if inode has EXT4_EA_INODE_FL flag 47548c2ecf20Sopenharmony_ci * set. 47558c2ecf20Sopenharmony_ci */ 47568c2ecf20Sopenharmony_cistatic inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val) 47578c2ecf20Sopenharmony_ci{ 47588c2ecf20Sopenharmony_ci if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) 47598c2ecf20Sopenharmony_ci inode_set_iversion_raw(inode, val); 47608c2ecf20Sopenharmony_ci else 47618c2ecf20Sopenharmony_ci inode_set_iversion_queried(inode, val); 47628c2ecf20Sopenharmony_ci} 47638c2ecf20Sopenharmony_ci 47648c2ecf20Sopenharmony_cistatic const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags) 47658c2ecf20Sopenharmony_ci 47668c2ecf20Sopenharmony_ci{ 47678c2ecf20Sopenharmony_ci if (flags & EXT4_IGET_EA_INODE) { 47688c2ecf20Sopenharmony_ci if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) 47698c2ecf20Sopenharmony_ci return "missing EA_INODE flag"; 47708c2ecf20Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) || 47718c2ecf20Sopenharmony_ci EXT4_I(inode)->i_file_acl) 47728c2ecf20Sopenharmony_ci return "ea_inode with extended attributes"; 47738c2ecf20Sopenharmony_ci } else { 47748c2ecf20Sopenharmony_ci if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) 47758c2ecf20Sopenharmony_ci return 
"unexpected EA_INODE flag"; 47768c2ecf20Sopenharmony_ci } 47778c2ecf20Sopenharmony_ci if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD)) 47788c2ecf20Sopenharmony_ci return "unexpected bad inode w/o EXT4_IGET_BAD"; 47798c2ecf20Sopenharmony_ci return NULL; 47808c2ecf20Sopenharmony_ci} 47818c2ecf20Sopenharmony_ci 47828c2ecf20Sopenharmony_cistruct inode *__ext4_iget(struct super_block *sb, unsigned long ino, 47838c2ecf20Sopenharmony_ci ext4_iget_flags flags, const char *function, 47848c2ecf20Sopenharmony_ci unsigned int line) 47858c2ecf20Sopenharmony_ci{ 47868c2ecf20Sopenharmony_ci struct ext4_iloc iloc; 47878c2ecf20Sopenharmony_ci struct ext4_inode *raw_inode; 47888c2ecf20Sopenharmony_ci struct ext4_inode_info *ei; 47898c2ecf20Sopenharmony_ci struct inode *inode; 47908c2ecf20Sopenharmony_ci const char *err_str; 47918c2ecf20Sopenharmony_ci journal_t *journal = EXT4_SB(sb)->s_journal; 47928c2ecf20Sopenharmony_ci long ret; 47938c2ecf20Sopenharmony_ci loff_t size; 47948c2ecf20Sopenharmony_ci int block; 47958c2ecf20Sopenharmony_ci uid_t i_uid; 47968c2ecf20Sopenharmony_ci gid_t i_gid; 47978c2ecf20Sopenharmony_ci projid_t i_projid; 47988c2ecf20Sopenharmony_ci 47998c2ecf20Sopenharmony_ci if ((!(flags & EXT4_IGET_SPECIAL) && 48008c2ecf20Sopenharmony_ci (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)) || 48018c2ecf20Sopenharmony_ci (ino < EXT4_ROOT_INO) || 48028c2ecf20Sopenharmony_ci (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) { 48038c2ecf20Sopenharmony_ci if (flags & EXT4_IGET_HANDLE) 48048c2ecf20Sopenharmony_ci return ERR_PTR(-ESTALE); 48058c2ecf20Sopenharmony_ci __ext4_error(sb, function, line, false, EFSCORRUPTED, 0, 48068c2ecf20Sopenharmony_ci "inode #%lu: comm %s: iget: illegal inode #", 48078c2ecf20Sopenharmony_ci ino, current->comm); 48088c2ecf20Sopenharmony_ci return ERR_PTR(-EFSCORRUPTED); 48098c2ecf20Sopenharmony_ci } 48108c2ecf20Sopenharmony_ci 48118c2ecf20Sopenharmony_ci inode = iget_locked(sb, ino); 48128c2ecf20Sopenharmony_ci if (!inode) 
48138c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 48148c2ecf20Sopenharmony_ci if (!(inode->i_state & I_NEW)) { 48158c2ecf20Sopenharmony_ci if ((err_str = check_igot_inode(inode, flags)) != NULL) { 48168c2ecf20Sopenharmony_ci ext4_error_inode(inode, function, line, 0, err_str); 48178c2ecf20Sopenharmony_ci iput(inode); 48188c2ecf20Sopenharmony_ci return ERR_PTR(-EFSCORRUPTED); 48198c2ecf20Sopenharmony_ci } 48208c2ecf20Sopenharmony_ci return inode; 48218c2ecf20Sopenharmony_ci } 48228c2ecf20Sopenharmony_ci 48238c2ecf20Sopenharmony_ci ei = EXT4_I(inode); 48248c2ecf20Sopenharmony_ci iloc.bh = NULL; 48258c2ecf20Sopenharmony_ci 48268c2ecf20Sopenharmony_ci ret = __ext4_get_inode_loc_noinmem(inode, &iloc); 48278c2ecf20Sopenharmony_ci if (ret < 0) 48288c2ecf20Sopenharmony_ci goto bad_inode; 48298c2ecf20Sopenharmony_ci raw_inode = ext4_raw_inode(&iloc); 48308c2ecf20Sopenharmony_ci 48318c2ecf20Sopenharmony_ci if ((flags & EXT4_IGET_HANDLE) && 48328c2ecf20Sopenharmony_ci (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) { 48338c2ecf20Sopenharmony_ci ret = -ESTALE; 48348c2ecf20Sopenharmony_ci goto bad_inode; 48358c2ecf20Sopenharmony_ci } 48368c2ecf20Sopenharmony_ci 48378c2ecf20Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 48388c2ecf20Sopenharmony_ci ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 48398c2ecf20Sopenharmony_ci if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 48408c2ecf20Sopenharmony_ci EXT4_INODE_SIZE(inode->i_sb) || 48418c2ecf20Sopenharmony_ci (ei->i_extra_isize & 3)) { 48428c2ecf20Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 48438c2ecf20Sopenharmony_ci "iget: bad extra_isize %u " 48448c2ecf20Sopenharmony_ci "(inode size %u)", 48458c2ecf20Sopenharmony_ci ei->i_extra_isize, 48468c2ecf20Sopenharmony_ci EXT4_INODE_SIZE(inode->i_sb)); 48478c2ecf20Sopenharmony_ci ret = -EFSCORRUPTED; 48488c2ecf20Sopenharmony_ci goto bad_inode; 48498c2ecf20Sopenharmony_ci } 48508c2ecf20Sopenharmony_ci } else 
48518c2ecf20Sopenharmony_ci ei->i_extra_isize = 0; 48528c2ecf20Sopenharmony_ci 48538c2ecf20Sopenharmony_ci /* Precompute checksum seed for inode metadata */ 48548c2ecf20Sopenharmony_ci if (ext4_has_metadata_csum(sb)) { 48558c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 48568c2ecf20Sopenharmony_ci __u32 csum; 48578c2ecf20Sopenharmony_ci __le32 inum = cpu_to_le32(inode->i_ino); 48588c2ecf20Sopenharmony_ci __le32 gen = raw_inode->i_generation; 48598c2ecf20Sopenharmony_ci csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, 48608c2ecf20Sopenharmony_ci sizeof(inum)); 48618c2ecf20Sopenharmony_ci ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, 48628c2ecf20Sopenharmony_ci sizeof(gen)); 48638c2ecf20Sopenharmony_ci } 48648c2ecf20Sopenharmony_ci 48658c2ecf20Sopenharmony_ci if ((!ext4_inode_csum_verify(inode, raw_inode, ei) || 48668c2ecf20Sopenharmony_ci ext4_simulate_fail(sb, EXT4_SIM_INODE_CRC)) && 48678c2ecf20Sopenharmony_ci (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))) { 48688c2ecf20Sopenharmony_ci ext4_error_inode_err(inode, function, line, 0, 48698c2ecf20Sopenharmony_ci EFSBADCRC, "iget: checksum invalid"); 48708c2ecf20Sopenharmony_ci ret = -EFSBADCRC; 48718c2ecf20Sopenharmony_ci goto bad_inode; 48728c2ecf20Sopenharmony_ci } 48738c2ecf20Sopenharmony_ci 48748c2ecf20Sopenharmony_ci inode->i_mode = le16_to_cpu(raw_inode->i_mode); 48758c2ecf20Sopenharmony_ci i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 48768c2ecf20Sopenharmony_ci i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); 48778c2ecf20Sopenharmony_ci if (ext4_has_feature_project(sb) && 48788c2ecf20Sopenharmony_ci EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE && 48798c2ecf20Sopenharmony_ci EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) 48808c2ecf20Sopenharmony_ci i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid); 48818c2ecf20Sopenharmony_ci else 48828c2ecf20Sopenharmony_ci i_projid = EXT4_DEF_PROJID; 48838c2ecf20Sopenharmony_ci 48848c2ecf20Sopenharmony_ci if 
(!(test_opt(inode->i_sb, NO_UID32))) { 48858c2ecf20Sopenharmony_ci i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 48868c2ecf20Sopenharmony_ci i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 48878c2ecf20Sopenharmony_ci } 48888c2ecf20Sopenharmony_ci i_uid_write(inode, i_uid); 48898c2ecf20Sopenharmony_ci i_gid_write(inode, i_gid); 48908c2ecf20Sopenharmony_ci ei->i_projid = make_kprojid(&init_user_ns, i_projid); 48918c2ecf20Sopenharmony_ci set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); 48928c2ecf20Sopenharmony_ci 48938c2ecf20Sopenharmony_ci ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ 48948c2ecf20Sopenharmony_ci ei->i_inline_off = 0; 48958c2ecf20Sopenharmony_ci ei->i_dir_start_lookup = 0; 48968c2ecf20Sopenharmony_ci ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 48978c2ecf20Sopenharmony_ci /* We now have enough fields to check if the inode was active or not. 48988c2ecf20Sopenharmony_ci * This is needed because nfsd might try to access dead inodes 48998c2ecf20Sopenharmony_ci * the test is that same one that e2fsck uses 49008c2ecf20Sopenharmony_ci * NeilBrown 1999oct15 49018c2ecf20Sopenharmony_ci */ 49028c2ecf20Sopenharmony_ci if (inode->i_nlink == 0) { 49038c2ecf20Sopenharmony_ci if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL || 49048c2ecf20Sopenharmony_ci !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) && 49058c2ecf20Sopenharmony_ci ino != EXT4_BOOT_LOADER_INO) { 49068c2ecf20Sopenharmony_ci /* this inode is deleted or unallocated */ 49078c2ecf20Sopenharmony_ci if (flags & EXT4_IGET_SPECIAL) { 49088c2ecf20Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 49098c2ecf20Sopenharmony_ci "iget: special inode unallocated"); 49108c2ecf20Sopenharmony_ci ret = -EFSCORRUPTED; 49118c2ecf20Sopenharmony_ci } else 49128c2ecf20Sopenharmony_ci ret = -ESTALE; 49138c2ecf20Sopenharmony_ci goto bad_inode; 49148c2ecf20Sopenharmony_ci } 49158c2ecf20Sopenharmony_ci /* The only unlinked inodes we let through here have 
49168c2ecf20Sopenharmony_ci * valid i_mode and are being read by the orphan 49178c2ecf20Sopenharmony_ci * recovery code: that's fine, we're about to complete 49188c2ecf20Sopenharmony_ci * the process of deleting those. 49198c2ecf20Sopenharmony_ci * OR it is the EXT4_BOOT_LOADER_INO which is 49208c2ecf20Sopenharmony_ci * not initialized on a new filesystem. */ 49218c2ecf20Sopenharmony_ci } 49228c2ecf20Sopenharmony_ci ei->i_flags = le32_to_cpu(raw_inode->i_flags); 49238c2ecf20Sopenharmony_ci ext4_set_inode_flags(inode, true); 49248c2ecf20Sopenharmony_ci inode->i_blocks = ext4_inode_blocks(raw_inode, ei); 49258c2ecf20Sopenharmony_ci ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); 49268c2ecf20Sopenharmony_ci if (ext4_has_feature_64bit(sb)) 49278c2ecf20Sopenharmony_ci ei->i_file_acl |= 49288c2ecf20Sopenharmony_ci ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 49298c2ecf20Sopenharmony_ci inode->i_size = ext4_isize(sb, raw_inode); 49308c2ecf20Sopenharmony_ci if ((size = i_size_read(inode)) < 0) { 49318c2ecf20Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 49328c2ecf20Sopenharmony_ci "iget: bad i_size value: %lld", size); 49338c2ecf20Sopenharmony_ci ret = -EFSCORRUPTED; 49348c2ecf20Sopenharmony_ci goto bad_inode; 49358c2ecf20Sopenharmony_ci } 49368c2ecf20Sopenharmony_ci /* 49378c2ecf20Sopenharmony_ci * If dir_index is not enabled but there's dir with INDEX flag set, 49388c2ecf20Sopenharmony_ci * we'd normally treat htree data as empty space. But with metadata 49398c2ecf20Sopenharmony_ci * checksumming that corrupts checksums so forbid that. 
49408c2ecf20Sopenharmony_ci */ 49418c2ecf20Sopenharmony_ci if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) && 49428c2ecf20Sopenharmony_ci ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { 49438c2ecf20Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 49448c2ecf20Sopenharmony_ci "iget: Dir with htree data on filesystem without dir_index feature."); 49458c2ecf20Sopenharmony_ci ret = -EFSCORRUPTED; 49468c2ecf20Sopenharmony_ci goto bad_inode; 49478c2ecf20Sopenharmony_ci } 49488c2ecf20Sopenharmony_ci ei->i_disksize = inode->i_size; 49498c2ecf20Sopenharmony_ci#ifdef CONFIG_QUOTA 49508c2ecf20Sopenharmony_ci ei->i_reserved_quota = 0; 49518c2ecf20Sopenharmony_ci#endif 49528c2ecf20Sopenharmony_ci inode->i_generation = le32_to_cpu(raw_inode->i_generation); 49538c2ecf20Sopenharmony_ci ei->i_block_group = iloc.block_group; 49548c2ecf20Sopenharmony_ci ei->i_last_alloc_group = ~0; 49558c2ecf20Sopenharmony_ci /* 49568c2ecf20Sopenharmony_ci * NOTE! The in-memory inode i_data array is in little-endian order 49578c2ecf20Sopenharmony_ci * even on big-endian machines: we do NOT byteswap the block numbers! 49588c2ecf20Sopenharmony_ci */ 49598c2ecf20Sopenharmony_ci for (block = 0; block < EXT4_N_BLOCKS; block++) 49608c2ecf20Sopenharmony_ci ei->i_data[block] = raw_inode->i_block[block]; 49618c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&ei->i_orphan); 49628c2ecf20Sopenharmony_ci ext4_fc_init_inode(&ei->vfs_inode); 49638c2ecf20Sopenharmony_ci 49648c2ecf20Sopenharmony_ci /* 49658c2ecf20Sopenharmony_ci * Set transaction id's of transactions that have to be committed 49668c2ecf20Sopenharmony_ci * to finish f[data]sync. We set them to currently running transaction 49678c2ecf20Sopenharmony_ci * as we cannot be sure that the inode or some of its metadata isn't 49688c2ecf20Sopenharmony_ci * part of the transaction - the inode could have been reclaimed and 49698c2ecf20Sopenharmony_ci * now it is reread from disk. 
49708c2ecf20Sopenharmony_ci */ 49718c2ecf20Sopenharmony_ci if (journal) { 49728c2ecf20Sopenharmony_ci transaction_t *transaction; 49738c2ecf20Sopenharmony_ci tid_t tid; 49748c2ecf20Sopenharmony_ci 49758c2ecf20Sopenharmony_ci read_lock(&journal->j_state_lock); 49768c2ecf20Sopenharmony_ci if (journal->j_running_transaction) 49778c2ecf20Sopenharmony_ci transaction = journal->j_running_transaction; 49788c2ecf20Sopenharmony_ci else 49798c2ecf20Sopenharmony_ci transaction = journal->j_committing_transaction; 49808c2ecf20Sopenharmony_ci if (transaction) 49818c2ecf20Sopenharmony_ci tid = transaction->t_tid; 49828c2ecf20Sopenharmony_ci else 49838c2ecf20Sopenharmony_ci tid = journal->j_commit_sequence; 49848c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 49858c2ecf20Sopenharmony_ci ei->i_sync_tid = tid; 49868c2ecf20Sopenharmony_ci ei->i_datasync_tid = tid; 49878c2ecf20Sopenharmony_ci } 49888c2ecf20Sopenharmony_ci 49898c2ecf20Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 49908c2ecf20Sopenharmony_ci if (ei->i_extra_isize == 0) { 49918c2ecf20Sopenharmony_ci /* The extra space is currently unused. Use it. 
*/ 49928c2ecf20Sopenharmony_ci BUILD_BUG_ON(sizeof(struct ext4_inode) & 3); 49938c2ecf20Sopenharmony_ci ei->i_extra_isize = sizeof(struct ext4_inode) - 49948c2ecf20Sopenharmony_ci EXT4_GOOD_OLD_INODE_SIZE; 49958c2ecf20Sopenharmony_ci } else { 49968c2ecf20Sopenharmony_ci ret = ext4_iget_extra_inode(inode, raw_inode, ei); 49978c2ecf20Sopenharmony_ci if (ret) 49988c2ecf20Sopenharmony_ci goto bad_inode; 49998c2ecf20Sopenharmony_ci } 50008c2ecf20Sopenharmony_ci } 50018c2ecf20Sopenharmony_ci 50028c2ecf20Sopenharmony_ci EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode); 50038c2ecf20Sopenharmony_ci EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); 50048c2ecf20Sopenharmony_ci EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); 50058c2ecf20Sopenharmony_ci EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); 50068c2ecf20Sopenharmony_ci 50078c2ecf20Sopenharmony_ci if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { 50088c2ecf20Sopenharmony_ci u64 ivers = le32_to_cpu(raw_inode->i_disk_version); 50098c2ecf20Sopenharmony_ci 50108c2ecf20Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 50118c2ecf20Sopenharmony_ci if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 50128c2ecf20Sopenharmony_ci ivers |= 50138c2ecf20Sopenharmony_ci (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 50148c2ecf20Sopenharmony_ci } 50158c2ecf20Sopenharmony_ci ext4_inode_set_iversion_queried(inode, ivers); 50168c2ecf20Sopenharmony_ci } 50178c2ecf20Sopenharmony_ci 50188c2ecf20Sopenharmony_ci ret = 0; 50198c2ecf20Sopenharmony_ci if (ei->i_file_acl && 50208c2ecf20Sopenharmony_ci !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) { 50218c2ecf20Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 50228c2ecf20Sopenharmony_ci "iget: bad extended attribute block %llu", 50238c2ecf20Sopenharmony_ci ei->i_file_acl); 50248c2ecf20Sopenharmony_ci ret = -EFSCORRUPTED; 50258c2ecf20Sopenharmony_ci goto bad_inode; 50268c2ecf20Sopenharmony_ci } else if (!ext4_has_inline_data(inode)) { 
50278c2ecf20Sopenharmony_ci /* validate the block references in the inode */ 50288c2ecf20Sopenharmony_ci if (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) && 50298c2ecf20Sopenharmony_ci (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 50308c2ecf20Sopenharmony_ci (S_ISLNK(inode->i_mode) && 50318c2ecf20Sopenharmony_ci !ext4_inode_is_fast_symlink(inode)))) { 50328c2ecf20Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 50338c2ecf20Sopenharmony_ci ret = ext4_ext_check_inode(inode); 50348c2ecf20Sopenharmony_ci else 50358c2ecf20Sopenharmony_ci ret = ext4_ind_check_inode(inode); 50368c2ecf20Sopenharmony_ci } 50378c2ecf20Sopenharmony_ci } 50388c2ecf20Sopenharmony_ci if (ret) 50398c2ecf20Sopenharmony_ci goto bad_inode; 50408c2ecf20Sopenharmony_ci 50418c2ecf20Sopenharmony_ci if (S_ISREG(inode->i_mode)) { 50428c2ecf20Sopenharmony_ci inode->i_op = &ext4_file_inode_operations; 50438c2ecf20Sopenharmony_ci inode->i_fop = &ext4_file_operations; 50448c2ecf20Sopenharmony_ci ext4_set_aops(inode); 50458c2ecf20Sopenharmony_ci } else if (S_ISDIR(inode->i_mode)) { 50468c2ecf20Sopenharmony_ci inode->i_op = &ext4_dir_inode_operations; 50478c2ecf20Sopenharmony_ci inode->i_fop = &ext4_dir_operations; 50488c2ecf20Sopenharmony_ci } else if (S_ISLNK(inode->i_mode)) { 50498c2ecf20Sopenharmony_ci /* VFS does not allow setting these so must be corruption */ 50508c2ecf20Sopenharmony_ci if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { 50518c2ecf20Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 50528c2ecf20Sopenharmony_ci "iget: immutable or append flags " 50538c2ecf20Sopenharmony_ci "not allowed on symlinks"); 50548c2ecf20Sopenharmony_ci ret = -EFSCORRUPTED; 50558c2ecf20Sopenharmony_ci goto bad_inode; 50568c2ecf20Sopenharmony_ci } 50578c2ecf20Sopenharmony_ci if (IS_ENCRYPTED(inode)) { 50588c2ecf20Sopenharmony_ci inode->i_op = &ext4_encrypted_symlink_inode_operations; 50598c2ecf20Sopenharmony_ci ext4_set_aops(inode); 50608c2ecf20Sopenharmony_ci } else if 
(ext4_inode_is_fast_symlink(inode)) { 50618c2ecf20Sopenharmony_ci inode->i_link = (char *)ei->i_data; 50628c2ecf20Sopenharmony_ci inode->i_op = &ext4_fast_symlink_inode_operations; 50638c2ecf20Sopenharmony_ci nd_terminate_link(ei->i_data, inode->i_size, 50648c2ecf20Sopenharmony_ci sizeof(ei->i_data) - 1); 50658c2ecf20Sopenharmony_ci } else { 50668c2ecf20Sopenharmony_ci inode->i_op = &ext4_symlink_inode_operations; 50678c2ecf20Sopenharmony_ci ext4_set_aops(inode); 50688c2ecf20Sopenharmony_ci } 50698c2ecf20Sopenharmony_ci inode_nohighmem(inode); 50708c2ecf20Sopenharmony_ci } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || 50718c2ecf20Sopenharmony_ci S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 50728c2ecf20Sopenharmony_ci inode->i_op = &ext4_special_inode_operations; 50738c2ecf20Sopenharmony_ci if (raw_inode->i_block[0]) 50748c2ecf20Sopenharmony_ci init_special_inode(inode, inode->i_mode, 50758c2ecf20Sopenharmony_ci old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); 50768c2ecf20Sopenharmony_ci else 50778c2ecf20Sopenharmony_ci init_special_inode(inode, inode->i_mode, 50788c2ecf20Sopenharmony_ci new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 50798c2ecf20Sopenharmony_ci } else if (ino == EXT4_BOOT_LOADER_INO) { 50808c2ecf20Sopenharmony_ci make_bad_inode(inode); 50818c2ecf20Sopenharmony_ci } else { 50828c2ecf20Sopenharmony_ci ret = -EFSCORRUPTED; 50838c2ecf20Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 50848c2ecf20Sopenharmony_ci "iget: bogus i_mode (%o)", inode->i_mode); 50858c2ecf20Sopenharmony_ci goto bad_inode; 50868c2ecf20Sopenharmony_ci } 50878c2ecf20Sopenharmony_ci if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) 50888c2ecf20Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 50898c2ecf20Sopenharmony_ci "casefold flag without casefold feature"); 50908c2ecf20Sopenharmony_ci if ((err_str = check_igot_inode(inode, flags)) != NULL) { 50918c2ecf20Sopenharmony_ci ext4_error_inode(inode, function, 
line, 0, err_str); 50928c2ecf20Sopenharmony_ci ret = -EFSCORRUPTED; 50938c2ecf20Sopenharmony_ci goto bad_inode; 50948c2ecf20Sopenharmony_ci } 50958c2ecf20Sopenharmony_ci 50968c2ecf20Sopenharmony_ci brelse(iloc.bh); 50978c2ecf20Sopenharmony_ci unlock_new_inode(inode); 50988c2ecf20Sopenharmony_ci return inode; 50998c2ecf20Sopenharmony_ci 51008c2ecf20Sopenharmony_cibad_inode: 51018c2ecf20Sopenharmony_ci brelse(iloc.bh); 51028c2ecf20Sopenharmony_ci iget_failed(inode); 51038c2ecf20Sopenharmony_ci return ERR_PTR(ret); 51048c2ecf20Sopenharmony_ci} 51058c2ecf20Sopenharmony_ci 51068c2ecf20Sopenharmony_cistatic void __ext4_update_other_inode_time(struct super_block *sb, 51078c2ecf20Sopenharmony_ci unsigned long orig_ino, 51088c2ecf20Sopenharmony_ci unsigned long ino, 51098c2ecf20Sopenharmony_ci struct ext4_inode *raw_inode) 51108c2ecf20Sopenharmony_ci{ 51118c2ecf20Sopenharmony_ci struct inode *inode; 51128c2ecf20Sopenharmony_ci 51138c2ecf20Sopenharmony_ci inode = find_inode_by_ino_rcu(sb, ino); 51148c2ecf20Sopenharmony_ci if (!inode) 51158c2ecf20Sopenharmony_ci return; 51168c2ecf20Sopenharmony_ci 51178c2ecf20Sopenharmony_ci if ((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | 51188c2ecf20Sopenharmony_ci I_DIRTY_INODE)) || 51198c2ecf20Sopenharmony_ci ((inode->i_state & I_DIRTY_TIME) == 0)) 51208c2ecf20Sopenharmony_ci return; 51218c2ecf20Sopenharmony_ci 51228c2ecf20Sopenharmony_ci spin_lock(&inode->i_lock); 51238c2ecf20Sopenharmony_ci if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | 51248c2ecf20Sopenharmony_ci I_DIRTY_INODE)) == 0) && 51258c2ecf20Sopenharmony_ci (inode->i_state & I_DIRTY_TIME)) { 51268c2ecf20Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 51278c2ecf20Sopenharmony_ci 51288c2ecf20Sopenharmony_ci inode->i_state &= ~I_DIRTY_TIME; 51298c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 51308c2ecf20Sopenharmony_ci 51318c2ecf20Sopenharmony_ci spin_lock(&ei->i_raw_lock); 51328c2ecf20Sopenharmony_ci EXT4_INODE_SET_XTIME(i_ctime, inode, 
raw_inode); 51338c2ecf20Sopenharmony_ci EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); 51348c2ecf20Sopenharmony_ci EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); 51358c2ecf20Sopenharmony_ci ext4_inode_csum_set(inode, raw_inode, ei); 51368c2ecf20Sopenharmony_ci spin_unlock(&ei->i_raw_lock); 51378c2ecf20Sopenharmony_ci trace_ext4_other_inode_update_time(inode, orig_ino); 51388c2ecf20Sopenharmony_ci return; 51398c2ecf20Sopenharmony_ci } 51408c2ecf20Sopenharmony_ci spin_unlock(&inode->i_lock); 51418c2ecf20Sopenharmony_ci} 51428c2ecf20Sopenharmony_ci 51438c2ecf20Sopenharmony_ci/* 51448c2ecf20Sopenharmony_ci * Opportunistically update the other time fields for other inodes in 51458c2ecf20Sopenharmony_ci * the same inode table block. 51468c2ecf20Sopenharmony_ci */ 51478c2ecf20Sopenharmony_cistatic void ext4_update_other_inodes_time(struct super_block *sb, 51488c2ecf20Sopenharmony_ci unsigned long orig_ino, char *buf) 51498c2ecf20Sopenharmony_ci{ 51508c2ecf20Sopenharmony_ci unsigned long ino; 51518c2ecf20Sopenharmony_ci int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; 51528c2ecf20Sopenharmony_ci int inode_size = EXT4_INODE_SIZE(sb); 51538c2ecf20Sopenharmony_ci 51548c2ecf20Sopenharmony_ci /* 51558c2ecf20Sopenharmony_ci * Calculate the first inode in the inode table block. Inode 51568c2ecf20Sopenharmony_ci * numbers are one-based. That is, the first inode in a block 51578c2ecf20Sopenharmony_ci * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1). 
51588c2ecf20Sopenharmony_ci */ 51598c2ecf20Sopenharmony_ci ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; 51608c2ecf20Sopenharmony_ci rcu_read_lock(); 51618c2ecf20Sopenharmony_ci for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { 51628c2ecf20Sopenharmony_ci if (ino == orig_ino) 51638c2ecf20Sopenharmony_ci continue; 51648c2ecf20Sopenharmony_ci __ext4_update_other_inode_time(sb, orig_ino, ino, 51658c2ecf20Sopenharmony_ci (struct ext4_inode *)buf); 51668c2ecf20Sopenharmony_ci } 51678c2ecf20Sopenharmony_ci rcu_read_unlock(); 51688c2ecf20Sopenharmony_ci} 51698c2ecf20Sopenharmony_ci 51708c2ecf20Sopenharmony_ci/* 51718c2ecf20Sopenharmony_ci * Post the struct inode info into an on-disk inode location in the 51728c2ecf20Sopenharmony_ci * buffer-cache. This gobbles the caller's reference to the 51738c2ecf20Sopenharmony_ci * buffer_head in the inode location struct. 51748c2ecf20Sopenharmony_ci * 51758c2ecf20Sopenharmony_ci * The caller must have write access to iloc->bh. 51768c2ecf20Sopenharmony_ci */ 51778c2ecf20Sopenharmony_cistatic int ext4_do_update_inode(handle_t *handle, 51788c2ecf20Sopenharmony_ci struct inode *inode, 51798c2ecf20Sopenharmony_ci struct ext4_iloc *iloc) 51808c2ecf20Sopenharmony_ci{ 51818c2ecf20Sopenharmony_ci struct ext4_inode *raw_inode = ext4_raw_inode(iloc); 51828c2ecf20Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 51838c2ecf20Sopenharmony_ci struct buffer_head *bh = iloc->bh; 51848c2ecf20Sopenharmony_ci struct super_block *sb = inode->i_sb; 51858c2ecf20Sopenharmony_ci int err; 51868c2ecf20Sopenharmony_ci int need_datasync = 0, set_large_file = 0; 51878c2ecf20Sopenharmony_ci 51888c2ecf20Sopenharmony_ci spin_lock(&ei->i_raw_lock); 51898c2ecf20Sopenharmony_ci 51908c2ecf20Sopenharmony_ci /* 51918c2ecf20Sopenharmony_ci * For fields not tracked in the in-memory inode, initialise them 51928c2ecf20Sopenharmony_ci * to zero for new inodes. 
51938c2ecf20Sopenharmony_ci */ 51948c2ecf20Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) 51958c2ecf20Sopenharmony_ci memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 51968c2ecf20Sopenharmony_ci 51978c2ecf20Sopenharmony_ci if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode)) 51988c2ecf20Sopenharmony_ci need_datasync = 1; 51998c2ecf20Sopenharmony_ci if (ei->i_disksize > 0x7fffffffULL) { 52008c2ecf20Sopenharmony_ci if (!ext4_has_feature_large_file(sb) || 52018c2ecf20Sopenharmony_ci EXT4_SB(sb)->s_es->s_rev_level == cpu_to_le32(EXT4_GOOD_OLD_REV)) 52028c2ecf20Sopenharmony_ci set_large_file = 1; 52038c2ecf20Sopenharmony_ci } 52048c2ecf20Sopenharmony_ci 52058c2ecf20Sopenharmony_ci err = ext4_fill_raw_inode(inode, raw_inode); 52068c2ecf20Sopenharmony_ci spin_unlock(&ei->i_raw_lock); 52078c2ecf20Sopenharmony_ci if (err) { 52088c2ecf20Sopenharmony_ci EXT4_ERROR_INODE(inode, "corrupted inode contents"); 52098c2ecf20Sopenharmony_ci goto out_brelse; 52108c2ecf20Sopenharmony_ci } 52118c2ecf20Sopenharmony_ci 52128c2ecf20Sopenharmony_ci if (inode->i_sb->s_flags & SB_LAZYTIME) 52138c2ecf20Sopenharmony_ci ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, 52148c2ecf20Sopenharmony_ci bh->b_data); 52158c2ecf20Sopenharmony_ci 52168c2ecf20Sopenharmony_ci BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 52178c2ecf20Sopenharmony_ci err = ext4_handle_dirty_metadata(handle, NULL, bh); 52188c2ecf20Sopenharmony_ci if (err) 52198c2ecf20Sopenharmony_ci goto out_error; 52208c2ecf20Sopenharmony_ci ext4_clear_inode_state(inode, EXT4_STATE_NEW); 52218c2ecf20Sopenharmony_ci if (set_large_file) { 52228c2ecf20Sopenharmony_ci BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); 52238c2ecf20Sopenharmony_ci err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 52248c2ecf20Sopenharmony_ci if (err) 52258c2ecf20Sopenharmony_ci goto out_error; 52268c2ecf20Sopenharmony_ci lock_buffer(EXT4_SB(sb)->s_sbh); 52278c2ecf20Sopenharmony_ci 
ext4_set_feature_large_file(sb); 52288c2ecf20Sopenharmony_ci ext4_superblock_csum_set(sb); 52298c2ecf20Sopenharmony_ci unlock_buffer(EXT4_SB(sb)->s_sbh); 52308c2ecf20Sopenharmony_ci ext4_handle_sync(handle); 52318c2ecf20Sopenharmony_ci err = ext4_handle_dirty_metadata(handle, NULL, 52328c2ecf20Sopenharmony_ci EXT4_SB(sb)->s_sbh); 52338c2ecf20Sopenharmony_ci } 52348c2ecf20Sopenharmony_ci ext4_update_inode_fsync_trans(handle, inode, need_datasync); 52358c2ecf20Sopenharmony_ciout_error: 52368c2ecf20Sopenharmony_ci ext4_std_error(inode->i_sb, err); 52378c2ecf20Sopenharmony_ciout_brelse: 52388c2ecf20Sopenharmony_ci brelse(bh); 52398c2ecf20Sopenharmony_ci return err; 52408c2ecf20Sopenharmony_ci} 52418c2ecf20Sopenharmony_ci 52428c2ecf20Sopenharmony_ci/* 52438c2ecf20Sopenharmony_ci * ext4_write_inode() 52448c2ecf20Sopenharmony_ci * 52458c2ecf20Sopenharmony_ci * We are called from a few places: 52468c2ecf20Sopenharmony_ci * 52478c2ecf20Sopenharmony_ci * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files. 52488c2ecf20Sopenharmony_ci * Here, there will be no transaction running. We wait for any running 52498c2ecf20Sopenharmony_ci * transaction to commit. 52508c2ecf20Sopenharmony_ci * 52518c2ecf20Sopenharmony_ci * - Within flush work (sys_sync(), kupdate and such). 52528c2ecf20Sopenharmony_ci * We wait on commit, if told to. 52538c2ecf20Sopenharmony_ci * 52548c2ecf20Sopenharmony_ci * - Within iput_final() -> write_inode_now() 52558c2ecf20Sopenharmony_ci * We wait on commit, if told to. 52568c2ecf20Sopenharmony_ci * 52578c2ecf20Sopenharmony_ci * In all cases it is actually safe for us to return without doing anything, 52588c2ecf20Sopenharmony_ci * because the inode has been copied into a raw inode buffer in 52598c2ecf20Sopenharmony_ci * ext4_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL 52608c2ecf20Sopenharmony_ci * writeback. 
52618c2ecf20Sopenharmony_ci * 52628c2ecf20Sopenharmony_ci * Note that we are absolutely dependent upon all inode dirtiers doing the 52638c2ecf20Sopenharmony_ci * right thing: they *must* call mark_inode_dirty() after dirtying info in 52648c2ecf20Sopenharmony_ci * which we are interested. 52658c2ecf20Sopenharmony_ci * 52668c2ecf20Sopenharmony_ci * It would be a bug for them to not do this. The code: 52678c2ecf20Sopenharmony_ci * 52688c2ecf20Sopenharmony_ci * mark_inode_dirty(inode) 52698c2ecf20Sopenharmony_ci * stuff(); 52708c2ecf20Sopenharmony_ci * inode->i_size = expr; 52718c2ecf20Sopenharmony_ci * 52728c2ecf20Sopenharmony_ci * is in error because write_inode() could occur while `stuff()' is running, 52738c2ecf20Sopenharmony_ci * and the new i_size will be lost. Plus the inode will no longer be on the 52748c2ecf20Sopenharmony_ci * superblock's dirty inode list. 52758c2ecf20Sopenharmony_ci */ 52768c2ecf20Sopenharmony_ciint ext4_write_inode(struct inode *inode, struct writeback_control *wbc) 52778c2ecf20Sopenharmony_ci{ 52788c2ecf20Sopenharmony_ci int err; 52798c2ecf20Sopenharmony_ci 52808c2ecf20Sopenharmony_ci if (WARN_ON_ONCE(current->flags & PF_MEMALLOC) || 52818c2ecf20Sopenharmony_ci sb_rdonly(inode->i_sb)) 52828c2ecf20Sopenharmony_ci return 0; 52838c2ecf20Sopenharmony_ci 52848c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) 52858c2ecf20Sopenharmony_ci return -EIO; 52868c2ecf20Sopenharmony_ci 52878c2ecf20Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_journal) { 52888c2ecf20Sopenharmony_ci if (ext4_journal_current_handle()) { 52898c2ecf20Sopenharmony_ci jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); 52908c2ecf20Sopenharmony_ci dump_stack(); 52918c2ecf20Sopenharmony_ci return -EIO; 52928c2ecf20Sopenharmony_ci } 52938c2ecf20Sopenharmony_ci 52948c2ecf20Sopenharmony_ci /* 52958c2ecf20Sopenharmony_ci * No need to force transaction in WB_SYNC_NONE mode. 
Also 52968c2ecf20Sopenharmony_ci * ext4_sync_fs() will force the commit after everything is 52978c2ecf20Sopenharmony_ci * written. 52988c2ecf20Sopenharmony_ci */ 52998c2ecf20Sopenharmony_ci if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) 53008c2ecf20Sopenharmony_ci return 0; 53018c2ecf20Sopenharmony_ci 53028c2ecf20Sopenharmony_ci err = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal, 53038c2ecf20Sopenharmony_ci EXT4_I(inode)->i_sync_tid); 53048c2ecf20Sopenharmony_ci } else { 53058c2ecf20Sopenharmony_ci struct ext4_iloc iloc; 53068c2ecf20Sopenharmony_ci 53078c2ecf20Sopenharmony_ci err = __ext4_get_inode_loc_noinmem(inode, &iloc); 53088c2ecf20Sopenharmony_ci if (err) 53098c2ecf20Sopenharmony_ci return err; 53108c2ecf20Sopenharmony_ci /* 53118c2ecf20Sopenharmony_ci * sync(2) will flush the whole buffer cache. No need to do 53128c2ecf20Sopenharmony_ci * it here separately for each inode. 53138c2ecf20Sopenharmony_ci */ 53148c2ecf20Sopenharmony_ci if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) 53158c2ecf20Sopenharmony_ci sync_dirty_buffer(iloc.bh); 53168c2ecf20Sopenharmony_ci if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 53178c2ecf20Sopenharmony_ci ext4_error_inode_block(inode, iloc.bh->b_blocknr, EIO, 53188c2ecf20Sopenharmony_ci "IO error syncing inode"); 53198c2ecf20Sopenharmony_ci err = -EIO; 53208c2ecf20Sopenharmony_ci } 53218c2ecf20Sopenharmony_ci brelse(iloc.bh); 53228c2ecf20Sopenharmony_ci } 53238c2ecf20Sopenharmony_ci return err; 53248c2ecf20Sopenharmony_ci} 53258c2ecf20Sopenharmony_ci 53268c2ecf20Sopenharmony_ci/* 53278c2ecf20Sopenharmony_ci * In data=journal mode ext4_journalled_invalidatepage() may fail to invalidate 53288c2ecf20Sopenharmony_ci * buffers that are attached to a page stradding i_size and are undergoing 53298c2ecf20Sopenharmony_ci * commit. In that case we have to wait for commit to finish and try again. 
53308c2ecf20Sopenharmony_ci */ 53318c2ecf20Sopenharmony_cistatic void ext4_wait_for_tail_page_commit(struct inode *inode) 53328c2ecf20Sopenharmony_ci{ 53338c2ecf20Sopenharmony_ci struct page *page; 53348c2ecf20Sopenharmony_ci unsigned offset; 53358c2ecf20Sopenharmony_ci journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 53368c2ecf20Sopenharmony_ci tid_t commit_tid = 0; 53378c2ecf20Sopenharmony_ci int ret; 53388c2ecf20Sopenharmony_ci 53398c2ecf20Sopenharmony_ci offset = inode->i_size & (PAGE_SIZE - 1); 53408c2ecf20Sopenharmony_ci /* 53418c2ecf20Sopenharmony_ci * If the page is fully truncated, we don't need to wait for any commit 53428c2ecf20Sopenharmony_ci * (and we even should not as __ext4_journalled_invalidatepage() may 53438c2ecf20Sopenharmony_ci * strip all buffers from the page but keep the page dirty which can then 53448c2ecf20Sopenharmony_ci * confuse e.g. concurrent ext4_writepage() seeing dirty page without 53458c2ecf20Sopenharmony_ci * buffers). Also we don't need to wait for any commit if all buffers in 53468c2ecf20Sopenharmony_ci * the page remain valid. This is most beneficial for the common case of 53478c2ecf20Sopenharmony_ci * blocksize == PAGESIZE. 
53488c2ecf20Sopenharmony_ci */ 53498c2ecf20Sopenharmony_ci if (!offset || offset > (PAGE_SIZE - i_blocksize(inode))) 53508c2ecf20Sopenharmony_ci return; 53518c2ecf20Sopenharmony_ci while (1) { 53528c2ecf20Sopenharmony_ci page = find_lock_page(inode->i_mapping, 53538c2ecf20Sopenharmony_ci inode->i_size >> PAGE_SHIFT); 53548c2ecf20Sopenharmony_ci if (!page) 53558c2ecf20Sopenharmony_ci return; 53568c2ecf20Sopenharmony_ci ret = __ext4_journalled_invalidatepage(page, offset, 53578c2ecf20Sopenharmony_ci PAGE_SIZE - offset); 53588c2ecf20Sopenharmony_ci unlock_page(page); 53598c2ecf20Sopenharmony_ci put_page(page); 53608c2ecf20Sopenharmony_ci if (ret != -EBUSY) 53618c2ecf20Sopenharmony_ci return; 53628c2ecf20Sopenharmony_ci commit_tid = 0; 53638c2ecf20Sopenharmony_ci read_lock(&journal->j_state_lock); 53648c2ecf20Sopenharmony_ci if (journal->j_committing_transaction) 53658c2ecf20Sopenharmony_ci commit_tid = journal->j_committing_transaction->t_tid; 53668c2ecf20Sopenharmony_ci read_unlock(&journal->j_state_lock); 53678c2ecf20Sopenharmony_ci if (commit_tid) 53688c2ecf20Sopenharmony_ci jbd2_log_wait_commit(journal, commit_tid); 53698c2ecf20Sopenharmony_ci } 53708c2ecf20Sopenharmony_ci} 53718c2ecf20Sopenharmony_ci 53728c2ecf20Sopenharmony_ci/* 53738c2ecf20Sopenharmony_ci * ext4_setattr() 53748c2ecf20Sopenharmony_ci * 53758c2ecf20Sopenharmony_ci * Called from notify_change. 53768c2ecf20Sopenharmony_ci * 53778c2ecf20Sopenharmony_ci * We want to trap VFS attempts to truncate the file as soon as 53788c2ecf20Sopenharmony_ci * possible. 
 * In particular, we want to make sure that when the VFS
 * shrinks i_size, we put the inode on the orphan list and modify
 * i_disksize immediately, so that during the subsequent flushing of
 * dirty pages and freeing of disk blocks, we can guarantee that any
 * commit will leave the blocks being flushed in an unused state on
 * disk.  (On recovery, the inode will get truncated and the blocks will
 * be freed, so we have a strong guarantee that no future commit will
 * leave these blocks visible to the user.)
 *
 * Another thing we have to assure is that if we are in ordered mode
 * and inode is still attached to the committing transaction, we must
 * start writeout of all the dirty pages which are being truncated.
 * This way we are sure that all the data written in the previous
 * transaction are already on disk (truncate waits for pages under
 * writeback).
 *
 * Called with inode->i_mutex down.
 *
 * Returns 0 on success or a negative errno.  The request is rejected up
 * front with -EIO after a forced shutdown, and with -EPERM for immutable
 * inodes or for mode/owner/timestamp changes on append-only inodes.
 */
int ext4_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	/*
	 * 'error' is the primary status; 'rc' collects secondary results and
	 * is only reported at err_out when 'error' is still 0.
	 */
	int error, rc = 0;
	/* Set once ext4_orphan_add() has been attempted for a shrink */
	int orphan = 0;
	const unsigned int ia_valid = attr->ia_valid;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return -EIO;

	if (unlikely(IS_IMMUTABLE(inode)))
		return -EPERM;

	if (unlikely(IS_APPEND(inode) &&
		     (ia_valid & (ATTR_MODE | ATTR_UID |
				  ATTR_GID | ATTR_TIMES_SET))))
		return -EPERM;

	/* Generic, fscrypt and fsverity pre-checks; any failure aborts early */
	error = setattr_prepare(dentry, attr);
	if (error)
		return error;

	error = fscrypt_prepare_setattr(dentry, attr);
	if (error)
		return error;

	error = fsverity_prepare_setattr(dentry, attr);
	if (error)
		return error;

	if (is_quota_modification(inode, attr)) {
		error = dquot_initialize(inode);
		if (error)
			return error;
	}

	/*
	 * Ownership really changes: transfer the quota usage and update
	 * i_uid/i_gid under a single journal handle.
	 */
	if ((ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) ||
	    (ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) {
		handle_t *handle;

		/* (user+group)*(old+new) structure, inode write (sb,
		 * inode block, ? - but truncate inode update has it) */
		handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
			(EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) +
			 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3);
		if (IS_ERR(handle)) {
			error = PTR_ERR(handle);
			goto err_out;
		}

		/* dquot_transfer() calls back ext4_get_inode_usage() which
		 * counts xattr inode references.
		 */
		down_read(&EXT4_I(inode)->xattr_sem);
		error = dquot_transfer(inode, attr);
		up_read(&EXT4_I(inode)->xattr_sem);

		if (error) {
			ext4_journal_stop(handle);
			return error;
		}
		/* Update corresponding info in inode so that everything is in
		 * one transaction */
		if (attr->ia_valid & ATTR_UID)
			inode->i_uid = attr->ia_uid;
		if (attr->ia_valid & ATTR_GID)
			inode->i_gid = attr->ia_gid;
		error = ext4_mark_inode_dirty(handle, inode);
		ext4_journal_stop(handle);
		if (unlikely(error)) {
			return error;
		}
	}

	if (attr->ia_valid & ATTR_SIZE) {
		handle_t *handle;
		/* Size and direction are sampled before i_size is modified */
		loff_t oldsize = inode->i_size;
		loff_t old_disksize;
		int shrink = (attr->ia_size < inode->i_size);

		/* Inodes without the extents flag are capped at s_bitmap_maxbytes */
		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
			struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

			if (attr->ia_size > sbi->s_bitmap_maxbytes) {
				return -EFBIG;
			}
		}
		/* Size changes are only accepted for regular files */
		if (!S_ISREG(inode->i_mode)) {
			return -EINVAL;
		}

		if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
			inode_inc_iversion(inode);

		if (shrink) {
			if (ext4_should_order_data(inode)) {
				error = ext4_begin_ordered_truncate(inode,
							    attr->ia_size);
				if (error)
					goto err_out;
			}
			/*
			 * Blocks are going to be removed from the inode. Wait
			 * for dio in flight.
			 */
			inode_dio_wait(inode);
		}

		down_write(&EXT4_I(inode)->i_mmap_sem);

		/*
		 * 'error' is still 0 here, so a failure travels to err_out
		 * in 'rc' and is returned from there.
		 */
		rc = ext4_break_layouts(inode);
		if (rc) {
			up_write(&EXT4_I(inode)->i_mmap_sem);
			goto err_out;
		}

		if (attr->ia_size != inode->i_size) {
			handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
			if (IS_ERR(handle)) {
				error = PTR_ERR(handle);
				goto out_mmap_sem;
			}
			/*
			 * 'orphan' is set even when ext4_orphan_add() fails;
			 * the failure itself is carried in 'error'.
			 */
			if (ext4_handle_valid(handle) && shrink) {
				error = ext4_orphan_add(handle, inode);
				orphan = 1;
			}
			/*
			 * Update c/mtime on truncate up, ext4_truncate() will
			 * update c/mtime in shrink case below
			 */
			if (!shrink) {
				inode->i_mtime = current_time(inode);
				inode->i_ctime = inode->i_mtime;
			}

			/*
			 * Fast-commit range tracking: from the block holding
			 * the new last byte to EXT_MAX_BLOCKS - 1 on shrink,
			 * from the old last block to the new last block on
			 * extend.
			 */
			if (shrink)
				ext4_fc_track_range(handle, inode,
					(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
					inode->i_sb->s_blocksize_bits,
					EXT_MAX_BLOCKS - 1);
			else
				ext4_fc_track_range(
					handle, inode,
					(oldsize > 0 ? oldsize - 1 : oldsize) >>
					inode->i_sb->s_blocksize_bits,
					(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
					inode->i_sb->s_blocksize_bits);

			down_write(&EXT4_I(inode)->i_data_sem);
			old_disksize = EXT4_I(inode)->i_disksize;
			EXT4_I(inode)->i_disksize = attr->ia_size;
			rc = ext4_mark_inode_dirty(handle, inode);
			/* Keep the first error: an orphan-add failure wins over rc */
			if (!error)
				error = rc;
			/*
			 * We have to update i_size under i_data_sem together
			 * with i_disksize to avoid races with writeback code
			 * running ext4_wb_update_i_disksize().
			 */
			if (!error)
				i_size_write(inode, attr->ia_size);
			else
				/* Roll back the tentative i_disksize update */
				EXT4_I(inode)->i_disksize = old_disksize;
			up_write(&EXT4_I(inode)->i_data_sem);
			ext4_journal_stop(handle);
			if (error)
				goto out_mmap_sem;
			if (!shrink) {
				pagecache_isize_extended(inode, oldsize,
							 inode->i_size);
			} else if (ext4_should_journal_data(inode)) {
				ext4_wait_for_tail_page_commit(inode);
			}
		}

		/*
		 * Truncate pagecache after we've waited for commit
		 * in data=journal mode to make pages freeable.
		 */
		truncate_pagecache(inode, inode->i_size);
		/*
		 * Call ext4_truncate() even if i_size didn't change to
		 * truncate possible preallocated blocks.
		 */
		if (attr->ia_size <= oldsize) {
			rc = ext4_truncate(inode);
			if (rc)
				error = rc;
		}
out_mmap_sem:
		up_write(&EXT4_I(inode)->i_mmap_sem);
	}

	/* Copy the remaining attributes only if everything above succeeded */
	if (!error) {
		setattr_copy(inode, attr);
		mark_inode_dirty(inode);
	}

	/*
	 * If the call to ext4_truncate failed to get a transaction handle at
	 * all, we need to clean up the in-core orphan list manually.
	 */
	if (orphan && inode->i_nlink)
		ext4_orphan_del(NULL, inode);

	if (!error && (ia_valid & ATTR_MODE))
		rc = posix_acl_chmod(inode, inode->i_mode);

err_out:
	/* Report a primary error; otherwise fall back to the secondary rc */
	if (error)
		ext4_std_error(inode->i_sb, error);
	if (!error)
		error = rc;
	return error;
}

/*
 * ext4_getattr() - fill @stat for an ext4 inode.
 *
 * Reports the creation time when STATX_BTIME is requested and
 * EXT4_FITS_IN_INODE() accepts i_crtime, translates the user-visible ext4
 * inode flags into STATX_ATTR_* bits and then lets generic_fillattr() fill
 * in the rest.  @query_flags is not used here.  Always returns 0.
 */
int ext4_getattr(const struct path *path, struct kstat *stat,
		 u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);
	/*
	 * Never assigned in this function; only handed to
	 * EXT4_FITS_IN_INODE().  NOTE(review): presumably the macro uses it
	 * for type/offset computation only - confirm against its definition.
	 */
	struct ext4_inode *raw_inode;
	struct ext4_inode_info *ei = EXT4_I(inode);
	unsigned int flags;

	if ((request_mask & STATX_BTIME) &&
	    EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) {
		stat->result_mask |= STATX_BTIME;
		stat->btime.tv_sec = ei->i_crtime.tv_sec;
		stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
	}

	/* Only flags within EXT4_FL_USER_VISIBLE are considered */
	flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
	if (flags & EXT4_APPEND_FL)
		stat->attributes |= STATX_ATTR_APPEND;
	if (flags & EXT4_COMPR_FL)
		stat->attributes |= STATX_ATTR_COMPRESSED;
	if (flags & EXT4_ENCRYPT_FL)
		stat->attributes |= STATX_ATTR_ENCRYPTED;
	if (flags & EXT4_IMMUTABLE_FL)
		stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (flags & EXT4_NODUMP_FL)
		stat->attributes |= STATX_ATTR_NODUMP;
	if (flags & EXT4_VERITY_FL)
		stat->attributes |= STATX_ATTR_VERITY;

	/* Advertise every attribute bit this function is able to report */
	stat->attributes_mask |= (STATX_ATTR_APPEND |
				  STATX_ATTR_COMPRESSED |
				  STATX_ATTR_ENCRYPTED |
				  STATX_ATTR_IMMUTABLE |
				  STATX_ATTR_NODUMP |
				  STATX_ATTR_VERITY);

	generic_fillattr(inode, stat);
	return 0;
}

/*
 * ext4_file_getattr() - ext4_getattr() plus block-count adjustments.
 *
 * After ext4_getattr() has filled @stat, stat->blocks is increased for
 * inline-data inodes and for delayed-allocation reservations.  Always
 * returns 0 (ext4_getattr() itself always returns 0).
 */
int ext4_file_getattr(const struct path *path, struct kstat *stat,
		      u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);
	u64 delalloc_blocks;

	ext4_getattr(path, stat, request_mask, query_flags);

	/*
	 * If there is inline data in the inode, the inode will normally not
	 * have data blocks allocated (it may have an external xattr block).
	 * Report at least one sector for such files, so tools like tar, rsync,
	 * others don't incorrectly think the file is completely sparse.
	 */
	if (unlikely(ext4_has_inline_data(inode)))
		stat->blocks += (stat->size + 511) >> 9;

	/*
	 * We can't update i_blocks if the block allocation is delayed
	 * otherwise in the case of system crash before the real block
	 * allocation is done, we will have i_blocks inconsistent with
	 * on-disk file blocks.
	 * We always keep i_blocks updated together with real
	 * allocation. But to not confuse with user, stat
	 * will return the blocks that include the delayed allocation
	 * blocks for this file.
	 */
	delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
				   EXT4_I(inode)->i_reserved_data_blocks);
	/* Scale filesystem blocks to the 512-byte units of stat->blocks */
	stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9);
	return 0;
}

/*
 * Index-block estimate for mapping @lblocks logical blocks to @pextents
 * physical extents: delegates to ext4_ind_trans_blocks() for inodes without
 * the EXT4_INODE_EXTENTS flag and to ext4_ext_index_trans_blocks()
 * otherwise.
 */
static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
				   int pextents)
{
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		return ext4_ind_trans_blocks(inode, lblocks);
	return ext4_ext_index_trans_blocks(inode, pextents);
}

/*
 * Account for index blocks, block groups bitmaps and block group
 * descriptor blocks if modify datablocks and index blocks
 * worse case, the indexs blocks spread over different block groups
56978c2ecf20Sopenharmony_ci * 56988c2ecf20Sopenharmony_ci * If datablocks are discontiguous, they are possible to spread over 56998c2ecf20Sopenharmony_ci * different block groups too. If they are contiguous, with flexbg, 57008c2ecf20Sopenharmony_ci * they could still across block group boundary. 57018c2ecf20Sopenharmony_ci * 57028c2ecf20Sopenharmony_ci * Also account for superblock, inode, quota and xattr blocks 57038c2ecf20Sopenharmony_ci */ 57048c2ecf20Sopenharmony_cistatic int ext4_meta_trans_blocks(struct inode *inode, int lblocks, 57058c2ecf20Sopenharmony_ci int pextents) 57068c2ecf20Sopenharmony_ci{ 57078c2ecf20Sopenharmony_ci ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); 57088c2ecf20Sopenharmony_ci int gdpblocks; 57098c2ecf20Sopenharmony_ci int idxblocks; 57108c2ecf20Sopenharmony_ci int ret = 0; 57118c2ecf20Sopenharmony_ci 57128c2ecf20Sopenharmony_ci /* 57138c2ecf20Sopenharmony_ci * How many index blocks need to touch to map @lblocks logical blocks 57148c2ecf20Sopenharmony_ci * to @pextents physical extents? 
57158c2ecf20Sopenharmony_ci */ 57168c2ecf20Sopenharmony_ci idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents); 57178c2ecf20Sopenharmony_ci 57188c2ecf20Sopenharmony_ci ret = idxblocks; 57198c2ecf20Sopenharmony_ci 57208c2ecf20Sopenharmony_ci /* 57218c2ecf20Sopenharmony_ci * Now let's see how many group bitmaps and group descriptors need 57228c2ecf20Sopenharmony_ci * to account 57238c2ecf20Sopenharmony_ci */ 57248c2ecf20Sopenharmony_ci groups = idxblocks + pextents; 57258c2ecf20Sopenharmony_ci gdpblocks = groups; 57268c2ecf20Sopenharmony_ci if (groups > ngroups) 57278c2ecf20Sopenharmony_ci groups = ngroups; 57288c2ecf20Sopenharmony_ci if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) 57298c2ecf20Sopenharmony_ci gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; 57308c2ecf20Sopenharmony_ci 57318c2ecf20Sopenharmony_ci /* bitmaps and block group descriptor blocks */ 57328c2ecf20Sopenharmony_ci ret += groups + gdpblocks; 57338c2ecf20Sopenharmony_ci 57348c2ecf20Sopenharmony_ci /* Blocks for super block, inode, quota and xattr blocks */ 57358c2ecf20Sopenharmony_ci ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); 57368c2ecf20Sopenharmony_ci 57378c2ecf20Sopenharmony_ci return ret; 57388c2ecf20Sopenharmony_ci} 57398c2ecf20Sopenharmony_ci 57408c2ecf20Sopenharmony_ci/* 57418c2ecf20Sopenharmony_ci * Calculate the total number of credits to reserve to fit 57428c2ecf20Sopenharmony_ci * the modification of a single pages into a single transaction, 57438c2ecf20Sopenharmony_ci * which may include multiple chunks of block allocations. 57448c2ecf20Sopenharmony_ci * 57458c2ecf20Sopenharmony_ci * This could be called via ext4_write_begin() 57468c2ecf20Sopenharmony_ci * 57478c2ecf20Sopenharmony_ci * We need to consider the worse case, when 57488c2ecf20Sopenharmony_ci * one new block per extent. 
57498c2ecf20Sopenharmony_ci */ 57508c2ecf20Sopenharmony_ciint ext4_writepage_trans_blocks(struct inode *inode) 57518c2ecf20Sopenharmony_ci{ 57528c2ecf20Sopenharmony_ci int bpp = ext4_journal_blocks_per_page(inode); 57538c2ecf20Sopenharmony_ci int ret; 57548c2ecf20Sopenharmony_ci 57558c2ecf20Sopenharmony_ci ret = ext4_meta_trans_blocks(inode, bpp, bpp); 57568c2ecf20Sopenharmony_ci 57578c2ecf20Sopenharmony_ci /* Account for data blocks for journalled mode */ 57588c2ecf20Sopenharmony_ci if (ext4_should_journal_data(inode)) 57598c2ecf20Sopenharmony_ci ret += bpp; 57608c2ecf20Sopenharmony_ci return ret; 57618c2ecf20Sopenharmony_ci} 57628c2ecf20Sopenharmony_ci 57638c2ecf20Sopenharmony_ci/* 57648c2ecf20Sopenharmony_ci * Calculate the journal credits for a chunk of data modification. 57658c2ecf20Sopenharmony_ci * 57668c2ecf20Sopenharmony_ci * This is called from DIO, fallocate or whoever calling 57678c2ecf20Sopenharmony_ci * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks. 57688c2ecf20Sopenharmony_ci * 57698c2ecf20Sopenharmony_ci * journal buffers for data blocks are not included here, as DIO 57708c2ecf20Sopenharmony_ci * and fallocate do no need to journal data buffers. 57718c2ecf20Sopenharmony_ci */ 57728c2ecf20Sopenharmony_ciint ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) 57738c2ecf20Sopenharmony_ci{ 57748c2ecf20Sopenharmony_ci return ext4_meta_trans_blocks(inode, nrblocks, 1); 57758c2ecf20Sopenharmony_ci} 57768c2ecf20Sopenharmony_ci 57778c2ecf20Sopenharmony_ci/* 57788c2ecf20Sopenharmony_ci * The caller must have previously called ext4_reserve_inode_write(). 57798c2ecf20Sopenharmony_ci * Give this, we know that the caller already has write access to iloc->bh. 
57808c2ecf20Sopenharmony_ci */ 57818c2ecf20Sopenharmony_ciint ext4_mark_iloc_dirty(handle_t *handle, 57828c2ecf20Sopenharmony_ci struct inode *inode, struct ext4_iloc *iloc) 57838c2ecf20Sopenharmony_ci{ 57848c2ecf20Sopenharmony_ci int err = 0; 57858c2ecf20Sopenharmony_ci 57868c2ecf20Sopenharmony_ci if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) { 57878c2ecf20Sopenharmony_ci put_bh(iloc->bh); 57888c2ecf20Sopenharmony_ci return -EIO; 57898c2ecf20Sopenharmony_ci } 57908c2ecf20Sopenharmony_ci ext4_fc_track_inode(handle, inode); 57918c2ecf20Sopenharmony_ci 57928c2ecf20Sopenharmony_ci /* 57938c2ecf20Sopenharmony_ci * ea_inodes are using i_version for storing reference count, don't 57948c2ecf20Sopenharmony_ci * mess with it 57958c2ecf20Sopenharmony_ci */ 57968c2ecf20Sopenharmony_ci if (IS_I_VERSION(inode) && 57978c2ecf20Sopenharmony_ci !(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) 57988c2ecf20Sopenharmony_ci inode_inc_iversion(inode); 57998c2ecf20Sopenharmony_ci 58008c2ecf20Sopenharmony_ci /* the do_update_inode consumes one bh->b_count */ 58018c2ecf20Sopenharmony_ci get_bh(iloc->bh); 58028c2ecf20Sopenharmony_ci 58038c2ecf20Sopenharmony_ci /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ 58048c2ecf20Sopenharmony_ci err = ext4_do_update_inode(handle, inode, iloc); 58058c2ecf20Sopenharmony_ci put_bh(iloc->bh); 58068c2ecf20Sopenharmony_ci return err; 58078c2ecf20Sopenharmony_ci} 58088c2ecf20Sopenharmony_ci 58098c2ecf20Sopenharmony_ci/* 58108c2ecf20Sopenharmony_ci * On success, We end up with an outstanding reference count against 58118c2ecf20Sopenharmony_ci * iloc->bh. This _must_ be cleaned up later. 
 *
 * Looks up the inode's location into @iloc and obtains journal write
 * access to its buffer.  If write access cannot be obtained the buffer is
 * released and iloc->bh is reset to NULL.  Returns 0 or a negative errno
 * (-EIO after a forced shutdown).
 */

int
ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
			 struct ext4_iloc *iloc)
{
	int err;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return -EIO;

	err = ext4_get_inode_loc(inode, iloc);
	if (!err) {
		BUFFER_TRACE(iloc->bh, "get_write_access");
		err = ext4_journal_get_write_access(handle, iloc->bh);
		if (err) {
			/* Do not hand a buffer we cannot write to the caller */
			brelse(iloc->bh);
			iloc->bh = NULL;
		}
	}
	/* Called unconditionally, with err == 0 on the success path as well */
	ext4_std_error(inode->i_sb, err);
	return err;
}

/*
 * Grow the inode's extra space (i_extra_isize) to @new_extra_isize bytes.
 *
 * Without in-inode extended attributes the newly covered area of the raw
 * inode is zeroed and i_extra_isize is updated directly; otherwise the work
 * is delegated to ext4_expand_extra_isize_ea().
 *
 * Returns 0 on success, -EFSCORRUPTED if the current i_extra_isize is
 * inconsistent, -EINVAL for an unacceptable @new_extra_isize, -EAGAIN if
 * quota initialization would be needed, or the error from
 * ext4_expand_extra_isize_ea() (in which case *@no_expand is set to 1).
 */
static int __ext4_expand_extra_isize(struct inode *inode,
				     unsigned int new_extra_isize,
				     struct ext4_iloc *iloc,
				     handle_t *handle, int *no_expand)
{
	struct ext4_inode *raw_inode;
	struct ext4_xattr_ibody_header *header;
	unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb);
	struct ext4_inode_info *ei = EXT4_I(inode);
	int error;

	/* this was checked at iget time, but double check for good measure */
	if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) ||
	    (ei->i_extra_isize & 3)) {
		EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)",
				 ei->i_extra_isize,
				 EXT4_INODE_SIZE(inode->i_sb));
		return -EFSCORRUPTED;
	}
	/*
	 * The new size must not shrink the area, must be at least 4 bytes
	 * and must fit behind the first EXT4_GOOD_OLD_INODE_SIZE bytes.
	 */
	if ((new_extra_isize < ei->i_extra_isize) ||
	    (new_extra_isize < 4) ||
	    (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE))
		return -EINVAL;	/* Should never happen */

	raw_inode = ext4_raw_inode(iloc);

	header = IHDR(inode, raw_inode);

	/* No extended attributes present */
	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
	    header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
		/* Zero only the bytes between the old and the new extra size */
		memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
		       EXT4_I(inode)->i_extra_isize, 0,
		       new_extra_isize - EXT4_I(inode)->i_extra_isize);
		EXT4_I(inode)->i_extra_isize = new_extra_isize;
		return 0;
	}

	/*
	 * We may need to allocate external xattr block so we need quotas
	 * initialized. Here we can be called with various locks held so we
	 * cannot afford to initialize quotas ourselves. So just bail.
	 */
	if (dquot_initialize_needed(inode))
		return -EAGAIN;

	/* try to expand with EAs present */
	error = ext4_expand_extra_isize_ea(inode, new_extra_isize,
					   raw_inode, handle);
	if (error) {
		/*
		 * Inode size expansion failed; don't try again
		 */
		*no_expand = 1;
	}

	return error;
}

/*
 * Expand an inode by new_extra_isize bytes.
 * Returns 0 on success or negative error number on failure.
 *
 * Best-effort variant that works within the caller's @handle: it gives up
 * with -EOVERFLOW when EXT4_STATE_NO_EXPAND is set, -ENOSPC when the handle
 * cannot be extended and -EBUSY when the xattr lock cannot be taken without
 * blocking.  @iloc is passed by value.
 */
static int ext4_try_to_expand_extra_isize(struct inode *inode,
					  unsigned int new_extra_isize,
					  struct ext4_iloc iloc,
					  handle_t *handle)
{
	int no_expand;
	int error;

	if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND))
		return -EOVERFLOW;

	/*
	 * In nojournal mode, we can immediately attempt to expand
	 * the inode.  When journaled, we first need to obtain extra
	 * buffer credits since we may write into the EA block
	 * with this same handle. If journal_extend fails, then it will
	 * only result in a minor loss of functionality for that inode.
	 * If this is felt to be critical, then e2fsck should be run to
	 * force a large enough s_min_extra_isize.
	 */
	if (ext4_journal_extend(handle,
				EXT4_DATA_TRANS_BLOCKS(inode->i_sb), 0) != 0)
		return -ENOSPC;

	/* Only a trylock: back off rather than wait for the xattr lock */
	if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
		return -EBUSY;

	error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc,
					  handle, &no_expand);
	ext4_write_unlock_xattr(inode, &no_expand);

	return error;
}

/*
 * Expand the inode's extra space to @new_extra_isize using a journal handle
 * started here, with the xattr write lock held for the expansion.
 *
 * On the early failure paths (NO_EXPAND state, journal start failure,
 * failure to get write access) the reference on iloc->bh is released with
 * brelse().  NOTE(review): on the main path the reference is presumably
 * consumed through ext4_mark_iloc_dirty() - confirm against
 * ext4_do_update_inode().
 *
 * Returns 0 on success or a negative errno; an expansion error takes
 * precedence over an error from ext4_mark_iloc_dirty().
 */
int ext4_expand_extra_isize(struct inode *inode,
			    unsigned int new_extra_isize,
			    struct ext4_iloc *iloc)
{
	handle_t *handle;
	int no_expand;
	int error, rc;

	if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
		brelse(iloc->bh);
		return -EOVERFLOW;
	}

	handle = ext4_journal_start(inode, EXT4_HT_INODE,
				    EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
	if (IS_ERR(handle)) {
		error = PTR_ERR(handle);
		brelse(iloc->bh);
		return error;
	}

	ext4_write_lock_xattr(inode, &no_expand);

	BUFFER_TRACE(iloc->bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, iloc->bh);
	if (error) {
		brelse(iloc->bh);
		goto out_unlock;
	}

	error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc,
					  handle, &no_expand);

	/* The inode is written back even when the expansion itself failed */
	rc = ext4_mark_iloc_dirty(handle, inode, iloc);
	if (!error)
		error = rc;

out_unlock:
	ext4_write_unlock_xattr(inode, &no_expand);
	ext4_journal_stop(handle);
	return error;
}

/*
 * What we do here is to mark the in-core inode as clean with respect to inode
 * dirtiness (it may still be data-dirty).
 * This means that the in-core inode may be reaped by prune_icache
 * without having to perform any I/O.  This is a very good thing,
 * because *any* task may call prune_icache - even ones which
 * have a transaction open against a different journal.
 *
 * Is this cheating?  Not really.  Sure, we haven't written the
 * inode out, but prune_icache isn't a user-visible syncing function.
 * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
 * we start and wait on commits.
59888c2ecf20Sopenharmony_ci */ 59898c2ecf20Sopenharmony_ciint __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode, 59908c2ecf20Sopenharmony_ci const char *func, unsigned int line) 59918c2ecf20Sopenharmony_ci{ 59928c2ecf20Sopenharmony_ci struct ext4_iloc iloc; 59938c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 59948c2ecf20Sopenharmony_ci int err; 59958c2ecf20Sopenharmony_ci 59968c2ecf20Sopenharmony_ci might_sleep(); 59978c2ecf20Sopenharmony_ci trace_ext4_mark_inode_dirty(inode, _RET_IP_); 59988c2ecf20Sopenharmony_ci err = ext4_reserve_inode_write(handle, inode, &iloc); 59998c2ecf20Sopenharmony_ci if (err) 60008c2ecf20Sopenharmony_ci goto out; 60018c2ecf20Sopenharmony_ci 60028c2ecf20Sopenharmony_ci if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize) 60038c2ecf20Sopenharmony_ci ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize, 60048c2ecf20Sopenharmony_ci iloc, handle); 60058c2ecf20Sopenharmony_ci 60068c2ecf20Sopenharmony_ci err = ext4_mark_iloc_dirty(handle, inode, &iloc); 60078c2ecf20Sopenharmony_ciout: 60088c2ecf20Sopenharmony_ci if (unlikely(err)) 60098c2ecf20Sopenharmony_ci ext4_error_inode_err(inode, func, line, 0, err, 60108c2ecf20Sopenharmony_ci "mark_inode_dirty error"); 60118c2ecf20Sopenharmony_ci return err; 60128c2ecf20Sopenharmony_ci} 60138c2ecf20Sopenharmony_ci 60148c2ecf20Sopenharmony_ci/* 60158c2ecf20Sopenharmony_ci * ext4_dirty_inode() is called from __mark_inode_dirty() 60168c2ecf20Sopenharmony_ci * 60178c2ecf20Sopenharmony_ci * We're really interested in the case where a file is being extended. 60188c2ecf20Sopenharmony_ci * i_size has been changed by generic_commit_write() and we thus need 60198c2ecf20Sopenharmony_ci * to include the updated inode in the current transaction. 60208c2ecf20Sopenharmony_ci * 60218c2ecf20Sopenharmony_ci * Also, dquot_alloc_block() will always dirty the inode when blocks 60228c2ecf20Sopenharmony_ci * are allocated to the file. 
60238c2ecf20Sopenharmony_ci * 60248c2ecf20Sopenharmony_ci * If the inode is marked synchronous, we don't honour that here - doing 60258c2ecf20Sopenharmony_ci * so would cause a commit on atime updates, which we don't bother doing. 60268c2ecf20Sopenharmony_ci * We handle synchronous inodes at the highest possible level. 60278c2ecf20Sopenharmony_ci * 60288c2ecf20Sopenharmony_ci * If only the I_DIRTY_TIME flag is set, we can skip everything. If 60298c2ecf20Sopenharmony_ci * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need 60308c2ecf20Sopenharmony_ci * to copy into the on-disk inode structure are the timestamp files. 60318c2ecf20Sopenharmony_ci */ 60328c2ecf20Sopenharmony_civoid ext4_dirty_inode(struct inode *inode, int flags) 60338c2ecf20Sopenharmony_ci{ 60348c2ecf20Sopenharmony_ci handle_t *handle; 60358c2ecf20Sopenharmony_ci 60368c2ecf20Sopenharmony_ci if (flags == I_DIRTY_TIME) 60378c2ecf20Sopenharmony_ci return; 60388c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 60398c2ecf20Sopenharmony_ci if (IS_ERR(handle)) 60408c2ecf20Sopenharmony_ci goto out; 60418c2ecf20Sopenharmony_ci 60428c2ecf20Sopenharmony_ci ext4_mark_inode_dirty(handle, inode); 60438c2ecf20Sopenharmony_ci 60448c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 60458c2ecf20Sopenharmony_ciout: 60468c2ecf20Sopenharmony_ci return; 60478c2ecf20Sopenharmony_ci} 60488c2ecf20Sopenharmony_ci 60498c2ecf20Sopenharmony_ciint ext4_change_inode_journal_flag(struct inode *inode, int val) 60508c2ecf20Sopenharmony_ci{ 60518c2ecf20Sopenharmony_ci journal_t *journal; 60528c2ecf20Sopenharmony_ci handle_t *handle; 60538c2ecf20Sopenharmony_ci int err; 60548c2ecf20Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 60558c2ecf20Sopenharmony_ci 60568c2ecf20Sopenharmony_ci /* 60578c2ecf20Sopenharmony_ci * We have to be very careful here: changing a data block's 60588c2ecf20Sopenharmony_ci * journaling status dynamically is dangerous. 
If we write a 60598c2ecf20Sopenharmony_ci * data block to the journal, change the status and then delete 60608c2ecf20Sopenharmony_ci * that block, we risk forgetting to revoke the old log record 60618c2ecf20Sopenharmony_ci * from the journal and so a subsequent replay can corrupt data. 60628c2ecf20Sopenharmony_ci * So, first we make sure that the journal is empty and that 60638c2ecf20Sopenharmony_ci * nobody is changing anything. 60648c2ecf20Sopenharmony_ci */ 60658c2ecf20Sopenharmony_ci 60668c2ecf20Sopenharmony_ci journal = EXT4_JOURNAL(inode); 60678c2ecf20Sopenharmony_ci if (!journal) 60688c2ecf20Sopenharmony_ci return 0; 60698c2ecf20Sopenharmony_ci if (is_journal_aborted(journal)) 60708c2ecf20Sopenharmony_ci return -EROFS; 60718c2ecf20Sopenharmony_ci 60728c2ecf20Sopenharmony_ci /* Wait for all existing dio workers */ 60738c2ecf20Sopenharmony_ci inode_dio_wait(inode); 60748c2ecf20Sopenharmony_ci 60758c2ecf20Sopenharmony_ci /* 60768c2ecf20Sopenharmony_ci * Before flushing the journal and switching inode's aops, we have 60778c2ecf20Sopenharmony_ci * to flush all dirty data the inode has. There can be outstanding 60788c2ecf20Sopenharmony_ci * delayed allocations, there can be unwritten extents created by 60798c2ecf20Sopenharmony_ci * fallocate or buffered writes in dioread_nolock mode covered by 60808c2ecf20Sopenharmony_ci * dirty data which can be converted only after flushing the dirty 60818c2ecf20Sopenharmony_ci * data (and journalled aops don't know how to handle these cases). 
60828c2ecf20Sopenharmony_ci */ 60838c2ecf20Sopenharmony_ci if (val) { 60848c2ecf20Sopenharmony_ci down_write(&EXT4_I(inode)->i_mmap_sem); 60858c2ecf20Sopenharmony_ci err = filemap_write_and_wait(inode->i_mapping); 60868c2ecf20Sopenharmony_ci if (err < 0) { 60878c2ecf20Sopenharmony_ci up_write(&EXT4_I(inode)->i_mmap_sem); 60888c2ecf20Sopenharmony_ci return err; 60898c2ecf20Sopenharmony_ci } 60908c2ecf20Sopenharmony_ci } 60918c2ecf20Sopenharmony_ci 60928c2ecf20Sopenharmony_ci percpu_down_write(&sbi->s_writepages_rwsem); 60938c2ecf20Sopenharmony_ci jbd2_journal_lock_updates(journal); 60948c2ecf20Sopenharmony_ci 60958c2ecf20Sopenharmony_ci /* 60968c2ecf20Sopenharmony_ci * OK, there are no updates running now, and all cached data is 60978c2ecf20Sopenharmony_ci * synced to disk. We are now in a completely consistent state 60988c2ecf20Sopenharmony_ci * which doesn't have anything in the journal, and we know that 60998c2ecf20Sopenharmony_ci * no filesystem updates are running, so it is safe to modify 61008c2ecf20Sopenharmony_ci * the inode's in-core data-journaling state flag now. 
61018c2ecf20Sopenharmony_ci */ 61028c2ecf20Sopenharmony_ci 61038c2ecf20Sopenharmony_ci if (val) 61048c2ecf20Sopenharmony_ci ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 61058c2ecf20Sopenharmony_ci else { 61068c2ecf20Sopenharmony_ci err = jbd2_journal_flush(journal); 61078c2ecf20Sopenharmony_ci if (err < 0) { 61088c2ecf20Sopenharmony_ci jbd2_journal_unlock_updates(journal); 61098c2ecf20Sopenharmony_ci percpu_up_write(&sbi->s_writepages_rwsem); 61108c2ecf20Sopenharmony_ci return err; 61118c2ecf20Sopenharmony_ci } 61128c2ecf20Sopenharmony_ci ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 61138c2ecf20Sopenharmony_ci } 61148c2ecf20Sopenharmony_ci ext4_set_aops(inode); 61158c2ecf20Sopenharmony_ci 61168c2ecf20Sopenharmony_ci jbd2_journal_unlock_updates(journal); 61178c2ecf20Sopenharmony_ci percpu_up_write(&sbi->s_writepages_rwsem); 61188c2ecf20Sopenharmony_ci 61198c2ecf20Sopenharmony_ci if (val) 61208c2ecf20Sopenharmony_ci up_write(&EXT4_I(inode)->i_mmap_sem); 61218c2ecf20Sopenharmony_ci 61228c2ecf20Sopenharmony_ci /* Finally we can mark the inode as dirty. 
*/ 61238c2ecf20Sopenharmony_ci 61248c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); 61258c2ecf20Sopenharmony_ci if (IS_ERR(handle)) 61268c2ecf20Sopenharmony_ci return PTR_ERR(handle); 61278c2ecf20Sopenharmony_ci 61288c2ecf20Sopenharmony_ci ext4_fc_mark_ineligible(inode->i_sb, 61298c2ecf20Sopenharmony_ci EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); 61308c2ecf20Sopenharmony_ci err = ext4_mark_inode_dirty(handle, inode); 61318c2ecf20Sopenharmony_ci ext4_handle_sync(handle); 61328c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 61338c2ecf20Sopenharmony_ci ext4_std_error(inode->i_sb, err); 61348c2ecf20Sopenharmony_ci 61358c2ecf20Sopenharmony_ci return err; 61368c2ecf20Sopenharmony_ci} 61378c2ecf20Sopenharmony_ci 61388c2ecf20Sopenharmony_cistatic int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh) 61398c2ecf20Sopenharmony_ci{ 61408c2ecf20Sopenharmony_ci return !buffer_mapped(bh); 61418c2ecf20Sopenharmony_ci} 61428c2ecf20Sopenharmony_ci 61438c2ecf20Sopenharmony_civm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) 61448c2ecf20Sopenharmony_ci{ 61458c2ecf20Sopenharmony_ci struct vm_area_struct *vma = vmf->vma; 61468c2ecf20Sopenharmony_ci struct page *page = vmf->page; 61478c2ecf20Sopenharmony_ci loff_t size; 61488c2ecf20Sopenharmony_ci unsigned long len; 61498c2ecf20Sopenharmony_ci int err; 61508c2ecf20Sopenharmony_ci vm_fault_t ret; 61518c2ecf20Sopenharmony_ci struct file *file = vma->vm_file; 61528c2ecf20Sopenharmony_ci struct inode *inode = file_inode(file); 61538c2ecf20Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 61548c2ecf20Sopenharmony_ci handle_t *handle; 61558c2ecf20Sopenharmony_ci get_block_t *get_block; 61568c2ecf20Sopenharmony_ci int retries = 0; 61578c2ecf20Sopenharmony_ci 61588c2ecf20Sopenharmony_ci if (unlikely(IS_IMMUTABLE(inode))) 61598c2ecf20Sopenharmony_ci return VM_FAULT_SIGBUS; 61608c2ecf20Sopenharmony_ci 61618c2ecf20Sopenharmony_ci sb_start_pagefault(inode->i_sb); 61628c2ecf20Sopenharmony_ci 
file_update_time(vma->vm_file); 61638c2ecf20Sopenharmony_ci 61648c2ecf20Sopenharmony_ci down_read(&EXT4_I(inode)->i_mmap_sem); 61658c2ecf20Sopenharmony_ci 61668c2ecf20Sopenharmony_ci err = ext4_convert_inline_data(inode); 61678c2ecf20Sopenharmony_ci if (err) 61688c2ecf20Sopenharmony_ci goto out_ret; 61698c2ecf20Sopenharmony_ci 61708c2ecf20Sopenharmony_ci /* 61718c2ecf20Sopenharmony_ci * On data journalling we skip straight to the transaction handle: 61728c2ecf20Sopenharmony_ci * there's no delalloc; page truncated will be checked later; the 61738c2ecf20Sopenharmony_ci * early return w/ all buffers mapped (calculates size/len) can't 61748c2ecf20Sopenharmony_ci * be used; and there's no dioread_nolock, so only ext4_get_block. 61758c2ecf20Sopenharmony_ci */ 61768c2ecf20Sopenharmony_ci if (ext4_should_journal_data(inode)) 61778c2ecf20Sopenharmony_ci goto retry_alloc; 61788c2ecf20Sopenharmony_ci 61798c2ecf20Sopenharmony_ci /* Delalloc case is easy... */ 61808c2ecf20Sopenharmony_ci if (test_opt(inode->i_sb, DELALLOC) && 61818c2ecf20Sopenharmony_ci !ext4_nonda_switch(inode->i_sb)) { 61828c2ecf20Sopenharmony_ci do { 61838c2ecf20Sopenharmony_ci err = block_page_mkwrite(vma, vmf, 61848c2ecf20Sopenharmony_ci ext4_da_get_block_prep); 61858c2ecf20Sopenharmony_ci } while (err == -ENOSPC && 61868c2ecf20Sopenharmony_ci ext4_should_retry_alloc(inode->i_sb, &retries)); 61878c2ecf20Sopenharmony_ci goto out_ret; 61888c2ecf20Sopenharmony_ci } 61898c2ecf20Sopenharmony_ci 61908c2ecf20Sopenharmony_ci lock_page(page); 61918c2ecf20Sopenharmony_ci size = i_size_read(inode); 61928c2ecf20Sopenharmony_ci /* Page got truncated from under us? 
*/ 61938c2ecf20Sopenharmony_ci if (page->mapping != mapping || page_offset(page) > size) { 61948c2ecf20Sopenharmony_ci unlock_page(page); 61958c2ecf20Sopenharmony_ci ret = VM_FAULT_NOPAGE; 61968c2ecf20Sopenharmony_ci goto out; 61978c2ecf20Sopenharmony_ci } 61988c2ecf20Sopenharmony_ci 61998c2ecf20Sopenharmony_ci if (page->index == size >> PAGE_SHIFT) 62008c2ecf20Sopenharmony_ci len = size & ~PAGE_MASK; 62018c2ecf20Sopenharmony_ci else 62028c2ecf20Sopenharmony_ci len = PAGE_SIZE; 62038c2ecf20Sopenharmony_ci /* 62048c2ecf20Sopenharmony_ci * Return if we have all the buffers mapped. This avoids the need to do 62058c2ecf20Sopenharmony_ci * journal_start/journal_stop which can block and take a long time 62068c2ecf20Sopenharmony_ci * 62078c2ecf20Sopenharmony_ci * This cannot be done for data journalling, as we have to add the 62088c2ecf20Sopenharmony_ci * inode to the transaction's list to writeprotect pages on commit. 62098c2ecf20Sopenharmony_ci */ 62108c2ecf20Sopenharmony_ci if (page_has_buffers(page)) { 62118c2ecf20Sopenharmony_ci if (!ext4_walk_page_buffers(NULL, page_buffers(page), 62128c2ecf20Sopenharmony_ci 0, len, NULL, 62138c2ecf20Sopenharmony_ci ext4_bh_unmapped)) { 62148c2ecf20Sopenharmony_ci /* Wait so that we don't change page under IO */ 62158c2ecf20Sopenharmony_ci wait_for_stable_page(page); 62168c2ecf20Sopenharmony_ci ret = VM_FAULT_LOCKED; 62178c2ecf20Sopenharmony_ci goto out; 62188c2ecf20Sopenharmony_ci } 62198c2ecf20Sopenharmony_ci } 62208c2ecf20Sopenharmony_ci unlock_page(page); 62218c2ecf20Sopenharmony_ci /* OK, we need to fill the hole... 
*/ 62228c2ecf20Sopenharmony_ci if (ext4_should_dioread_nolock(inode)) 62238c2ecf20Sopenharmony_ci get_block = ext4_get_block_unwritten; 62248c2ecf20Sopenharmony_ci else 62258c2ecf20Sopenharmony_ci get_block = ext4_get_block; 62268c2ecf20Sopenharmony_ciretry_alloc: 62278c2ecf20Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 62288c2ecf20Sopenharmony_ci ext4_writepage_trans_blocks(inode)); 62298c2ecf20Sopenharmony_ci if (IS_ERR(handle)) { 62308c2ecf20Sopenharmony_ci ret = VM_FAULT_SIGBUS; 62318c2ecf20Sopenharmony_ci goto out; 62328c2ecf20Sopenharmony_ci } 62338c2ecf20Sopenharmony_ci /* 62348c2ecf20Sopenharmony_ci * Data journalling can't use block_page_mkwrite() because it 62358c2ecf20Sopenharmony_ci * will set_buffer_dirty() before do_journal_get_write_access() 62368c2ecf20Sopenharmony_ci * thus might hit warning messages for dirty metadata buffers. 62378c2ecf20Sopenharmony_ci */ 62388c2ecf20Sopenharmony_ci if (!ext4_should_journal_data(inode)) { 62398c2ecf20Sopenharmony_ci err = block_page_mkwrite(vma, vmf, get_block); 62408c2ecf20Sopenharmony_ci } else { 62418c2ecf20Sopenharmony_ci lock_page(page); 62428c2ecf20Sopenharmony_ci size = i_size_read(inode); 62438c2ecf20Sopenharmony_ci /* Page got truncated from under us? 
*/ 62448c2ecf20Sopenharmony_ci if (page->mapping != mapping || page_offset(page) > size) { 62458c2ecf20Sopenharmony_ci ret = VM_FAULT_NOPAGE; 62468c2ecf20Sopenharmony_ci goto out_error; 62478c2ecf20Sopenharmony_ci } 62488c2ecf20Sopenharmony_ci 62498c2ecf20Sopenharmony_ci if (page->index == size >> PAGE_SHIFT) 62508c2ecf20Sopenharmony_ci len = size & ~PAGE_MASK; 62518c2ecf20Sopenharmony_ci else 62528c2ecf20Sopenharmony_ci len = PAGE_SIZE; 62538c2ecf20Sopenharmony_ci 62548c2ecf20Sopenharmony_ci err = __block_write_begin(page, 0, len, ext4_get_block); 62558c2ecf20Sopenharmony_ci if (!err) { 62568c2ecf20Sopenharmony_ci ret = VM_FAULT_SIGBUS; 62578c2ecf20Sopenharmony_ci if (ext4_walk_page_buffers(handle, page_buffers(page), 62588c2ecf20Sopenharmony_ci 0, len, NULL, do_journal_get_write_access)) 62598c2ecf20Sopenharmony_ci goto out_error; 62608c2ecf20Sopenharmony_ci if (ext4_walk_page_buffers(handle, page_buffers(page), 62618c2ecf20Sopenharmony_ci 0, len, NULL, write_end_fn)) 62628c2ecf20Sopenharmony_ci goto out_error; 62638c2ecf20Sopenharmony_ci if (ext4_jbd2_inode_add_write(handle, inode, 62648c2ecf20Sopenharmony_ci page_offset(page), len)) 62658c2ecf20Sopenharmony_ci goto out_error; 62668c2ecf20Sopenharmony_ci ext4_set_inode_state(inode, EXT4_STATE_JDATA); 62678c2ecf20Sopenharmony_ci } else { 62688c2ecf20Sopenharmony_ci unlock_page(page); 62698c2ecf20Sopenharmony_ci } 62708c2ecf20Sopenharmony_ci } 62718c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 62728c2ecf20Sopenharmony_ci if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 62738c2ecf20Sopenharmony_ci goto retry_alloc; 62748c2ecf20Sopenharmony_ciout_ret: 62758c2ecf20Sopenharmony_ci ret = block_page_mkwrite_return(err); 62768c2ecf20Sopenharmony_ciout: 62778c2ecf20Sopenharmony_ci up_read(&EXT4_I(inode)->i_mmap_sem); 62788c2ecf20Sopenharmony_ci sb_end_pagefault(inode->i_sb); 62798c2ecf20Sopenharmony_ci return ret; 62808c2ecf20Sopenharmony_ciout_error: 62818c2ecf20Sopenharmony_ci unlock_page(page); 
62828c2ecf20Sopenharmony_ci ext4_journal_stop(handle); 62838c2ecf20Sopenharmony_ci goto out; 62848c2ecf20Sopenharmony_ci} 62858c2ecf20Sopenharmony_ci 62868c2ecf20Sopenharmony_civm_fault_t ext4_filemap_fault(struct vm_fault *vmf) 62878c2ecf20Sopenharmony_ci{ 62888c2ecf20Sopenharmony_ci struct inode *inode = file_inode(vmf->vma->vm_file); 62898c2ecf20Sopenharmony_ci vm_fault_t ret; 62908c2ecf20Sopenharmony_ci 62918c2ecf20Sopenharmony_ci down_read(&EXT4_I(inode)->i_mmap_sem); 62928c2ecf20Sopenharmony_ci ret = filemap_fault(vmf); 62938c2ecf20Sopenharmony_ci up_read(&EXT4_I(inode)->i_mmap_sem); 62948c2ecf20Sopenharmony_ci 62958c2ecf20Sopenharmony_ci return ret; 62968c2ecf20Sopenharmony_ci} 6297