// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/inode.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 *
 *  Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000
 */

#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/time.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/slab.h>
4162306a36Sopenharmony_ci#include <linux/bitops.h> 4262306a36Sopenharmony_ci#include <linux/iomap.h> 4362306a36Sopenharmony_ci#include <linux/iversion.h> 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_ci#include "ext4_jbd2.h" 4662306a36Sopenharmony_ci#include "xattr.h" 4762306a36Sopenharmony_ci#include "acl.h" 4862306a36Sopenharmony_ci#include "truncate.h" 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci#include <trace/events/ext4.h> 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_cistatic __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw, 5362306a36Sopenharmony_ci struct ext4_inode_info *ei) 5462306a36Sopenharmony_ci{ 5562306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 5662306a36Sopenharmony_ci __u32 csum; 5762306a36Sopenharmony_ci __u16 dummy_csum = 0; 5862306a36Sopenharmony_ci int offset = offsetof(struct ext4_inode, i_checksum_lo); 5962306a36Sopenharmony_ci unsigned int csum_size = sizeof(dummy_csum); 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, offset); 6262306a36Sopenharmony_ci csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, csum_size); 6362306a36Sopenharmony_ci offset += csum_size; 6462306a36Sopenharmony_ci csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, 6562306a36Sopenharmony_ci EXT4_GOOD_OLD_INODE_SIZE - offset); 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 6862306a36Sopenharmony_ci offset = offsetof(struct ext4_inode, i_checksum_hi); 6962306a36Sopenharmony_ci csum = ext4_chksum(sbi, csum, (__u8 *)raw + 7062306a36Sopenharmony_ci EXT4_GOOD_OLD_INODE_SIZE, 7162306a36Sopenharmony_ci offset - EXT4_GOOD_OLD_INODE_SIZE); 7262306a36Sopenharmony_ci if (EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) { 7362306a36Sopenharmony_ci csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, 7462306a36Sopenharmony_ci csum_size); 7562306a36Sopenharmony_ci offset += csum_size; 7662306a36Sopenharmony_ci } 
7762306a36Sopenharmony_ci csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, 7862306a36Sopenharmony_ci EXT4_INODE_SIZE(inode->i_sb) - offset); 7962306a36Sopenharmony_ci } 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci return csum; 8262306a36Sopenharmony_ci} 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_cistatic int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw, 8562306a36Sopenharmony_ci struct ext4_inode_info *ei) 8662306a36Sopenharmony_ci{ 8762306a36Sopenharmony_ci __u32 provided, calculated; 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 9062306a36Sopenharmony_ci cpu_to_le32(EXT4_OS_LINUX) || 9162306a36Sopenharmony_ci !ext4_has_metadata_csum(inode->i_sb)) 9262306a36Sopenharmony_ci return 1; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci provided = le16_to_cpu(raw->i_checksum_lo); 9562306a36Sopenharmony_ci calculated = ext4_inode_csum(inode, raw, ei); 9662306a36Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 9762306a36Sopenharmony_ci EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) 9862306a36Sopenharmony_ci provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16; 9962306a36Sopenharmony_ci else 10062306a36Sopenharmony_ci calculated &= 0xFFFF; 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ci return provided == calculated; 10362306a36Sopenharmony_ci} 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_civoid ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw, 10662306a36Sopenharmony_ci struct ext4_inode_info *ei) 10762306a36Sopenharmony_ci{ 10862306a36Sopenharmony_ci __u32 csum; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 11162306a36Sopenharmony_ci cpu_to_le32(EXT4_OS_LINUX) || 11262306a36Sopenharmony_ci !ext4_has_metadata_csum(inode->i_sb)) 11362306a36Sopenharmony_ci return; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci csum = ext4_inode_csum(inode, raw, ei); 
11662306a36Sopenharmony_ci raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF); 11762306a36Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && 11862306a36Sopenharmony_ci EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) 11962306a36Sopenharmony_ci raw->i_checksum_hi = cpu_to_le16(csum >> 16); 12062306a36Sopenharmony_ci} 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_cistatic inline int ext4_begin_ordered_truncate(struct inode *inode, 12362306a36Sopenharmony_ci loff_t new_size) 12462306a36Sopenharmony_ci{ 12562306a36Sopenharmony_ci trace_ext4_begin_ordered_truncate(inode, new_size); 12662306a36Sopenharmony_ci /* 12762306a36Sopenharmony_ci * If jinode is zero, then we never opened the file for 12862306a36Sopenharmony_ci * writing, so there's no need to call 12962306a36Sopenharmony_ci * jbd2_journal_begin_ordered_truncate() since there's no 13062306a36Sopenharmony_ci * outstanding writes we need to flush. 13162306a36Sopenharmony_ci */ 13262306a36Sopenharmony_ci if (!EXT4_I(inode)->jinode) 13362306a36Sopenharmony_ci return 0; 13462306a36Sopenharmony_ci return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode), 13562306a36Sopenharmony_ci EXT4_I(inode)->jinode, 13662306a36Sopenharmony_ci new_size); 13762306a36Sopenharmony_ci} 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_cistatic int ext4_meta_trans_blocks(struct inode *inode, int lblocks, 14062306a36Sopenharmony_ci int pextents); 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci/* 14362306a36Sopenharmony_ci * Test whether an inode is a fast symlink. 14462306a36Sopenharmony_ci * A fast symlink has its symlink data stored in ext4_inode_info->i_data. 14562306a36Sopenharmony_ci */ 14662306a36Sopenharmony_ciint ext4_inode_is_fast_symlink(struct inode *inode) 14762306a36Sopenharmony_ci{ 14862306a36Sopenharmony_ci if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { 14962306a36Sopenharmony_ci int ea_blocks = EXT4_I(inode)->i_file_acl ? 
15062306a36Sopenharmony_ci EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci if (ext4_has_inline_data(inode)) 15362306a36Sopenharmony_ci return 0; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); 15662306a36Sopenharmony_ci } 15762306a36Sopenharmony_ci return S_ISLNK(inode->i_mode) && inode->i_size && 15862306a36Sopenharmony_ci (inode->i_size < EXT4_N_BLOCKS * 4); 15962306a36Sopenharmony_ci} 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci/* 16262306a36Sopenharmony_ci * Called at the last iput() if i_nlink is zero. 16362306a36Sopenharmony_ci */ 16462306a36Sopenharmony_civoid ext4_evict_inode(struct inode *inode) 16562306a36Sopenharmony_ci{ 16662306a36Sopenharmony_ci handle_t *handle; 16762306a36Sopenharmony_ci int err; 16862306a36Sopenharmony_ci /* 16962306a36Sopenharmony_ci * Credits for final inode cleanup and freeing: 17062306a36Sopenharmony_ci * sb + inode (ext4_orphan_del()), block bitmap, group descriptor 17162306a36Sopenharmony_ci * (xattr block freeing), bitmap, group descriptor (inode freeing) 17262306a36Sopenharmony_ci */ 17362306a36Sopenharmony_ci int extra_credits = 6; 17462306a36Sopenharmony_ci struct ext4_xattr_inode_array *ea_inode_array = NULL; 17562306a36Sopenharmony_ci bool freeze_protected = false; 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci trace_ext4_evict_inode(inode); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) 18062306a36Sopenharmony_ci ext4_evict_ea_inode(inode); 18162306a36Sopenharmony_ci if (inode->i_nlink) { 18262306a36Sopenharmony_ci truncate_inode_pages_final(&inode->i_data); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci goto no_delete; 18562306a36Sopenharmony_ci } 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci if (is_bad_inode(inode)) 18862306a36Sopenharmony_ci goto no_delete; 18962306a36Sopenharmony_ci dquot_initialize(inode); 
19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci if (ext4_should_order_data(inode)) 19262306a36Sopenharmony_ci ext4_begin_ordered_truncate(inode, 0); 19362306a36Sopenharmony_ci truncate_inode_pages_final(&inode->i_data); 19462306a36Sopenharmony_ci 19562306a36Sopenharmony_ci /* 19662306a36Sopenharmony_ci * For inodes with journalled data, transaction commit could have 19762306a36Sopenharmony_ci * dirtied the inode. And for inodes with dioread_nolock, unwritten 19862306a36Sopenharmony_ci * extents converting worker could merge extents and also have dirtied 19962306a36Sopenharmony_ci * the inode. Flush worker is ignoring it because of I_FREEING flag but 20062306a36Sopenharmony_ci * we still need to remove the inode from the writeback lists. 20162306a36Sopenharmony_ci */ 20262306a36Sopenharmony_ci if (!list_empty_careful(&inode->i_io_list)) 20362306a36Sopenharmony_ci inode_io_list_del(inode); 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci /* 20662306a36Sopenharmony_ci * Protect us against freezing - iput() caller didn't have to have any 20762306a36Sopenharmony_ci * protection against it. When we are in a running transaction though, 20862306a36Sopenharmony_ci * we are already protected against freezing and we cannot grab further 20962306a36Sopenharmony_ci * protection due to lock ordering constraints. 21062306a36Sopenharmony_ci */ 21162306a36Sopenharmony_ci if (!ext4_journal_current_handle()) { 21262306a36Sopenharmony_ci sb_start_intwrite(inode->i_sb); 21362306a36Sopenharmony_ci freeze_protected = true; 21462306a36Sopenharmony_ci } 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci if (!IS_NOQUOTA(inode)) 21762306a36Sopenharmony_ci extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci /* 22062306a36Sopenharmony_ci * Block bitmap, group descriptor, and inode are accounted in both 22162306a36Sopenharmony_ci * ext4_blocks_for_truncate() and extra_credits. So subtract 3. 
22262306a36Sopenharmony_ci */ 22362306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, 22462306a36Sopenharmony_ci ext4_blocks_for_truncate(inode) + extra_credits - 3); 22562306a36Sopenharmony_ci if (IS_ERR(handle)) { 22662306a36Sopenharmony_ci ext4_std_error(inode->i_sb, PTR_ERR(handle)); 22762306a36Sopenharmony_ci /* 22862306a36Sopenharmony_ci * If we're going to skip the normal cleanup, we still need to 22962306a36Sopenharmony_ci * make sure that the in-core orphan linked list is properly 23062306a36Sopenharmony_ci * cleaned up. 23162306a36Sopenharmony_ci */ 23262306a36Sopenharmony_ci ext4_orphan_del(NULL, inode); 23362306a36Sopenharmony_ci if (freeze_protected) 23462306a36Sopenharmony_ci sb_end_intwrite(inode->i_sb); 23562306a36Sopenharmony_ci goto no_delete; 23662306a36Sopenharmony_ci } 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci if (IS_SYNC(inode)) 23962306a36Sopenharmony_ci ext4_handle_sync(handle); 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci /* 24262306a36Sopenharmony_ci * Set inode->i_size to 0 before calling ext4_truncate(). We need 24362306a36Sopenharmony_ci * special handling of symlinks here because i_size is used to 24462306a36Sopenharmony_ci * determine whether ext4_inode_info->i_data contains symlink data or 24562306a36Sopenharmony_ci * block mappings. Setting i_size to 0 will remove its fast symlink 24662306a36Sopenharmony_ci * status. Erase i_data so that it becomes a valid empty block map. 
24762306a36Sopenharmony_ci */ 24862306a36Sopenharmony_ci if (ext4_inode_is_fast_symlink(inode)) 24962306a36Sopenharmony_ci memset(EXT4_I(inode)->i_data, 0, sizeof(EXT4_I(inode)->i_data)); 25062306a36Sopenharmony_ci inode->i_size = 0; 25162306a36Sopenharmony_ci err = ext4_mark_inode_dirty(handle, inode); 25262306a36Sopenharmony_ci if (err) { 25362306a36Sopenharmony_ci ext4_warning(inode->i_sb, 25462306a36Sopenharmony_ci "couldn't mark inode dirty (err %d)", err); 25562306a36Sopenharmony_ci goto stop_handle; 25662306a36Sopenharmony_ci } 25762306a36Sopenharmony_ci if (inode->i_blocks) { 25862306a36Sopenharmony_ci err = ext4_truncate(inode); 25962306a36Sopenharmony_ci if (err) { 26062306a36Sopenharmony_ci ext4_error_err(inode->i_sb, -err, 26162306a36Sopenharmony_ci "couldn't truncate inode %lu (err %d)", 26262306a36Sopenharmony_ci inode->i_ino, err); 26362306a36Sopenharmony_ci goto stop_handle; 26462306a36Sopenharmony_ci } 26562306a36Sopenharmony_ci } 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci /* Remove xattr references. */ 26862306a36Sopenharmony_ci err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array, 26962306a36Sopenharmony_ci extra_credits); 27062306a36Sopenharmony_ci if (err) { 27162306a36Sopenharmony_ci ext4_warning(inode->i_sb, "xattr delete (err %d)", err); 27262306a36Sopenharmony_cistop_handle: 27362306a36Sopenharmony_ci ext4_journal_stop(handle); 27462306a36Sopenharmony_ci ext4_orphan_del(NULL, inode); 27562306a36Sopenharmony_ci if (freeze_protected) 27662306a36Sopenharmony_ci sb_end_intwrite(inode->i_sb); 27762306a36Sopenharmony_ci ext4_xattr_inode_array_free(ea_inode_array); 27862306a36Sopenharmony_ci goto no_delete; 27962306a36Sopenharmony_ci } 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci /* 28262306a36Sopenharmony_ci * Kill off the orphan record which ext4_truncate created. 28362306a36Sopenharmony_ci * AKPM: I think this can be inside the above `if'. 
28462306a36Sopenharmony_ci * Note that ext4_orphan_del() has to be able to cope with the 28562306a36Sopenharmony_ci * deletion of a non-existent orphan - this is because we don't 28662306a36Sopenharmony_ci * know if ext4_truncate() actually created an orphan record. 28762306a36Sopenharmony_ci * (Well, we could do this if we need to, but heck - it works) 28862306a36Sopenharmony_ci */ 28962306a36Sopenharmony_ci ext4_orphan_del(handle, inode); 29062306a36Sopenharmony_ci EXT4_I(inode)->i_dtime = (__u32)ktime_get_real_seconds(); 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci /* 29362306a36Sopenharmony_ci * One subtle ordering requirement: if anything has gone wrong 29462306a36Sopenharmony_ci * (transaction abort, IO errors, whatever), then we can still 29562306a36Sopenharmony_ci * do these next steps (the fs will already have been marked as 29662306a36Sopenharmony_ci * having errors), but we can't free the inode if the mark_dirty 29762306a36Sopenharmony_ci * fails. 29862306a36Sopenharmony_ci */ 29962306a36Sopenharmony_ci if (ext4_mark_inode_dirty(handle, inode)) 30062306a36Sopenharmony_ci /* If that failed, just do the required in-core inode clear. */ 30162306a36Sopenharmony_ci ext4_clear_inode(inode); 30262306a36Sopenharmony_ci else 30362306a36Sopenharmony_ci ext4_free_inode(handle, inode); 30462306a36Sopenharmony_ci ext4_journal_stop(handle); 30562306a36Sopenharmony_ci if (freeze_protected) 30662306a36Sopenharmony_ci sb_end_intwrite(inode->i_sb); 30762306a36Sopenharmony_ci ext4_xattr_inode_array_free(ea_inode_array); 30862306a36Sopenharmony_ci return; 30962306a36Sopenharmony_cino_delete: 31062306a36Sopenharmony_ci /* 31162306a36Sopenharmony_ci * Check out some where else accidentally dirty the evicting inode, 31262306a36Sopenharmony_ci * which may probably cause inode use-after-free issues later. 
31362306a36Sopenharmony_ci */ 31462306a36Sopenharmony_ci WARN_ON_ONCE(!list_empty_careful(&inode->i_io_list)); 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci if (!list_empty(&EXT4_I(inode)->i_fc_list)) 31762306a36Sopenharmony_ci ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); 31862306a36Sopenharmony_ci ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ 31962306a36Sopenharmony_ci} 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci#ifdef CONFIG_QUOTA 32262306a36Sopenharmony_ciqsize_t *ext4_get_reserved_space(struct inode *inode) 32362306a36Sopenharmony_ci{ 32462306a36Sopenharmony_ci return &EXT4_I(inode)->i_reserved_quota; 32562306a36Sopenharmony_ci} 32662306a36Sopenharmony_ci#endif 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci/* 32962306a36Sopenharmony_ci * Called with i_data_sem down, which is important since we can call 33062306a36Sopenharmony_ci * ext4_discard_preallocations() from here. 33162306a36Sopenharmony_ci */ 33262306a36Sopenharmony_civoid ext4_da_update_reserve_space(struct inode *inode, 33362306a36Sopenharmony_ci int used, int quota_claim) 33462306a36Sopenharmony_ci{ 33562306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 33662306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci spin_lock(&ei->i_block_reservation_lock); 33962306a36Sopenharmony_ci trace_ext4_da_update_reserve_space(inode, used, quota_claim); 34062306a36Sopenharmony_ci if (unlikely(used > ei->i_reserved_data_blocks)) { 34162306a36Sopenharmony_ci ext4_warning(inode->i_sb, "%s: ino %lu, used %d " 34262306a36Sopenharmony_ci "with only %d reserved data blocks", 34362306a36Sopenharmony_ci __func__, inode->i_ino, used, 34462306a36Sopenharmony_ci ei->i_reserved_data_blocks); 34562306a36Sopenharmony_ci WARN_ON(1); 34662306a36Sopenharmony_ci used = ei->i_reserved_data_blocks; 34762306a36Sopenharmony_ci } 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci 
/* Update per-inode reservations */ 35062306a36Sopenharmony_ci ei->i_reserved_data_blocks -= used; 35162306a36Sopenharmony_ci percpu_counter_sub(&sbi->s_dirtyclusters_counter, used); 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci spin_unlock(&ei->i_block_reservation_lock); 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ci /* Update quota subsystem for data blocks */ 35662306a36Sopenharmony_ci if (quota_claim) 35762306a36Sopenharmony_ci dquot_claim_block(inode, EXT4_C2B(sbi, used)); 35862306a36Sopenharmony_ci else { 35962306a36Sopenharmony_ci /* 36062306a36Sopenharmony_ci * We did fallocate with an offset that is already delayed 36162306a36Sopenharmony_ci * allocated. So on delayed allocated writeback we should 36262306a36Sopenharmony_ci * not re-claim the quota for fallocated blocks. 36362306a36Sopenharmony_ci */ 36462306a36Sopenharmony_ci dquot_release_reservation_block(inode, EXT4_C2B(sbi, used)); 36562306a36Sopenharmony_ci } 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci /* 36862306a36Sopenharmony_ci * If we have done all the pending block allocations and if 36962306a36Sopenharmony_ci * there aren't any writers on the inode, we can discard the 37062306a36Sopenharmony_ci * inode's preallocations. 
37162306a36Sopenharmony_ci */ 37262306a36Sopenharmony_ci if ((ei->i_reserved_data_blocks == 0) && 37362306a36Sopenharmony_ci !inode_is_open_for_write(inode)) 37462306a36Sopenharmony_ci ext4_discard_preallocations(inode, 0); 37562306a36Sopenharmony_ci} 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_cistatic int __check_block_validity(struct inode *inode, const char *func, 37862306a36Sopenharmony_ci unsigned int line, 37962306a36Sopenharmony_ci struct ext4_map_blocks *map) 38062306a36Sopenharmony_ci{ 38162306a36Sopenharmony_ci if (ext4_has_feature_journal(inode->i_sb) && 38262306a36Sopenharmony_ci (inode->i_ino == 38362306a36Sopenharmony_ci le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) 38462306a36Sopenharmony_ci return 0; 38562306a36Sopenharmony_ci if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) { 38662306a36Sopenharmony_ci ext4_error_inode(inode, func, line, map->m_pblk, 38762306a36Sopenharmony_ci "lblock %lu mapped to illegal pblock %llu " 38862306a36Sopenharmony_ci "(length %d)", (unsigned long) map->m_lblk, 38962306a36Sopenharmony_ci map->m_pblk, map->m_len); 39062306a36Sopenharmony_ci return -EFSCORRUPTED; 39162306a36Sopenharmony_ci } 39262306a36Sopenharmony_ci return 0; 39362306a36Sopenharmony_ci} 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ciint ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk, 39662306a36Sopenharmony_ci ext4_lblk_t len) 39762306a36Sopenharmony_ci{ 39862306a36Sopenharmony_ci int ret; 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) 40162306a36Sopenharmony_ci return fscrypt_zeroout_range(inode, lblk, pblk, len); 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS); 40462306a36Sopenharmony_ci if (ret > 0) 40562306a36Sopenharmony_ci ret = 0; 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci return ret; 40862306a36Sopenharmony_ci} 40962306a36Sopenharmony_ci 
41062306a36Sopenharmony_ci#define check_block_validity(inode, map) \ 41162306a36Sopenharmony_ci __check_block_validity((inode), __func__, __LINE__, (map)) 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 41462306a36Sopenharmony_cistatic void ext4_map_blocks_es_recheck(handle_t *handle, 41562306a36Sopenharmony_ci struct inode *inode, 41662306a36Sopenharmony_ci struct ext4_map_blocks *es_map, 41762306a36Sopenharmony_ci struct ext4_map_blocks *map, 41862306a36Sopenharmony_ci int flags) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci int retval; 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci map->m_flags = 0; 42362306a36Sopenharmony_ci /* 42462306a36Sopenharmony_ci * There is a race window that the result is not the same. 42562306a36Sopenharmony_ci * e.g. xfstests #223 when dioread_nolock enables. The reason 42662306a36Sopenharmony_ci * is that we lookup a block mapping in extent status tree with 42762306a36Sopenharmony_ci * out taking i_data_sem. So at the time the unwritten extent 42862306a36Sopenharmony_ci * could be converted. 42962306a36Sopenharmony_ci */ 43062306a36Sopenharmony_ci down_read(&EXT4_I(inode)->i_data_sem); 43162306a36Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 43262306a36Sopenharmony_ci retval = ext4_ext_map_blocks(handle, inode, map, 0); 43362306a36Sopenharmony_ci } else { 43462306a36Sopenharmony_ci retval = ext4_ind_map_blocks(handle, inode, map, 0); 43562306a36Sopenharmony_ci } 43662306a36Sopenharmony_ci up_read((&EXT4_I(inode)->i_data_sem)); 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci /* 43962306a36Sopenharmony_ci * We don't check m_len because extent will be collpased in status 44062306a36Sopenharmony_ci * tree. So the m_len might not equal. 
44162306a36Sopenharmony_ci */ 44262306a36Sopenharmony_ci if (es_map->m_lblk != map->m_lblk || 44362306a36Sopenharmony_ci es_map->m_flags != map->m_flags || 44462306a36Sopenharmony_ci es_map->m_pblk != map->m_pblk) { 44562306a36Sopenharmony_ci printk("ES cache assertion failed for inode: %lu " 44662306a36Sopenharmony_ci "es_cached ex [%d/%d/%llu/%x] != " 44762306a36Sopenharmony_ci "found ex [%d/%d/%llu/%x] retval %d flags %x\n", 44862306a36Sopenharmony_ci inode->i_ino, es_map->m_lblk, es_map->m_len, 44962306a36Sopenharmony_ci es_map->m_pblk, es_map->m_flags, map->m_lblk, 45062306a36Sopenharmony_ci map->m_len, map->m_pblk, map->m_flags, 45162306a36Sopenharmony_ci retval, flags); 45262306a36Sopenharmony_ci } 45362306a36Sopenharmony_ci} 45462306a36Sopenharmony_ci#endif /* ES_AGGRESSIVE_TEST */ 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci/* 45762306a36Sopenharmony_ci * The ext4_map_blocks() function tries to look up the requested blocks, 45862306a36Sopenharmony_ci * and returns if the blocks are already mapped. 45962306a36Sopenharmony_ci * 46062306a36Sopenharmony_ci * Otherwise it takes the write lock of the i_data_sem and allocate blocks 46162306a36Sopenharmony_ci * and store the allocated blocks in the result buffer head and mark it 46262306a36Sopenharmony_ci * mapped. 46362306a36Sopenharmony_ci * 46462306a36Sopenharmony_ci * If file type is extents based, it will call ext4_ext_map_blocks(), 46562306a36Sopenharmony_ci * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping 46662306a36Sopenharmony_ci * based files 46762306a36Sopenharmony_ci * 46862306a36Sopenharmony_ci * On success, it returns the number of blocks being mapped or allocated. if 46962306a36Sopenharmony_ci * create==0 and the blocks are pre-allocated and unwritten, the resulting @map 47062306a36Sopenharmony_ci * is marked as unwritten. If the create == 1, it will mark @map as mapped. 
47162306a36Sopenharmony_ci * 47262306a36Sopenharmony_ci * It returns 0 if plain look up failed (blocks have not been allocated), in 47362306a36Sopenharmony_ci * that case, @map is returned as unmapped but we still do fill map->m_len to 47462306a36Sopenharmony_ci * indicate the length of a hole starting at map->m_lblk. 47562306a36Sopenharmony_ci * 47662306a36Sopenharmony_ci * It returns the error in case of allocation failure. 47762306a36Sopenharmony_ci */ 47862306a36Sopenharmony_ciint ext4_map_blocks(handle_t *handle, struct inode *inode, 47962306a36Sopenharmony_ci struct ext4_map_blocks *map, int flags) 48062306a36Sopenharmony_ci{ 48162306a36Sopenharmony_ci struct extent_status es; 48262306a36Sopenharmony_ci int retval; 48362306a36Sopenharmony_ci int ret = 0; 48462306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 48562306a36Sopenharmony_ci struct ext4_map_blocks orig_map; 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci memcpy(&orig_map, map, sizeof(*map)); 48862306a36Sopenharmony_ci#endif 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci map->m_flags = 0; 49162306a36Sopenharmony_ci ext_debug(inode, "flag 0x%x, max_blocks %u, logical block %lu\n", 49262306a36Sopenharmony_ci flags, map->m_len, (unsigned long) map->m_lblk); 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci /* 49562306a36Sopenharmony_ci * ext4_map_blocks returns an int, and m_len is an unsigned int 49662306a36Sopenharmony_ci */ 49762306a36Sopenharmony_ci if (unlikely(map->m_len > INT_MAX)) 49862306a36Sopenharmony_ci map->m_len = INT_MAX; 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci /* We can handle the block number less than EXT_MAX_BLOCKS */ 50162306a36Sopenharmony_ci if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS)) 50262306a36Sopenharmony_ci return -EFSCORRUPTED; 50362306a36Sopenharmony_ci 50462306a36Sopenharmony_ci /* Lookup extent status tree firstly */ 50562306a36Sopenharmony_ci if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) && 50662306a36Sopenharmony_ci 
ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { 50762306a36Sopenharmony_ci if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { 50862306a36Sopenharmony_ci map->m_pblk = ext4_es_pblock(&es) + 50962306a36Sopenharmony_ci map->m_lblk - es.es_lblk; 51062306a36Sopenharmony_ci map->m_flags |= ext4_es_is_written(&es) ? 51162306a36Sopenharmony_ci EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN; 51262306a36Sopenharmony_ci retval = es.es_len - (map->m_lblk - es.es_lblk); 51362306a36Sopenharmony_ci if (retval > map->m_len) 51462306a36Sopenharmony_ci retval = map->m_len; 51562306a36Sopenharmony_ci map->m_len = retval; 51662306a36Sopenharmony_ci } else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) { 51762306a36Sopenharmony_ci map->m_pblk = 0; 51862306a36Sopenharmony_ci retval = es.es_len - (map->m_lblk - es.es_lblk); 51962306a36Sopenharmony_ci if (retval > map->m_len) 52062306a36Sopenharmony_ci retval = map->m_len; 52162306a36Sopenharmony_ci map->m_len = retval; 52262306a36Sopenharmony_ci retval = 0; 52362306a36Sopenharmony_ci } else { 52462306a36Sopenharmony_ci BUG(); 52562306a36Sopenharmony_ci } 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci if (flags & EXT4_GET_BLOCKS_CACHED_NOWAIT) 52862306a36Sopenharmony_ci return retval; 52962306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 53062306a36Sopenharmony_ci ext4_map_blocks_es_recheck(handle, inode, map, 53162306a36Sopenharmony_ci &orig_map, flags); 53262306a36Sopenharmony_ci#endif 53362306a36Sopenharmony_ci goto found; 53462306a36Sopenharmony_ci } 53562306a36Sopenharmony_ci /* 53662306a36Sopenharmony_ci * In the query cache no-wait mode, nothing we can do more if we 53762306a36Sopenharmony_ci * cannot find extent in the cache. 
53862306a36Sopenharmony_ci */ 53962306a36Sopenharmony_ci if (flags & EXT4_GET_BLOCKS_CACHED_NOWAIT) 54062306a36Sopenharmony_ci return 0; 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci /* 54362306a36Sopenharmony_ci * Try to see if we can get the block without requesting a new 54462306a36Sopenharmony_ci * file system block. 54562306a36Sopenharmony_ci */ 54662306a36Sopenharmony_ci down_read(&EXT4_I(inode)->i_data_sem); 54762306a36Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 54862306a36Sopenharmony_ci retval = ext4_ext_map_blocks(handle, inode, map, 0); 54962306a36Sopenharmony_ci } else { 55062306a36Sopenharmony_ci retval = ext4_ind_map_blocks(handle, inode, map, 0); 55162306a36Sopenharmony_ci } 55262306a36Sopenharmony_ci if (retval > 0) { 55362306a36Sopenharmony_ci unsigned int status; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci if (unlikely(retval != map->m_len)) { 55662306a36Sopenharmony_ci ext4_warning(inode->i_sb, 55762306a36Sopenharmony_ci "ES len assertion failed for inode " 55862306a36Sopenharmony_ci "%lu: retval %d != map->m_len %d", 55962306a36Sopenharmony_ci inode->i_ino, retval, map->m_len); 56062306a36Sopenharmony_ci WARN_ON(1); 56162306a36Sopenharmony_ci } 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci status = map->m_flags & EXT4_MAP_UNWRITTEN ? 
56462306a36Sopenharmony_ci EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 56562306a36Sopenharmony_ci if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 56662306a36Sopenharmony_ci !(status & EXTENT_STATUS_WRITTEN) && 56762306a36Sopenharmony_ci ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, 56862306a36Sopenharmony_ci map->m_lblk + map->m_len - 1)) 56962306a36Sopenharmony_ci status |= EXTENT_STATUS_DELAYED; 57062306a36Sopenharmony_ci ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 57162306a36Sopenharmony_ci map->m_pblk, status); 57262306a36Sopenharmony_ci } 57362306a36Sopenharmony_ci up_read((&EXT4_I(inode)->i_data_sem)); 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_cifound: 57662306a36Sopenharmony_ci if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 57762306a36Sopenharmony_ci ret = check_block_validity(inode, map); 57862306a36Sopenharmony_ci if (ret != 0) 57962306a36Sopenharmony_ci return ret; 58062306a36Sopenharmony_ci } 58162306a36Sopenharmony_ci 58262306a36Sopenharmony_ci /* If it is only a block(s) look up */ 58362306a36Sopenharmony_ci if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) 58462306a36Sopenharmony_ci return retval; 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci /* 58762306a36Sopenharmony_ci * Returns if the blocks have already allocated 58862306a36Sopenharmony_ci * 58962306a36Sopenharmony_ci * Note that if blocks have been preallocated 59062306a36Sopenharmony_ci * ext4_ext_get_block() returns the create = 0 59162306a36Sopenharmony_ci * with buffer head unmapped. 
59262306a36Sopenharmony_ci */ 59362306a36Sopenharmony_ci if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) 59462306a36Sopenharmony_ci /* 59562306a36Sopenharmony_ci * If we need to convert extent to unwritten 59662306a36Sopenharmony_ci * we continue and do the actual work in 59762306a36Sopenharmony_ci * ext4_ext_map_blocks() 59862306a36Sopenharmony_ci */ 59962306a36Sopenharmony_ci if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) 60062306a36Sopenharmony_ci return retval; 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci /* 60362306a36Sopenharmony_ci * Here we clear m_flags because after allocating an new extent, 60462306a36Sopenharmony_ci * it will be set again. 60562306a36Sopenharmony_ci */ 60662306a36Sopenharmony_ci map->m_flags &= ~EXT4_MAP_FLAGS; 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci /* 60962306a36Sopenharmony_ci * New blocks allocate and/or writing to unwritten extent 61062306a36Sopenharmony_ci * will possibly result in updating i_data, so we take 61162306a36Sopenharmony_ci * the write lock of i_data_sem, and call get_block() 61262306a36Sopenharmony_ci * with create == 1 flag. 61362306a36Sopenharmony_ci */ 61462306a36Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_ci /* 61762306a36Sopenharmony_ci * We need to check for EXT4 here because migrate 61862306a36Sopenharmony_ci * could have changed the inode type in between 61962306a36Sopenharmony_ci */ 62062306a36Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 62162306a36Sopenharmony_ci retval = ext4_ext_map_blocks(handle, inode, map, flags); 62262306a36Sopenharmony_ci } else { 62362306a36Sopenharmony_ci retval = ext4_ind_map_blocks(handle, inode, map, flags); 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_ci if (retval > 0 && map->m_flags & EXT4_MAP_NEW) { 62662306a36Sopenharmony_ci /* 62762306a36Sopenharmony_ci * We allocated new blocks which will result in 62862306a36Sopenharmony_ci * i_data's format changing. 
Force the migrate 62962306a36Sopenharmony_ci * to fail by clearing migrate flags 63062306a36Sopenharmony_ci */ 63162306a36Sopenharmony_ci ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 63262306a36Sopenharmony_ci } 63362306a36Sopenharmony_ci } 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci if (retval > 0) { 63662306a36Sopenharmony_ci unsigned int status; 63762306a36Sopenharmony_ci 63862306a36Sopenharmony_ci if (unlikely(retval != map->m_len)) { 63962306a36Sopenharmony_ci ext4_warning(inode->i_sb, 64062306a36Sopenharmony_ci "ES len assertion failed for inode " 64162306a36Sopenharmony_ci "%lu: retval %d != map->m_len %d", 64262306a36Sopenharmony_ci inode->i_ino, retval, map->m_len); 64362306a36Sopenharmony_ci WARN_ON(1); 64462306a36Sopenharmony_ci } 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci /* 64762306a36Sopenharmony_ci * We have to zeroout blocks before inserting them into extent 64862306a36Sopenharmony_ci * status tree. Otherwise someone could look them up there and 64962306a36Sopenharmony_ci * use them before they are really zeroed. We also have to 65062306a36Sopenharmony_ci * unmap metadata before zeroing as otherwise writeback can 65162306a36Sopenharmony_ci * overwrite zeros with stale data from block device. 65262306a36Sopenharmony_ci */ 65362306a36Sopenharmony_ci if (flags & EXT4_GET_BLOCKS_ZERO && 65462306a36Sopenharmony_ci map->m_flags & EXT4_MAP_MAPPED && 65562306a36Sopenharmony_ci map->m_flags & EXT4_MAP_NEW) { 65662306a36Sopenharmony_ci ret = ext4_issue_zeroout(inode, map->m_lblk, 65762306a36Sopenharmony_ci map->m_pblk, map->m_len); 65862306a36Sopenharmony_ci if (ret) { 65962306a36Sopenharmony_ci retval = ret; 66062306a36Sopenharmony_ci goto out_sem; 66162306a36Sopenharmony_ci } 66262306a36Sopenharmony_ci } 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_ci /* 66562306a36Sopenharmony_ci * If the extent has been zeroed out, we don't need to update 66662306a36Sopenharmony_ci * extent status tree. 
66762306a36Sopenharmony_ci */ 66862306a36Sopenharmony_ci if ((flags & EXT4_GET_BLOCKS_PRE_IO) && 66962306a36Sopenharmony_ci ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) { 67062306a36Sopenharmony_ci if (ext4_es_is_written(&es)) 67162306a36Sopenharmony_ci goto out_sem; 67262306a36Sopenharmony_ci } 67362306a36Sopenharmony_ci status = map->m_flags & EXT4_MAP_UNWRITTEN ? 67462306a36Sopenharmony_ci EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 67562306a36Sopenharmony_ci if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 67662306a36Sopenharmony_ci !(status & EXTENT_STATUS_WRITTEN) && 67762306a36Sopenharmony_ci ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, 67862306a36Sopenharmony_ci map->m_lblk + map->m_len - 1)) 67962306a36Sopenharmony_ci status |= EXTENT_STATUS_DELAYED; 68062306a36Sopenharmony_ci ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 68162306a36Sopenharmony_ci map->m_pblk, status); 68262306a36Sopenharmony_ci } 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ciout_sem: 68562306a36Sopenharmony_ci up_write((&EXT4_I(inode)->i_data_sem)); 68662306a36Sopenharmony_ci if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 68762306a36Sopenharmony_ci ret = check_block_validity(inode, map); 68862306a36Sopenharmony_ci if (ret != 0) 68962306a36Sopenharmony_ci return ret; 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_ci /* 69262306a36Sopenharmony_ci * Inodes with freshly allocated blocks where contents will be 69362306a36Sopenharmony_ci * visible after transaction commit must be on transaction's 69462306a36Sopenharmony_ci * ordered data list. 
69562306a36Sopenharmony_ci */ 69662306a36Sopenharmony_ci if (map->m_flags & EXT4_MAP_NEW && 69762306a36Sopenharmony_ci !(map->m_flags & EXT4_MAP_UNWRITTEN) && 69862306a36Sopenharmony_ci !(flags & EXT4_GET_BLOCKS_ZERO) && 69962306a36Sopenharmony_ci !ext4_is_quota_file(inode) && 70062306a36Sopenharmony_ci ext4_should_order_data(inode)) { 70162306a36Sopenharmony_ci loff_t start_byte = 70262306a36Sopenharmony_ci (loff_t)map->m_lblk << inode->i_blkbits; 70362306a36Sopenharmony_ci loff_t length = (loff_t)map->m_len << inode->i_blkbits; 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci if (flags & EXT4_GET_BLOCKS_IO_SUBMIT) 70662306a36Sopenharmony_ci ret = ext4_jbd2_inode_add_wait(handle, inode, 70762306a36Sopenharmony_ci start_byte, length); 70862306a36Sopenharmony_ci else 70962306a36Sopenharmony_ci ret = ext4_jbd2_inode_add_write(handle, inode, 71062306a36Sopenharmony_ci start_byte, length); 71162306a36Sopenharmony_ci if (ret) 71262306a36Sopenharmony_ci return ret; 71362306a36Sopenharmony_ci } 71462306a36Sopenharmony_ci } 71562306a36Sopenharmony_ci if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN || 71662306a36Sopenharmony_ci map->m_flags & EXT4_MAP_MAPPED)) 71762306a36Sopenharmony_ci ext4_fc_track_range(handle, inode, map->m_lblk, 71862306a36Sopenharmony_ci map->m_lblk + map->m_len - 1); 71962306a36Sopenharmony_ci if (retval < 0) 72062306a36Sopenharmony_ci ext_debug(inode, "failed with err %d\n", retval); 72162306a36Sopenharmony_ci return retval; 72262306a36Sopenharmony_ci} 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci/* 72562306a36Sopenharmony_ci * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages 72662306a36Sopenharmony_ci * we have to be careful as someone else may be manipulating b_state as well. 
72762306a36Sopenharmony_ci */ 72862306a36Sopenharmony_cistatic void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) 72962306a36Sopenharmony_ci{ 73062306a36Sopenharmony_ci unsigned long old_state; 73162306a36Sopenharmony_ci unsigned long new_state; 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ci flags &= EXT4_MAP_FLAGS; 73462306a36Sopenharmony_ci 73562306a36Sopenharmony_ci /* Dummy buffer_head? Set non-atomically. */ 73662306a36Sopenharmony_ci if (!bh->b_page) { 73762306a36Sopenharmony_ci bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags; 73862306a36Sopenharmony_ci return; 73962306a36Sopenharmony_ci } 74062306a36Sopenharmony_ci /* 74162306a36Sopenharmony_ci * Someone else may be modifying b_state. Be careful! This is ugly but 74262306a36Sopenharmony_ci * once we get rid of using bh as a container for mapping information 74362306a36Sopenharmony_ci * to pass to / from get_block functions, this can go away. 74462306a36Sopenharmony_ci */ 74562306a36Sopenharmony_ci old_state = READ_ONCE(bh->b_state); 74662306a36Sopenharmony_ci do { 74762306a36Sopenharmony_ci new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; 74862306a36Sopenharmony_ci } while (unlikely(!try_cmpxchg(&bh->b_state, &old_state, new_state))); 74962306a36Sopenharmony_ci} 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_cistatic int _ext4_get_block(struct inode *inode, sector_t iblock, 75262306a36Sopenharmony_ci struct buffer_head *bh, int flags) 75362306a36Sopenharmony_ci{ 75462306a36Sopenharmony_ci struct ext4_map_blocks map; 75562306a36Sopenharmony_ci int ret = 0; 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci if (ext4_has_inline_data(inode)) 75862306a36Sopenharmony_ci return -ERANGE; 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_ci map.m_lblk = iblock; 76162306a36Sopenharmony_ci map.m_len = bh->b_size >> inode->i_blkbits; 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci ret = ext4_map_blocks(ext4_journal_current_handle(), inode, &map, 76462306a36Sopenharmony_ci 
flags); 76562306a36Sopenharmony_ci if (ret > 0) { 76662306a36Sopenharmony_ci map_bh(bh, inode->i_sb, map.m_pblk); 76762306a36Sopenharmony_ci ext4_update_bh_state(bh, map.m_flags); 76862306a36Sopenharmony_ci bh->b_size = inode->i_sb->s_blocksize * map.m_len; 76962306a36Sopenharmony_ci ret = 0; 77062306a36Sopenharmony_ci } else if (ret == 0) { 77162306a36Sopenharmony_ci /* hole case, need to fill in bh->b_size */ 77262306a36Sopenharmony_ci bh->b_size = inode->i_sb->s_blocksize * map.m_len; 77362306a36Sopenharmony_ci } 77462306a36Sopenharmony_ci return ret; 77562306a36Sopenharmony_ci} 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ciint ext4_get_block(struct inode *inode, sector_t iblock, 77862306a36Sopenharmony_ci struct buffer_head *bh, int create) 77962306a36Sopenharmony_ci{ 78062306a36Sopenharmony_ci return _ext4_get_block(inode, iblock, bh, 78162306a36Sopenharmony_ci create ? EXT4_GET_BLOCKS_CREATE : 0); 78262306a36Sopenharmony_ci} 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci/* 78562306a36Sopenharmony_ci * Get block function used when preparing for buffered write if we require 78662306a36Sopenharmony_ci * creating an unwritten extent if blocks haven't been allocated. The extent 78762306a36Sopenharmony_ci * will be converted to written after the IO is complete. 
78862306a36Sopenharmony_ci */ 78962306a36Sopenharmony_ciint ext4_get_block_unwritten(struct inode *inode, sector_t iblock, 79062306a36Sopenharmony_ci struct buffer_head *bh_result, int create) 79162306a36Sopenharmony_ci{ 79262306a36Sopenharmony_ci int ret = 0; 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n", 79562306a36Sopenharmony_ci inode->i_ino, create); 79662306a36Sopenharmony_ci ret = _ext4_get_block(inode, iblock, bh_result, 79762306a36Sopenharmony_ci EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT); 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci /* 80062306a36Sopenharmony_ci * If the buffer is marked unwritten, mark it as new to make sure it is 80162306a36Sopenharmony_ci * zeroed out correctly in case of partial writes. Otherwise, there is 80262306a36Sopenharmony_ci * a chance of stale data getting exposed. 80362306a36Sopenharmony_ci */ 80462306a36Sopenharmony_ci if (ret == 0 && buffer_unwritten(bh_result)) 80562306a36Sopenharmony_ci set_buffer_new(bh_result); 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci return ret; 80862306a36Sopenharmony_ci} 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci/* Maximum number of blocks we map for direct IO at once. 
*/ 81162306a36Sopenharmony_ci#define DIO_MAX_BLOCKS 4096 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci/* 81462306a36Sopenharmony_ci * `handle' can be NULL if create is zero 81562306a36Sopenharmony_ci */ 81662306a36Sopenharmony_cistruct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, 81762306a36Sopenharmony_ci ext4_lblk_t block, int map_flags) 81862306a36Sopenharmony_ci{ 81962306a36Sopenharmony_ci struct ext4_map_blocks map; 82062306a36Sopenharmony_ci struct buffer_head *bh; 82162306a36Sopenharmony_ci int create = map_flags & EXT4_GET_BLOCKS_CREATE; 82262306a36Sopenharmony_ci bool nowait = map_flags & EXT4_GET_BLOCKS_CACHED_NOWAIT; 82362306a36Sopenharmony_ci int err; 82462306a36Sopenharmony_ci 82562306a36Sopenharmony_ci ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 82662306a36Sopenharmony_ci || handle != NULL || create == 0); 82762306a36Sopenharmony_ci ASSERT(create == 0 || !nowait); 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci map.m_lblk = block; 83062306a36Sopenharmony_ci map.m_len = 1; 83162306a36Sopenharmony_ci err = ext4_map_blocks(handle, inode, &map, map_flags); 83262306a36Sopenharmony_ci 83362306a36Sopenharmony_ci if (err == 0) 83462306a36Sopenharmony_ci return create ? 
ERR_PTR(-ENOSPC) : NULL; 83562306a36Sopenharmony_ci if (err < 0) 83662306a36Sopenharmony_ci return ERR_PTR(err); 83762306a36Sopenharmony_ci 83862306a36Sopenharmony_ci if (nowait) 83962306a36Sopenharmony_ci return sb_find_get_block(inode->i_sb, map.m_pblk); 84062306a36Sopenharmony_ci 84162306a36Sopenharmony_ci bh = sb_getblk(inode->i_sb, map.m_pblk); 84262306a36Sopenharmony_ci if (unlikely(!bh)) 84362306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 84462306a36Sopenharmony_ci if (map.m_flags & EXT4_MAP_NEW) { 84562306a36Sopenharmony_ci ASSERT(create != 0); 84662306a36Sopenharmony_ci ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) 84762306a36Sopenharmony_ci || (handle != NULL)); 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ci /* 85062306a36Sopenharmony_ci * Now that we do not always journal data, we should 85162306a36Sopenharmony_ci * keep in mind whether this should always journal the 85262306a36Sopenharmony_ci * new buffer as metadata. For now, regular file 85362306a36Sopenharmony_ci * writes use ext4_get_block instead, so it's not a 85462306a36Sopenharmony_ci * problem. 
85562306a36Sopenharmony_ci */ 85662306a36Sopenharmony_ci lock_buffer(bh); 85762306a36Sopenharmony_ci BUFFER_TRACE(bh, "call get_create_access"); 85862306a36Sopenharmony_ci err = ext4_journal_get_create_access(handle, inode->i_sb, bh, 85962306a36Sopenharmony_ci EXT4_JTR_NONE); 86062306a36Sopenharmony_ci if (unlikely(err)) { 86162306a36Sopenharmony_ci unlock_buffer(bh); 86262306a36Sopenharmony_ci goto errout; 86362306a36Sopenharmony_ci } 86462306a36Sopenharmony_ci if (!buffer_uptodate(bh)) { 86562306a36Sopenharmony_ci memset(bh->b_data, 0, inode->i_sb->s_blocksize); 86662306a36Sopenharmony_ci set_buffer_uptodate(bh); 86762306a36Sopenharmony_ci } 86862306a36Sopenharmony_ci unlock_buffer(bh); 86962306a36Sopenharmony_ci BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 87062306a36Sopenharmony_ci err = ext4_handle_dirty_metadata(handle, inode, bh); 87162306a36Sopenharmony_ci if (unlikely(err)) 87262306a36Sopenharmony_ci goto errout; 87362306a36Sopenharmony_ci } else 87462306a36Sopenharmony_ci BUFFER_TRACE(bh, "not a new buffer"); 87562306a36Sopenharmony_ci return bh; 87662306a36Sopenharmony_cierrout: 87762306a36Sopenharmony_ci brelse(bh); 87862306a36Sopenharmony_ci return ERR_PTR(err); 87962306a36Sopenharmony_ci} 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_cistruct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, 88262306a36Sopenharmony_ci ext4_lblk_t block, int map_flags) 88362306a36Sopenharmony_ci{ 88462306a36Sopenharmony_ci struct buffer_head *bh; 88562306a36Sopenharmony_ci int ret; 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci bh = ext4_getblk(handle, inode, block, map_flags); 88862306a36Sopenharmony_ci if (IS_ERR(bh)) 88962306a36Sopenharmony_ci return bh; 89062306a36Sopenharmony_ci if (!bh || ext4_buffer_uptodate(bh)) 89162306a36Sopenharmony_ci return bh; 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci ret = ext4_read_bh_lock(bh, REQ_META | REQ_PRIO, true); 89462306a36Sopenharmony_ci if (ret) { 89562306a36Sopenharmony_ci 
put_bh(bh); 89662306a36Sopenharmony_ci return ERR_PTR(ret); 89762306a36Sopenharmony_ci } 89862306a36Sopenharmony_ci return bh; 89962306a36Sopenharmony_ci} 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_ci/* Read a contiguous batch of blocks. */ 90262306a36Sopenharmony_ciint ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count, 90362306a36Sopenharmony_ci bool wait, struct buffer_head **bhs) 90462306a36Sopenharmony_ci{ 90562306a36Sopenharmony_ci int i, err; 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ci for (i = 0; i < bh_count; i++) { 90862306a36Sopenharmony_ci bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */); 90962306a36Sopenharmony_ci if (IS_ERR(bhs[i])) { 91062306a36Sopenharmony_ci err = PTR_ERR(bhs[i]); 91162306a36Sopenharmony_ci bh_count = i; 91262306a36Sopenharmony_ci goto out_brelse; 91362306a36Sopenharmony_ci } 91462306a36Sopenharmony_ci } 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci for (i = 0; i < bh_count; i++) 91762306a36Sopenharmony_ci /* Note that NULL bhs[i] is valid because of holes. 
*/ 91862306a36Sopenharmony_ci if (bhs[i] && !ext4_buffer_uptodate(bhs[i])) 91962306a36Sopenharmony_ci ext4_read_bh_lock(bhs[i], REQ_META | REQ_PRIO, false); 92062306a36Sopenharmony_ci 92162306a36Sopenharmony_ci if (!wait) 92262306a36Sopenharmony_ci return 0; 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_ci for (i = 0; i < bh_count; i++) 92562306a36Sopenharmony_ci if (bhs[i]) 92662306a36Sopenharmony_ci wait_on_buffer(bhs[i]); 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci for (i = 0; i < bh_count; i++) { 92962306a36Sopenharmony_ci if (bhs[i] && !buffer_uptodate(bhs[i])) { 93062306a36Sopenharmony_ci err = -EIO; 93162306a36Sopenharmony_ci goto out_brelse; 93262306a36Sopenharmony_ci } 93362306a36Sopenharmony_ci } 93462306a36Sopenharmony_ci return 0; 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ciout_brelse: 93762306a36Sopenharmony_ci for (i = 0; i < bh_count; i++) { 93862306a36Sopenharmony_ci brelse(bhs[i]); 93962306a36Sopenharmony_ci bhs[i] = NULL; 94062306a36Sopenharmony_ci } 94162306a36Sopenharmony_ci return err; 94262306a36Sopenharmony_ci} 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_ciint ext4_walk_page_buffers(handle_t *handle, struct inode *inode, 94562306a36Sopenharmony_ci struct buffer_head *head, 94662306a36Sopenharmony_ci unsigned from, 94762306a36Sopenharmony_ci unsigned to, 94862306a36Sopenharmony_ci int *partial, 94962306a36Sopenharmony_ci int (*fn)(handle_t *handle, struct inode *inode, 95062306a36Sopenharmony_ci struct buffer_head *bh)) 95162306a36Sopenharmony_ci{ 95262306a36Sopenharmony_ci struct buffer_head *bh; 95362306a36Sopenharmony_ci unsigned block_start, block_end; 95462306a36Sopenharmony_ci unsigned blocksize = head->b_size; 95562306a36Sopenharmony_ci int err, ret = 0; 95662306a36Sopenharmony_ci struct buffer_head *next; 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci for (bh = head, block_start = 0; 95962306a36Sopenharmony_ci ret == 0 && (bh != head || !block_start); 96062306a36Sopenharmony_ci block_start = 
block_end, bh = next) { 96162306a36Sopenharmony_ci next = bh->b_this_page; 96262306a36Sopenharmony_ci block_end = block_start + blocksize; 96362306a36Sopenharmony_ci if (block_end <= from || block_start >= to) { 96462306a36Sopenharmony_ci if (partial && !buffer_uptodate(bh)) 96562306a36Sopenharmony_ci *partial = 1; 96662306a36Sopenharmony_ci continue; 96762306a36Sopenharmony_ci } 96862306a36Sopenharmony_ci err = (*fn)(handle, inode, bh); 96962306a36Sopenharmony_ci if (!ret) 97062306a36Sopenharmony_ci ret = err; 97162306a36Sopenharmony_ci } 97262306a36Sopenharmony_ci return ret; 97362306a36Sopenharmony_ci} 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci/* 97662306a36Sopenharmony_ci * Helper for handling dirtying of journalled data. We also mark the folio as 97762306a36Sopenharmony_ci * dirty so that writeback code knows about this page (and inode) contains 97862306a36Sopenharmony_ci * dirty data. ext4_writepages() then commits appropriate transaction to 97962306a36Sopenharmony_ci * make data stable. 98062306a36Sopenharmony_ci */ 98162306a36Sopenharmony_cistatic int ext4_dirty_journalled_data(handle_t *handle, struct buffer_head *bh) 98262306a36Sopenharmony_ci{ 98362306a36Sopenharmony_ci folio_mark_dirty(bh->b_folio); 98462306a36Sopenharmony_ci return ext4_handle_dirty_metadata(handle, NULL, bh); 98562306a36Sopenharmony_ci} 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_ciint do_journal_get_write_access(handle_t *handle, struct inode *inode, 98862306a36Sopenharmony_ci struct buffer_head *bh) 98962306a36Sopenharmony_ci{ 99062306a36Sopenharmony_ci int dirty = buffer_dirty(bh); 99162306a36Sopenharmony_ci int ret; 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_ci if (!buffer_mapped(bh) || buffer_freed(bh)) 99462306a36Sopenharmony_ci return 0; 99562306a36Sopenharmony_ci /* 99662306a36Sopenharmony_ci * __block_write_begin() could have dirtied some buffers. 
Clean 99762306a36Sopenharmony_ci * the dirty bit as jbd2_journal_get_write_access() could complain 99862306a36Sopenharmony_ci * otherwise about fs integrity issues. Setting of the dirty bit 99962306a36Sopenharmony_ci * by __block_write_begin() isn't a real problem here as we clear 100062306a36Sopenharmony_ci * the bit before releasing a page lock and thus writeback cannot 100162306a36Sopenharmony_ci * ever write the buffer. 100262306a36Sopenharmony_ci */ 100362306a36Sopenharmony_ci if (dirty) 100462306a36Sopenharmony_ci clear_buffer_dirty(bh); 100562306a36Sopenharmony_ci BUFFER_TRACE(bh, "get write access"); 100662306a36Sopenharmony_ci ret = ext4_journal_get_write_access(handle, inode->i_sb, bh, 100762306a36Sopenharmony_ci EXT4_JTR_NONE); 100862306a36Sopenharmony_ci if (!ret && dirty) 100962306a36Sopenharmony_ci ret = ext4_dirty_journalled_data(handle, bh); 101062306a36Sopenharmony_ci return ret; 101162306a36Sopenharmony_ci} 101262306a36Sopenharmony_ci 101362306a36Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION 101462306a36Sopenharmony_cistatic int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len, 101562306a36Sopenharmony_ci get_block_t *get_block) 101662306a36Sopenharmony_ci{ 101762306a36Sopenharmony_ci unsigned from = pos & (PAGE_SIZE - 1); 101862306a36Sopenharmony_ci unsigned to = from + len; 101962306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 102062306a36Sopenharmony_ci unsigned block_start, block_end; 102162306a36Sopenharmony_ci sector_t block; 102262306a36Sopenharmony_ci int err = 0; 102362306a36Sopenharmony_ci unsigned blocksize = inode->i_sb->s_blocksize; 102462306a36Sopenharmony_ci unsigned bbits; 102562306a36Sopenharmony_ci struct buffer_head *bh, *head, *wait[2]; 102662306a36Sopenharmony_ci int nr_wait = 0; 102762306a36Sopenharmony_ci int i; 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_ci BUG_ON(!folio_test_locked(folio)); 103062306a36Sopenharmony_ci BUG_ON(from > PAGE_SIZE); 103162306a36Sopenharmony_ci 
BUG_ON(to > PAGE_SIZE); 103262306a36Sopenharmony_ci BUG_ON(from > to); 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_ci head = folio_buffers(folio); 103562306a36Sopenharmony_ci if (!head) { 103662306a36Sopenharmony_ci create_empty_buffers(&folio->page, blocksize, 0); 103762306a36Sopenharmony_ci head = folio_buffers(folio); 103862306a36Sopenharmony_ci } 103962306a36Sopenharmony_ci bbits = ilog2(blocksize); 104062306a36Sopenharmony_ci block = (sector_t)folio->index << (PAGE_SHIFT - bbits); 104162306a36Sopenharmony_ci 104262306a36Sopenharmony_ci for (bh = head, block_start = 0; bh != head || !block_start; 104362306a36Sopenharmony_ci block++, block_start = block_end, bh = bh->b_this_page) { 104462306a36Sopenharmony_ci block_end = block_start + blocksize; 104562306a36Sopenharmony_ci if (block_end <= from || block_start >= to) { 104662306a36Sopenharmony_ci if (folio_test_uptodate(folio)) { 104762306a36Sopenharmony_ci set_buffer_uptodate(bh); 104862306a36Sopenharmony_ci } 104962306a36Sopenharmony_ci continue; 105062306a36Sopenharmony_ci } 105162306a36Sopenharmony_ci if (buffer_new(bh)) 105262306a36Sopenharmony_ci clear_buffer_new(bh); 105362306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 105462306a36Sopenharmony_ci WARN_ON(bh->b_size != blocksize); 105562306a36Sopenharmony_ci err = get_block(inode, block, bh, 1); 105662306a36Sopenharmony_ci if (err) 105762306a36Sopenharmony_ci break; 105862306a36Sopenharmony_ci if (buffer_new(bh)) { 105962306a36Sopenharmony_ci if (folio_test_uptodate(folio)) { 106062306a36Sopenharmony_ci clear_buffer_new(bh); 106162306a36Sopenharmony_ci set_buffer_uptodate(bh); 106262306a36Sopenharmony_ci mark_buffer_dirty(bh); 106362306a36Sopenharmony_ci continue; 106462306a36Sopenharmony_ci } 106562306a36Sopenharmony_ci if (block_end > to || block_start < from) 106662306a36Sopenharmony_ci folio_zero_segments(folio, to, 106762306a36Sopenharmony_ci block_end, 106862306a36Sopenharmony_ci block_start, from); 106962306a36Sopenharmony_ci continue; 
107062306a36Sopenharmony_ci } 107162306a36Sopenharmony_ci } 107262306a36Sopenharmony_ci if (folio_test_uptodate(folio)) { 107362306a36Sopenharmony_ci set_buffer_uptodate(bh); 107462306a36Sopenharmony_ci continue; 107562306a36Sopenharmony_ci } 107662306a36Sopenharmony_ci if (!buffer_uptodate(bh) && !buffer_delay(bh) && 107762306a36Sopenharmony_ci !buffer_unwritten(bh) && 107862306a36Sopenharmony_ci (block_start < from || block_end > to)) { 107962306a36Sopenharmony_ci ext4_read_bh_lock(bh, 0, false); 108062306a36Sopenharmony_ci wait[nr_wait++] = bh; 108162306a36Sopenharmony_ci } 108262306a36Sopenharmony_ci } 108362306a36Sopenharmony_ci /* 108462306a36Sopenharmony_ci * If we issued read requests, let them complete. 108562306a36Sopenharmony_ci */ 108662306a36Sopenharmony_ci for (i = 0; i < nr_wait; i++) { 108762306a36Sopenharmony_ci wait_on_buffer(wait[i]); 108862306a36Sopenharmony_ci if (!buffer_uptodate(wait[i])) 108962306a36Sopenharmony_ci err = -EIO; 109062306a36Sopenharmony_ci } 109162306a36Sopenharmony_ci if (unlikely(err)) { 109262306a36Sopenharmony_ci folio_zero_new_buffers(folio, from, to); 109362306a36Sopenharmony_ci } else if (fscrypt_inode_uses_fs_layer_crypto(inode)) { 109462306a36Sopenharmony_ci for (i = 0; i < nr_wait; i++) { 109562306a36Sopenharmony_ci int err2; 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci err2 = fscrypt_decrypt_pagecache_blocks(folio, 109862306a36Sopenharmony_ci blocksize, bh_offset(wait[i])); 109962306a36Sopenharmony_ci if (err2) { 110062306a36Sopenharmony_ci clear_buffer_uptodate(wait[i]); 110162306a36Sopenharmony_ci err = err2; 110262306a36Sopenharmony_ci } 110362306a36Sopenharmony_ci } 110462306a36Sopenharmony_ci } 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci return err; 110762306a36Sopenharmony_ci} 110862306a36Sopenharmony_ci#endif 110962306a36Sopenharmony_ci 111062306a36Sopenharmony_ci/* 111162306a36Sopenharmony_ci * To preserve ordering, it is essential that the hole instantiation and 
111262306a36Sopenharmony_ci * the data write be encapsulated in a single transaction. We cannot 111362306a36Sopenharmony_ci * close off a transaction and start a new one between the ext4_get_block() 111462306a36Sopenharmony_ci * and the ext4_write_end(). So doing the jbd2_journal_start at the start of 111562306a36Sopenharmony_ci * ext4_write_begin() is the right place. 111662306a36Sopenharmony_ci */ 111762306a36Sopenharmony_cistatic int ext4_write_begin(struct file *file, struct address_space *mapping, 111862306a36Sopenharmony_ci loff_t pos, unsigned len, 111962306a36Sopenharmony_ci struct page **pagep, void **fsdata) 112062306a36Sopenharmony_ci{ 112162306a36Sopenharmony_ci struct inode *inode = mapping->host; 112262306a36Sopenharmony_ci int ret, needed_blocks; 112362306a36Sopenharmony_ci handle_t *handle; 112462306a36Sopenharmony_ci int retries = 0; 112562306a36Sopenharmony_ci struct folio *folio; 112662306a36Sopenharmony_ci pgoff_t index; 112762306a36Sopenharmony_ci unsigned from, to; 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_ci if (unlikely(ext4_forced_shutdown(inode->i_sb))) 113062306a36Sopenharmony_ci return -EIO; 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci trace_ext4_write_begin(inode, pos, len); 113362306a36Sopenharmony_ci /* 113462306a36Sopenharmony_ci * Reserve one block more for addition to orphan list in case 113562306a36Sopenharmony_ci * we allocate blocks but write fails for some reason 113662306a36Sopenharmony_ci */ 113762306a36Sopenharmony_ci needed_blocks = ext4_writepage_trans_blocks(inode) + 1; 113862306a36Sopenharmony_ci index = pos >> PAGE_SHIFT; 113962306a36Sopenharmony_ci from = pos & (PAGE_SIZE - 1); 114062306a36Sopenharmony_ci to = from + len; 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 114362306a36Sopenharmony_ci ret = ext4_try_to_write_inline_data(mapping, inode, pos, len, 114462306a36Sopenharmony_ci pagep); 114562306a36Sopenharmony_ci if 
(ret < 0) 114662306a36Sopenharmony_ci return ret; 114762306a36Sopenharmony_ci if (ret == 1) 114862306a36Sopenharmony_ci return 0; 114962306a36Sopenharmony_ci } 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci /* 115262306a36Sopenharmony_ci * __filemap_get_folio() can take a long time if the 115362306a36Sopenharmony_ci * system is thrashing due to memory pressure, or if the folio 115462306a36Sopenharmony_ci * is being written back. So grab it first before we start 115562306a36Sopenharmony_ci * the transaction handle. This also allows us to allocate 115662306a36Sopenharmony_ci * the folio (if needed) without using GFP_NOFS. 115762306a36Sopenharmony_ci */ 115862306a36Sopenharmony_ciretry_grab: 115962306a36Sopenharmony_ci folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, 116062306a36Sopenharmony_ci mapping_gfp_mask(mapping)); 116162306a36Sopenharmony_ci if (IS_ERR(folio)) 116262306a36Sopenharmony_ci return PTR_ERR(folio); 116362306a36Sopenharmony_ci /* 116462306a36Sopenharmony_ci * The same as page allocation, we prealloc buffer heads before 116562306a36Sopenharmony_ci * starting the handle. 
116662306a36Sopenharmony_ci */ 116762306a36Sopenharmony_ci if (!folio_buffers(folio)) 116862306a36Sopenharmony_ci create_empty_buffers(&folio->page, inode->i_sb->s_blocksize, 0); 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci folio_unlock(folio); 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ciretry_journal: 117362306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 117462306a36Sopenharmony_ci if (IS_ERR(handle)) { 117562306a36Sopenharmony_ci folio_put(folio); 117662306a36Sopenharmony_ci return PTR_ERR(handle); 117762306a36Sopenharmony_ci } 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci folio_lock(folio); 118062306a36Sopenharmony_ci if (folio->mapping != mapping) { 118162306a36Sopenharmony_ci /* The folio got truncated from under us */ 118262306a36Sopenharmony_ci folio_unlock(folio); 118362306a36Sopenharmony_ci folio_put(folio); 118462306a36Sopenharmony_ci ext4_journal_stop(handle); 118562306a36Sopenharmony_ci goto retry_grab; 118662306a36Sopenharmony_ci } 118762306a36Sopenharmony_ci /* In case writeback began while the folio was unlocked */ 118862306a36Sopenharmony_ci folio_wait_stable(folio); 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION 119162306a36Sopenharmony_ci if (ext4_should_dioread_nolock(inode)) 119262306a36Sopenharmony_ci ret = ext4_block_write_begin(folio, pos, len, 119362306a36Sopenharmony_ci ext4_get_block_unwritten); 119462306a36Sopenharmony_ci else 119562306a36Sopenharmony_ci ret = ext4_block_write_begin(folio, pos, len, ext4_get_block); 119662306a36Sopenharmony_ci#else 119762306a36Sopenharmony_ci if (ext4_should_dioread_nolock(inode)) 119862306a36Sopenharmony_ci ret = __block_write_begin(&folio->page, pos, len, 119962306a36Sopenharmony_ci ext4_get_block_unwritten); 120062306a36Sopenharmony_ci else 120162306a36Sopenharmony_ci ret = __block_write_begin(&folio->page, pos, len, ext4_get_block); 120262306a36Sopenharmony_ci#endif 
120362306a36Sopenharmony_ci if (!ret && ext4_should_journal_data(inode)) { 120462306a36Sopenharmony_ci ret = ext4_walk_page_buffers(handle, inode, 120562306a36Sopenharmony_ci folio_buffers(folio), from, to, 120662306a36Sopenharmony_ci NULL, do_journal_get_write_access); 120762306a36Sopenharmony_ci } 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_ci if (ret) { 121062306a36Sopenharmony_ci bool extended = (pos + len > inode->i_size) && 121162306a36Sopenharmony_ci !ext4_verity_in_progress(inode); 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_ci folio_unlock(folio); 121462306a36Sopenharmony_ci /* 121562306a36Sopenharmony_ci * __block_write_begin may have instantiated a few blocks 121662306a36Sopenharmony_ci * outside i_size. Trim these off again. Don't need 121762306a36Sopenharmony_ci * i_size_read because we hold i_rwsem. 121862306a36Sopenharmony_ci * 121962306a36Sopenharmony_ci * Add inode to orphan list in case we crash before 122062306a36Sopenharmony_ci * truncate finishes 122162306a36Sopenharmony_ci */ 122262306a36Sopenharmony_ci if (extended && ext4_can_truncate(inode)) 122362306a36Sopenharmony_ci ext4_orphan_add(handle, inode); 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci ext4_journal_stop(handle); 122662306a36Sopenharmony_ci if (extended) { 122762306a36Sopenharmony_ci ext4_truncate_failed_write(inode); 122862306a36Sopenharmony_ci /* 122962306a36Sopenharmony_ci * If truncate failed early the inode might 123062306a36Sopenharmony_ci * still be on the orphan list; we need to 123162306a36Sopenharmony_ci * make sure the inode is removed from the 123262306a36Sopenharmony_ci * orphan list in that case. 
123362306a36Sopenharmony_ci */ 123462306a36Sopenharmony_ci if (inode->i_nlink) 123562306a36Sopenharmony_ci ext4_orphan_del(NULL, inode); 123662306a36Sopenharmony_ci } 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci if (ret == -ENOSPC && 123962306a36Sopenharmony_ci ext4_should_retry_alloc(inode->i_sb, &retries)) 124062306a36Sopenharmony_ci goto retry_journal; 124162306a36Sopenharmony_ci folio_put(folio); 124262306a36Sopenharmony_ci return ret; 124362306a36Sopenharmony_ci } 124462306a36Sopenharmony_ci *pagep = &folio->page; 124562306a36Sopenharmony_ci return ret; 124662306a36Sopenharmony_ci} 124762306a36Sopenharmony_ci 124862306a36Sopenharmony_ci/* For write_end() in data=journal mode */ 124962306a36Sopenharmony_cistatic int write_end_fn(handle_t *handle, struct inode *inode, 125062306a36Sopenharmony_ci struct buffer_head *bh) 125162306a36Sopenharmony_ci{ 125262306a36Sopenharmony_ci int ret; 125362306a36Sopenharmony_ci if (!buffer_mapped(bh) || buffer_freed(bh)) 125462306a36Sopenharmony_ci return 0; 125562306a36Sopenharmony_ci set_buffer_uptodate(bh); 125662306a36Sopenharmony_ci ret = ext4_dirty_journalled_data(handle, bh); 125762306a36Sopenharmony_ci clear_buffer_meta(bh); 125862306a36Sopenharmony_ci clear_buffer_prio(bh); 125962306a36Sopenharmony_ci return ret; 126062306a36Sopenharmony_ci} 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci/* 126362306a36Sopenharmony_ci * We need to pick up the new inode size which generic_commit_write gave us 126462306a36Sopenharmony_ci * `file' can be NULL - eg, when called from page_symlink(). 126562306a36Sopenharmony_ci * 126662306a36Sopenharmony_ci * ext4 never places buffers on inode->i_mapping->private_list. metadata 126762306a36Sopenharmony_ci * buffers are managed internally. 
126862306a36Sopenharmony_ci */ 126962306a36Sopenharmony_cistatic int ext4_write_end(struct file *file, 127062306a36Sopenharmony_ci struct address_space *mapping, 127162306a36Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 127262306a36Sopenharmony_ci struct page *page, void *fsdata) 127362306a36Sopenharmony_ci{ 127462306a36Sopenharmony_ci struct folio *folio = page_folio(page); 127562306a36Sopenharmony_ci handle_t *handle = ext4_journal_current_handle(); 127662306a36Sopenharmony_ci struct inode *inode = mapping->host; 127762306a36Sopenharmony_ci loff_t old_size = inode->i_size; 127862306a36Sopenharmony_ci int ret = 0, ret2; 127962306a36Sopenharmony_ci int i_size_changed = 0; 128062306a36Sopenharmony_ci bool verity = ext4_verity_in_progress(inode); 128162306a36Sopenharmony_ci 128262306a36Sopenharmony_ci trace_ext4_write_end(inode, pos, len, copied); 128362306a36Sopenharmony_ci 128462306a36Sopenharmony_ci if (ext4_has_inline_data(inode) && 128562306a36Sopenharmony_ci ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) 128662306a36Sopenharmony_ci return ext4_write_inline_data_end(inode, pos, len, copied, 128762306a36Sopenharmony_ci folio); 128862306a36Sopenharmony_ci 128962306a36Sopenharmony_ci copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); 129062306a36Sopenharmony_ci /* 129162306a36Sopenharmony_ci * it's important to update i_size while still holding folio lock: 129262306a36Sopenharmony_ci * page writeout could otherwise come in and zero beyond i_size. 129362306a36Sopenharmony_ci * 129462306a36Sopenharmony_ci * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree 129562306a36Sopenharmony_ci * blocks are being written past EOF, so skip the i_size update. 
129662306a36Sopenharmony_ci */ 129762306a36Sopenharmony_ci if (!verity) 129862306a36Sopenharmony_ci i_size_changed = ext4_update_inode_size(inode, pos + copied); 129962306a36Sopenharmony_ci folio_unlock(folio); 130062306a36Sopenharmony_ci folio_put(folio); 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ci if (old_size < pos && !verity) 130362306a36Sopenharmony_ci pagecache_isize_extended(inode, old_size, pos); 130462306a36Sopenharmony_ci /* 130562306a36Sopenharmony_ci * Don't mark the inode dirty under folio lock. First, it unnecessarily 130662306a36Sopenharmony_ci * makes the holding time of folio lock longer. Second, it forces lock 130762306a36Sopenharmony_ci * ordering of folio lock and transaction start for journaling 130862306a36Sopenharmony_ci * filesystems. 130962306a36Sopenharmony_ci */ 131062306a36Sopenharmony_ci if (i_size_changed) 131162306a36Sopenharmony_ci ret = ext4_mark_inode_dirty(handle, inode); 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ci if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) 131462306a36Sopenharmony_ci /* if we have allocated more blocks and copied 131562306a36Sopenharmony_ci * less. We will have blocks allocated outside 131662306a36Sopenharmony_ci * inode->i_size. So truncate them 131762306a36Sopenharmony_ci */ 131862306a36Sopenharmony_ci ext4_orphan_add(handle, inode); 131962306a36Sopenharmony_ci 132062306a36Sopenharmony_ci ret2 = ext4_journal_stop(handle); 132162306a36Sopenharmony_ci if (!ret) 132262306a36Sopenharmony_ci ret = ret2; 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ci if (pos + len > inode->i_size && !verity) { 132562306a36Sopenharmony_ci ext4_truncate_failed_write(inode); 132662306a36Sopenharmony_ci /* 132762306a36Sopenharmony_ci * If truncate failed early the inode might still be 132862306a36Sopenharmony_ci * on the orphan list; we need to make sure the inode 132962306a36Sopenharmony_ci * is removed from the orphan list in that case. 
133062306a36Sopenharmony_ci */ 133162306a36Sopenharmony_ci if (inode->i_nlink) 133262306a36Sopenharmony_ci ext4_orphan_del(NULL, inode); 133362306a36Sopenharmony_ci } 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci return ret ? ret : copied; 133662306a36Sopenharmony_ci} 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci/* 133962306a36Sopenharmony_ci * This is a private version of folio_zero_new_buffers() which doesn't 134062306a36Sopenharmony_ci * set the buffer to be dirty, since in data=journalled mode we need 134162306a36Sopenharmony_ci * to call ext4_dirty_journalled_data() instead. 134262306a36Sopenharmony_ci */ 134362306a36Sopenharmony_cistatic void ext4_journalled_zero_new_buffers(handle_t *handle, 134462306a36Sopenharmony_ci struct inode *inode, 134562306a36Sopenharmony_ci struct folio *folio, 134662306a36Sopenharmony_ci unsigned from, unsigned to) 134762306a36Sopenharmony_ci{ 134862306a36Sopenharmony_ci unsigned int block_start = 0, block_end; 134962306a36Sopenharmony_ci struct buffer_head *head, *bh; 135062306a36Sopenharmony_ci 135162306a36Sopenharmony_ci bh = head = folio_buffers(folio); 135262306a36Sopenharmony_ci do { 135362306a36Sopenharmony_ci block_end = block_start + bh->b_size; 135462306a36Sopenharmony_ci if (buffer_new(bh)) { 135562306a36Sopenharmony_ci if (block_end > from && block_start < to) { 135662306a36Sopenharmony_ci if (!folio_test_uptodate(folio)) { 135762306a36Sopenharmony_ci unsigned start, size; 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci start = max(from, block_start); 136062306a36Sopenharmony_ci size = min(to, block_end) - start; 136162306a36Sopenharmony_ci 136262306a36Sopenharmony_ci folio_zero_range(folio, start, size); 136362306a36Sopenharmony_ci write_end_fn(handle, inode, bh); 136462306a36Sopenharmony_ci } 136562306a36Sopenharmony_ci clear_buffer_new(bh); 136662306a36Sopenharmony_ci } 136762306a36Sopenharmony_ci } 136862306a36Sopenharmony_ci block_start = block_end; 136962306a36Sopenharmony_ci bh = 
bh->b_this_page; 137062306a36Sopenharmony_ci } while (bh != head); 137162306a36Sopenharmony_ci} 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_cistatic int ext4_journalled_write_end(struct file *file, 137462306a36Sopenharmony_ci struct address_space *mapping, 137562306a36Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 137662306a36Sopenharmony_ci struct page *page, void *fsdata) 137762306a36Sopenharmony_ci{ 137862306a36Sopenharmony_ci struct folio *folio = page_folio(page); 137962306a36Sopenharmony_ci handle_t *handle = ext4_journal_current_handle(); 138062306a36Sopenharmony_ci struct inode *inode = mapping->host; 138162306a36Sopenharmony_ci loff_t old_size = inode->i_size; 138262306a36Sopenharmony_ci int ret = 0, ret2; 138362306a36Sopenharmony_ci int partial = 0; 138462306a36Sopenharmony_ci unsigned from, to; 138562306a36Sopenharmony_ci int size_changed = 0; 138662306a36Sopenharmony_ci bool verity = ext4_verity_in_progress(inode); 138762306a36Sopenharmony_ci 138862306a36Sopenharmony_ci trace_ext4_journalled_write_end(inode, pos, len, copied); 138962306a36Sopenharmony_ci from = pos & (PAGE_SIZE - 1); 139062306a36Sopenharmony_ci to = from + len; 139162306a36Sopenharmony_ci 139262306a36Sopenharmony_ci BUG_ON(!ext4_handle_valid(handle)); 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci if (ext4_has_inline_data(inode)) 139562306a36Sopenharmony_ci return ext4_write_inline_data_end(inode, pos, len, copied, 139662306a36Sopenharmony_ci folio); 139762306a36Sopenharmony_ci 139862306a36Sopenharmony_ci if (unlikely(copied < len) && !folio_test_uptodate(folio)) { 139962306a36Sopenharmony_ci copied = 0; 140062306a36Sopenharmony_ci ext4_journalled_zero_new_buffers(handle, inode, folio, 140162306a36Sopenharmony_ci from, to); 140262306a36Sopenharmony_ci } else { 140362306a36Sopenharmony_ci if (unlikely(copied < len)) 140462306a36Sopenharmony_ci ext4_journalled_zero_new_buffers(handle, inode, folio, 140562306a36Sopenharmony_ci from + copied, to); 
140662306a36Sopenharmony_ci ret = ext4_walk_page_buffers(handle, inode, 140762306a36Sopenharmony_ci folio_buffers(folio), 140862306a36Sopenharmony_ci from, from + copied, &partial, 140962306a36Sopenharmony_ci write_end_fn); 141062306a36Sopenharmony_ci if (!partial) 141162306a36Sopenharmony_ci folio_mark_uptodate(folio); 141262306a36Sopenharmony_ci } 141362306a36Sopenharmony_ci if (!verity) 141462306a36Sopenharmony_ci size_changed = ext4_update_inode_size(inode, pos + copied); 141562306a36Sopenharmony_ci EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; 141662306a36Sopenharmony_ci folio_unlock(folio); 141762306a36Sopenharmony_ci folio_put(folio); 141862306a36Sopenharmony_ci 141962306a36Sopenharmony_ci if (old_size < pos && !verity) 142062306a36Sopenharmony_ci pagecache_isize_extended(inode, old_size, pos); 142162306a36Sopenharmony_ci 142262306a36Sopenharmony_ci if (size_changed) { 142362306a36Sopenharmony_ci ret2 = ext4_mark_inode_dirty(handle, inode); 142462306a36Sopenharmony_ci if (!ret) 142562306a36Sopenharmony_ci ret = ret2; 142662306a36Sopenharmony_ci } 142762306a36Sopenharmony_ci 142862306a36Sopenharmony_ci if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) 142962306a36Sopenharmony_ci /* if we have allocated more blocks and copied 143062306a36Sopenharmony_ci * less. We will have blocks allocated outside 143162306a36Sopenharmony_ci * inode->i_size. 
So truncate them 143262306a36Sopenharmony_ci */ 143362306a36Sopenharmony_ci ext4_orphan_add(handle, inode); 143462306a36Sopenharmony_ci 143562306a36Sopenharmony_ci ret2 = ext4_journal_stop(handle); 143662306a36Sopenharmony_ci if (!ret) 143762306a36Sopenharmony_ci ret = ret2; 143862306a36Sopenharmony_ci if (pos + len > inode->i_size && !verity) { 143962306a36Sopenharmony_ci ext4_truncate_failed_write(inode); 144062306a36Sopenharmony_ci /* 144162306a36Sopenharmony_ci * If truncate failed early the inode might still be 144262306a36Sopenharmony_ci * on the orphan list; we need to make sure the inode 144362306a36Sopenharmony_ci * is removed from the orphan list in that case. 144462306a36Sopenharmony_ci */ 144562306a36Sopenharmony_ci if (inode->i_nlink) 144662306a36Sopenharmony_ci ext4_orphan_del(NULL, inode); 144762306a36Sopenharmony_ci } 144862306a36Sopenharmony_ci 144962306a36Sopenharmony_ci return ret ? ret : copied; 145062306a36Sopenharmony_ci} 145162306a36Sopenharmony_ci 145262306a36Sopenharmony_ci/* 145362306a36Sopenharmony_ci * Reserve space for a single cluster 145462306a36Sopenharmony_ci */ 145562306a36Sopenharmony_cistatic int ext4_da_reserve_space(struct inode *inode) 145662306a36Sopenharmony_ci{ 145762306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 145862306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 145962306a36Sopenharmony_ci int ret; 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci /* 146262306a36Sopenharmony_ci * We will charge metadata quota at writeout time; this saves 146362306a36Sopenharmony_ci * us from metadata over-estimation, though we may go over by 146462306a36Sopenharmony_ci * a small amount in the end. Here we just reserve for data. 
146562306a36Sopenharmony_ci */ 146662306a36Sopenharmony_ci ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1)); 146762306a36Sopenharmony_ci if (ret) 146862306a36Sopenharmony_ci return ret; 146962306a36Sopenharmony_ci 147062306a36Sopenharmony_ci spin_lock(&ei->i_block_reservation_lock); 147162306a36Sopenharmony_ci if (ext4_claim_free_clusters(sbi, 1, 0)) { 147262306a36Sopenharmony_ci spin_unlock(&ei->i_block_reservation_lock); 147362306a36Sopenharmony_ci dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 147462306a36Sopenharmony_ci return -ENOSPC; 147562306a36Sopenharmony_ci } 147662306a36Sopenharmony_ci ei->i_reserved_data_blocks++; 147762306a36Sopenharmony_ci trace_ext4_da_reserve_space(inode); 147862306a36Sopenharmony_ci spin_unlock(&ei->i_block_reservation_lock); 147962306a36Sopenharmony_ci 148062306a36Sopenharmony_ci return 0; /* success */ 148162306a36Sopenharmony_ci} 148262306a36Sopenharmony_ci 148362306a36Sopenharmony_civoid ext4_da_release_space(struct inode *inode, int to_free) 148462306a36Sopenharmony_ci{ 148562306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 148662306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 148762306a36Sopenharmony_ci 148862306a36Sopenharmony_ci if (!to_free) 148962306a36Sopenharmony_ci return; /* Nothing to release, exit */ 149062306a36Sopenharmony_ci 149162306a36Sopenharmony_ci spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 149262306a36Sopenharmony_ci 149362306a36Sopenharmony_ci trace_ext4_da_release_space(inode, to_free); 149462306a36Sopenharmony_ci if (unlikely(to_free > ei->i_reserved_data_blocks)) { 149562306a36Sopenharmony_ci /* 149662306a36Sopenharmony_ci * if there aren't enough reserved blocks, then the 149762306a36Sopenharmony_ci * counter is messed up somewhere. Since this 149862306a36Sopenharmony_ci * function is called from invalidate page, it's 149962306a36Sopenharmony_ci * harmless to return without any action. 
150062306a36Sopenharmony_ci */ 150162306a36Sopenharmony_ci ext4_warning(inode->i_sb, "ext4_da_release_space: " 150262306a36Sopenharmony_ci "ino %lu, to_free %d with only %d reserved " 150362306a36Sopenharmony_ci "data blocks", inode->i_ino, to_free, 150462306a36Sopenharmony_ci ei->i_reserved_data_blocks); 150562306a36Sopenharmony_ci WARN_ON(1); 150662306a36Sopenharmony_ci to_free = ei->i_reserved_data_blocks; 150762306a36Sopenharmony_ci } 150862306a36Sopenharmony_ci ei->i_reserved_data_blocks -= to_free; 150962306a36Sopenharmony_ci 151062306a36Sopenharmony_ci /* update fs dirty data blocks counter */ 151162306a36Sopenharmony_ci percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free); 151262306a36Sopenharmony_ci 151362306a36Sopenharmony_ci spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 151462306a36Sopenharmony_ci 151562306a36Sopenharmony_ci dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free)); 151662306a36Sopenharmony_ci} 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_ci/* 151962306a36Sopenharmony_ci * Delayed allocation stuff 152062306a36Sopenharmony_ci */ 152162306a36Sopenharmony_ci 152262306a36Sopenharmony_cistruct mpage_da_data { 152362306a36Sopenharmony_ci /* These are input fields for ext4_do_writepages() */ 152462306a36Sopenharmony_ci struct inode *inode; 152562306a36Sopenharmony_ci struct writeback_control *wbc; 152662306a36Sopenharmony_ci unsigned int can_map:1; /* Can writepages call map blocks? */ 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci /* These are internal state of ext4_do_writepages() */ 152962306a36Sopenharmony_ci pgoff_t first_page; /* The first page to write */ 153062306a36Sopenharmony_ci pgoff_t next_page; /* Current page to examine */ 153162306a36Sopenharmony_ci pgoff_t last_page; /* Last page to examine */ 153262306a36Sopenharmony_ci /* 153362306a36Sopenharmony_ci * Extent to map - this can be after first_page because that can be 153462306a36Sopenharmony_ci * fully mapped. 
We somewhat abuse m_flags to store whether the extent 153562306a36Sopenharmony_ci * is delalloc or unwritten. 153662306a36Sopenharmony_ci */ 153762306a36Sopenharmony_ci struct ext4_map_blocks map; 153862306a36Sopenharmony_ci struct ext4_io_submit io_submit; /* IO submission data */ 153962306a36Sopenharmony_ci unsigned int do_map:1; 154062306a36Sopenharmony_ci unsigned int scanned_until_end:1; 154162306a36Sopenharmony_ci unsigned int journalled_more_data:1; 154262306a36Sopenharmony_ci}; 154362306a36Sopenharmony_ci 154462306a36Sopenharmony_cistatic void mpage_release_unused_pages(struct mpage_da_data *mpd, 154562306a36Sopenharmony_ci bool invalidate) 154662306a36Sopenharmony_ci{ 154762306a36Sopenharmony_ci unsigned nr, i; 154862306a36Sopenharmony_ci pgoff_t index, end; 154962306a36Sopenharmony_ci struct folio_batch fbatch; 155062306a36Sopenharmony_ci struct inode *inode = mpd->inode; 155162306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 155262306a36Sopenharmony_ci 155362306a36Sopenharmony_ci /* This is necessary when next_page == 0. 
*/ 155462306a36Sopenharmony_ci if (mpd->first_page >= mpd->next_page) 155562306a36Sopenharmony_ci return; 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ci mpd->scanned_until_end = 0; 155862306a36Sopenharmony_ci index = mpd->first_page; 155962306a36Sopenharmony_ci end = mpd->next_page - 1; 156062306a36Sopenharmony_ci if (invalidate) { 156162306a36Sopenharmony_ci ext4_lblk_t start, last; 156262306a36Sopenharmony_ci start = index << (PAGE_SHIFT - inode->i_blkbits); 156362306a36Sopenharmony_ci last = end << (PAGE_SHIFT - inode->i_blkbits); 156462306a36Sopenharmony_ci 156562306a36Sopenharmony_ci /* 156662306a36Sopenharmony_ci * avoid racing with extent status tree scans made by 156762306a36Sopenharmony_ci * ext4_insert_delayed_block() 156862306a36Sopenharmony_ci */ 156962306a36Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 157062306a36Sopenharmony_ci ext4_es_remove_extent(inode, start, last - start + 1); 157162306a36Sopenharmony_ci up_write(&EXT4_I(inode)->i_data_sem); 157262306a36Sopenharmony_ci } 157362306a36Sopenharmony_ci 157462306a36Sopenharmony_ci folio_batch_init(&fbatch); 157562306a36Sopenharmony_ci while (index <= end) { 157662306a36Sopenharmony_ci nr = filemap_get_folios(mapping, &index, end, &fbatch); 157762306a36Sopenharmony_ci if (nr == 0) 157862306a36Sopenharmony_ci break; 157962306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 158062306a36Sopenharmony_ci struct folio *folio = fbatch.folios[i]; 158162306a36Sopenharmony_ci 158262306a36Sopenharmony_ci if (folio->index < mpd->first_page) 158362306a36Sopenharmony_ci continue; 158462306a36Sopenharmony_ci if (folio_next_index(folio) - 1 > end) 158562306a36Sopenharmony_ci continue; 158662306a36Sopenharmony_ci BUG_ON(!folio_test_locked(folio)); 158762306a36Sopenharmony_ci BUG_ON(folio_test_writeback(folio)); 158862306a36Sopenharmony_ci if (invalidate) { 158962306a36Sopenharmony_ci if (folio_mapped(folio)) 159062306a36Sopenharmony_ci folio_clear_dirty_for_io(folio); 159162306a36Sopenharmony_ci 
block_invalidate_folio(folio, 0, 159262306a36Sopenharmony_ci folio_size(folio)); 159362306a36Sopenharmony_ci folio_clear_uptodate(folio); 159462306a36Sopenharmony_ci } 159562306a36Sopenharmony_ci folio_unlock(folio); 159662306a36Sopenharmony_ci } 159762306a36Sopenharmony_ci folio_batch_release(&fbatch); 159862306a36Sopenharmony_ci } 159962306a36Sopenharmony_ci} 160062306a36Sopenharmony_ci 160162306a36Sopenharmony_cistatic void ext4_print_free_blocks(struct inode *inode) 160262306a36Sopenharmony_ci{ 160362306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 160462306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 160562306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 160662306a36Sopenharmony_ci 160762306a36Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld", 160862306a36Sopenharmony_ci EXT4_C2B(EXT4_SB(inode->i_sb), 160962306a36Sopenharmony_ci ext4_count_free_clusters(sb))); 161062306a36Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "Free/Dirty block details"); 161162306a36Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "free_blocks=%lld", 161262306a36Sopenharmony_ci (long long) EXT4_C2B(EXT4_SB(sb), 161362306a36Sopenharmony_ci percpu_counter_sum(&sbi->s_freeclusters_counter))); 161462306a36Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld", 161562306a36Sopenharmony_ci (long long) EXT4_C2B(EXT4_SB(sb), 161662306a36Sopenharmony_ci percpu_counter_sum(&sbi->s_dirtyclusters_counter))); 161762306a36Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "Block reservation details"); 161862306a36Sopenharmony_ci ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u", 161962306a36Sopenharmony_ci ei->i_reserved_data_blocks); 162062306a36Sopenharmony_ci return; 162162306a36Sopenharmony_ci} 162262306a36Sopenharmony_ci 162362306a36Sopenharmony_ci/* 162462306a36Sopenharmony_ci * ext4_insert_delayed_block - adds a delayed block to the extents status 162562306a36Sopenharmony_ci * tree, incrementing the reserved cluster/block 
162662306a36Sopenharmony_ci * count or making a pending reservation 162762306a36Sopenharmony_ci * where needed 162862306a36Sopenharmony_ci * 162962306a36Sopenharmony_ci * @inode - file containing the newly added block 163062306a36Sopenharmony_ci * @lblk - logical block to be added 163162306a36Sopenharmony_ci * 163262306a36Sopenharmony_ci * Returns 0 on success, negative error code on failure. 163362306a36Sopenharmony_ci */ 163462306a36Sopenharmony_cistatic int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk) 163562306a36Sopenharmony_ci{ 163662306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 163762306a36Sopenharmony_ci int ret; 163862306a36Sopenharmony_ci bool allocated = false; 163962306a36Sopenharmony_ci 164062306a36Sopenharmony_ci /* 164162306a36Sopenharmony_ci * If the cluster containing lblk is shared with a delayed, 164262306a36Sopenharmony_ci * written, or unwritten extent in a bigalloc file system, it's 164362306a36Sopenharmony_ci * already been accounted for and does not need to be reserved. 164462306a36Sopenharmony_ci * A pending reservation must be made for the cluster if it's 164562306a36Sopenharmony_ci * shared with a written or unwritten extent and doesn't already 164662306a36Sopenharmony_ci * have one. Written and unwritten extents can be purged from the 164762306a36Sopenharmony_ci * extents status tree if the system is under memory pressure, so 164862306a36Sopenharmony_ci * it's necessary to examine the extent tree if a search of the 164962306a36Sopenharmony_ci * extents status tree doesn't get a match. 
165062306a36Sopenharmony_ci */ 165162306a36Sopenharmony_ci if (sbi->s_cluster_ratio == 1) { 165262306a36Sopenharmony_ci ret = ext4_da_reserve_space(inode); 165362306a36Sopenharmony_ci if (ret != 0) /* ENOSPC */ 165462306a36Sopenharmony_ci return ret; 165562306a36Sopenharmony_ci } else { /* bigalloc */ 165662306a36Sopenharmony_ci if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) { 165762306a36Sopenharmony_ci if (!ext4_es_scan_clu(inode, 165862306a36Sopenharmony_ci &ext4_es_is_mapped, lblk)) { 165962306a36Sopenharmony_ci ret = ext4_clu_mapped(inode, 166062306a36Sopenharmony_ci EXT4_B2C(sbi, lblk)); 166162306a36Sopenharmony_ci if (ret < 0) 166262306a36Sopenharmony_ci return ret; 166362306a36Sopenharmony_ci if (ret == 0) { 166462306a36Sopenharmony_ci ret = ext4_da_reserve_space(inode); 166562306a36Sopenharmony_ci if (ret != 0) /* ENOSPC */ 166662306a36Sopenharmony_ci return ret; 166762306a36Sopenharmony_ci } else { 166862306a36Sopenharmony_ci allocated = true; 166962306a36Sopenharmony_ci } 167062306a36Sopenharmony_ci } else { 167162306a36Sopenharmony_ci allocated = true; 167262306a36Sopenharmony_ci } 167362306a36Sopenharmony_ci } 167462306a36Sopenharmony_ci } 167562306a36Sopenharmony_ci 167662306a36Sopenharmony_ci ext4_es_insert_delayed_block(inode, lblk, allocated); 167762306a36Sopenharmony_ci return 0; 167862306a36Sopenharmony_ci} 167962306a36Sopenharmony_ci 168062306a36Sopenharmony_ci/* 168162306a36Sopenharmony_ci * This function is grabs code from the very beginning of 168262306a36Sopenharmony_ci * ext4_map_blocks, but assumes that the caller is from delayed write 168362306a36Sopenharmony_ci * time. This function looks up the requested blocks and sets the 168462306a36Sopenharmony_ci * buffer delay bit under the protection of i_data_sem. 
168562306a36Sopenharmony_ci */ 168662306a36Sopenharmony_cistatic int ext4_da_map_blocks(struct inode *inode, sector_t iblock, 168762306a36Sopenharmony_ci struct ext4_map_blocks *map, 168862306a36Sopenharmony_ci struct buffer_head *bh) 168962306a36Sopenharmony_ci{ 169062306a36Sopenharmony_ci struct extent_status es; 169162306a36Sopenharmony_ci int retval; 169262306a36Sopenharmony_ci sector_t invalid_block = ~((sector_t) 0xffff); 169362306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 169462306a36Sopenharmony_ci struct ext4_map_blocks orig_map; 169562306a36Sopenharmony_ci 169662306a36Sopenharmony_ci memcpy(&orig_map, map, sizeof(*map)); 169762306a36Sopenharmony_ci#endif 169862306a36Sopenharmony_ci 169962306a36Sopenharmony_ci if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) 170062306a36Sopenharmony_ci invalid_block = ~0; 170162306a36Sopenharmony_ci 170262306a36Sopenharmony_ci map->m_flags = 0; 170362306a36Sopenharmony_ci ext_debug(inode, "max_blocks %u, logical block %lu\n", map->m_len, 170462306a36Sopenharmony_ci (unsigned long) map->m_lblk); 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci /* Lookup extent status tree firstly */ 170762306a36Sopenharmony_ci if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { 170862306a36Sopenharmony_ci if (ext4_es_is_hole(&es)) { 170962306a36Sopenharmony_ci retval = 0; 171062306a36Sopenharmony_ci down_read(&EXT4_I(inode)->i_data_sem); 171162306a36Sopenharmony_ci goto add_delayed; 171262306a36Sopenharmony_ci } 171362306a36Sopenharmony_ci 171462306a36Sopenharmony_ci /* 171562306a36Sopenharmony_ci * Delayed extent could be allocated by fallocate. 171662306a36Sopenharmony_ci * So we need to check it. 
171762306a36Sopenharmony_ci */ 171862306a36Sopenharmony_ci if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) { 171962306a36Sopenharmony_ci map_bh(bh, inode->i_sb, invalid_block); 172062306a36Sopenharmony_ci set_buffer_new(bh); 172162306a36Sopenharmony_ci set_buffer_delay(bh); 172262306a36Sopenharmony_ci return 0; 172362306a36Sopenharmony_ci } 172462306a36Sopenharmony_ci 172562306a36Sopenharmony_ci map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk; 172662306a36Sopenharmony_ci retval = es.es_len - (iblock - es.es_lblk); 172762306a36Sopenharmony_ci if (retval > map->m_len) 172862306a36Sopenharmony_ci retval = map->m_len; 172962306a36Sopenharmony_ci map->m_len = retval; 173062306a36Sopenharmony_ci if (ext4_es_is_written(&es)) 173162306a36Sopenharmony_ci map->m_flags |= EXT4_MAP_MAPPED; 173262306a36Sopenharmony_ci else if (ext4_es_is_unwritten(&es)) 173362306a36Sopenharmony_ci map->m_flags |= EXT4_MAP_UNWRITTEN; 173462306a36Sopenharmony_ci else 173562306a36Sopenharmony_ci BUG(); 173662306a36Sopenharmony_ci 173762306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST 173862306a36Sopenharmony_ci ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0); 173962306a36Sopenharmony_ci#endif 174062306a36Sopenharmony_ci return retval; 174162306a36Sopenharmony_ci } 174262306a36Sopenharmony_ci 174362306a36Sopenharmony_ci /* 174462306a36Sopenharmony_ci * Try to see if we can get the block without requesting a new 174562306a36Sopenharmony_ci * file system block. 
174662306a36Sopenharmony_ci */ 174762306a36Sopenharmony_ci down_read(&EXT4_I(inode)->i_data_sem); 174862306a36Sopenharmony_ci if (ext4_has_inline_data(inode)) 174962306a36Sopenharmony_ci retval = 0; 175062306a36Sopenharmony_ci else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 175162306a36Sopenharmony_ci retval = ext4_ext_map_blocks(NULL, inode, map, 0); 175262306a36Sopenharmony_ci else 175362306a36Sopenharmony_ci retval = ext4_ind_map_blocks(NULL, inode, map, 0); 175462306a36Sopenharmony_ci 175562306a36Sopenharmony_ciadd_delayed: 175662306a36Sopenharmony_ci if (retval == 0) { 175762306a36Sopenharmony_ci int ret; 175862306a36Sopenharmony_ci 175962306a36Sopenharmony_ci /* 176062306a36Sopenharmony_ci * XXX: __block_prepare_write() unmaps passed block, 176162306a36Sopenharmony_ci * is it OK? 176262306a36Sopenharmony_ci */ 176362306a36Sopenharmony_ci 176462306a36Sopenharmony_ci ret = ext4_insert_delayed_block(inode, map->m_lblk); 176562306a36Sopenharmony_ci if (ret != 0) { 176662306a36Sopenharmony_ci retval = ret; 176762306a36Sopenharmony_ci goto out_unlock; 176862306a36Sopenharmony_ci } 176962306a36Sopenharmony_ci 177062306a36Sopenharmony_ci map_bh(bh, inode->i_sb, invalid_block); 177162306a36Sopenharmony_ci set_buffer_new(bh); 177262306a36Sopenharmony_ci set_buffer_delay(bh); 177362306a36Sopenharmony_ci } else if (retval > 0) { 177462306a36Sopenharmony_ci unsigned int status; 177562306a36Sopenharmony_ci 177662306a36Sopenharmony_ci if (unlikely(retval != map->m_len)) { 177762306a36Sopenharmony_ci ext4_warning(inode->i_sb, 177862306a36Sopenharmony_ci "ES len assertion failed for inode " 177962306a36Sopenharmony_ci "%lu: retval %d != map->m_len %d", 178062306a36Sopenharmony_ci inode->i_ino, retval, map->m_len); 178162306a36Sopenharmony_ci WARN_ON(1); 178262306a36Sopenharmony_ci } 178362306a36Sopenharmony_ci 178462306a36Sopenharmony_ci status = map->m_flags & EXT4_MAP_UNWRITTEN ? 
178562306a36Sopenharmony_ci EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 178662306a36Sopenharmony_ci ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 178762306a36Sopenharmony_ci map->m_pblk, status); 178862306a36Sopenharmony_ci } 178962306a36Sopenharmony_ci 179062306a36Sopenharmony_ciout_unlock: 179162306a36Sopenharmony_ci up_read((&EXT4_I(inode)->i_data_sem)); 179262306a36Sopenharmony_ci 179362306a36Sopenharmony_ci return retval; 179462306a36Sopenharmony_ci} 179562306a36Sopenharmony_ci 179662306a36Sopenharmony_ci/* 179762306a36Sopenharmony_ci * This is a special get_block_t callback which is used by 179862306a36Sopenharmony_ci * ext4_da_write_begin(). It will either return mapped block or 179962306a36Sopenharmony_ci * reserve space for a single block. 180062306a36Sopenharmony_ci * 180162306a36Sopenharmony_ci * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set. 180262306a36Sopenharmony_ci * We also have b_blocknr = -1 and b_bdev initialized properly 180362306a36Sopenharmony_ci * 180462306a36Sopenharmony_ci * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set. 180562306a36Sopenharmony_ci * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev 180662306a36Sopenharmony_ci * initialized properly. 
180762306a36Sopenharmony_ci */ 180862306a36Sopenharmony_ciint ext4_da_get_block_prep(struct inode *inode, sector_t iblock, 180962306a36Sopenharmony_ci struct buffer_head *bh, int create) 181062306a36Sopenharmony_ci{ 181162306a36Sopenharmony_ci struct ext4_map_blocks map; 181262306a36Sopenharmony_ci int ret = 0; 181362306a36Sopenharmony_ci 181462306a36Sopenharmony_ci BUG_ON(create == 0); 181562306a36Sopenharmony_ci BUG_ON(bh->b_size != inode->i_sb->s_blocksize); 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_ci map.m_lblk = iblock; 181862306a36Sopenharmony_ci map.m_len = 1; 181962306a36Sopenharmony_ci 182062306a36Sopenharmony_ci /* 182162306a36Sopenharmony_ci * first, we need to know whether the block is allocated already 182262306a36Sopenharmony_ci * preallocated blocks are unmapped but should treated 182362306a36Sopenharmony_ci * the same as allocated blocks. 182462306a36Sopenharmony_ci */ 182562306a36Sopenharmony_ci ret = ext4_da_map_blocks(inode, iblock, &map, bh); 182662306a36Sopenharmony_ci if (ret <= 0) 182762306a36Sopenharmony_ci return ret; 182862306a36Sopenharmony_ci 182962306a36Sopenharmony_ci map_bh(bh, inode->i_sb, map.m_pblk); 183062306a36Sopenharmony_ci ext4_update_bh_state(bh, map.m_flags); 183162306a36Sopenharmony_ci 183262306a36Sopenharmony_ci if (buffer_unwritten(bh)) { 183362306a36Sopenharmony_ci /* A delayed write to unwritten bh should be marked 183462306a36Sopenharmony_ci * new and mapped. Mapped ensures that we don't do 183562306a36Sopenharmony_ci * get_block multiple times when we write to the same 183662306a36Sopenharmony_ci * offset and new ensures that we do proper zero out 183762306a36Sopenharmony_ci * for partial write. 
183862306a36Sopenharmony_ci */ 183962306a36Sopenharmony_ci set_buffer_new(bh); 184062306a36Sopenharmony_ci set_buffer_mapped(bh); 184162306a36Sopenharmony_ci } 184262306a36Sopenharmony_ci return 0; 184362306a36Sopenharmony_ci} 184462306a36Sopenharmony_ci 184562306a36Sopenharmony_cistatic void mpage_folio_done(struct mpage_da_data *mpd, struct folio *folio) 184662306a36Sopenharmony_ci{ 184762306a36Sopenharmony_ci mpd->first_page += folio_nr_pages(folio); 184862306a36Sopenharmony_ci folio_unlock(folio); 184962306a36Sopenharmony_ci} 185062306a36Sopenharmony_ci 185162306a36Sopenharmony_cistatic int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio) 185262306a36Sopenharmony_ci{ 185362306a36Sopenharmony_ci size_t len; 185462306a36Sopenharmony_ci loff_t size; 185562306a36Sopenharmony_ci int err; 185662306a36Sopenharmony_ci 185762306a36Sopenharmony_ci BUG_ON(folio->index != mpd->first_page); 185862306a36Sopenharmony_ci folio_clear_dirty_for_io(folio); 185962306a36Sopenharmony_ci /* 186062306a36Sopenharmony_ci * We have to be very careful here! Nothing protects writeback path 186162306a36Sopenharmony_ci * against i_size changes and the page can be writeably mapped into 186262306a36Sopenharmony_ci * page tables. So an application can be growing i_size and writing 186362306a36Sopenharmony_ci * data through mmap while writeback runs. folio_clear_dirty_for_io() 186462306a36Sopenharmony_ci * write-protects our page in page tables and the page cannot get 186562306a36Sopenharmony_ci * written to again until we release folio lock. So only after 186662306a36Sopenharmony_ci * folio_clear_dirty_for_io() we are safe to sample i_size for 186762306a36Sopenharmony_ci * ext4_bio_write_folio() to zero-out tail of the written page. 
We rely 186862306a36Sopenharmony_ci * on the barrier provided by folio_test_clear_dirty() in 186962306a36Sopenharmony_ci * folio_clear_dirty_for_io() to make sure i_size is really sampled only 187062306a36Sopenharmony_ci * after page tables are updated. 187162306a36Sopenharmony_ci */ 187262306a36Sopenharmony_ci size = i_size_read(mpd->inode); 187362306a36Sopenharmony_ci len = folio_size(folio); 187462306a36Sopenharmony_ci if (folio_pos(folio) + len > size && 187562306a36Sopenharmony_ci !ext4_verity_in_progress(mpd->inode)) 187662306a36Sopenharmony_ci len = size & ~PAGE_MASK; 187762306a36Sopenharmony_ci err = ext4_bio_write_folio(&mpd->io_submit, folio, len); 187862306a36Sopenharmony_ci if (!err) 187962306a36Sopenharmony_ci mpd->wbc->nr_to_write--; 188062306a36Sopenharmony_ci 188162306a36Sopenharmony_ci return err; 188262306a36Sopenharmony_ci} 188362306a36Sopenharmony_ci 188462306a36Sopenharmony_ci#define BH_FLAGS (BIT(BH_Unwritten) | BIT(BH_Delay)) 188562306a36Sopenharmony_ci 188662306a36Sopenharmony_ci/* 188762306a36Sopenharmony_ci * mballoc gives us at most this number of blocks... 188862306a36Sopenharmony_ci * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). 188962306a36Sopenharmony_ci * The rest of mballoc seems to handle chunks up to full group size. 189062306a36Sopenharmony_ci */ 189162306a36Sopenharmony_ci#define MAX_WRITEPAGES_EXTENT_LEN 2048 189262306a36Sopenharmony_ci 189362306a36Sopenharmony_ci/* 189462306a36Sopenharmony_ci * mpage_add_bh_to_extent - try to add bh to extent of blocks to map 189562306a36Sopenharmony_ci * 189662306a36Sopenharmony_ci * @mpd - extent of blocks 189762306a36Sopenharmony_ci * @lblk - logical number of the block in the file 189862306a36Sopenharmony_ci * @bh - buffer head we want to add to the extent 189962306a36Sopenharmony_ci * 190062306a36Sopenharmony_ci * The function is used to collect contig. blocks in the same state. 
If the 190162306a36Sopenharmony_ci * buffer doesn't require mapping for writeback and we haven't started the 190262306a36Sopenharmony_ci * extent of buffers to map yet, the function returns 'true' immediately - the 190362306a36Sopenharmony_ci * caller can write the buffer right away. Otherwise the function returns true 190462306a36Sopenharmony_ci * if the block has been added to the extent, false if the block couldn't be 190562306a36Sopenharmony_ci * added. 190662306a36Sopenharmony_ci */ 190762306a36Sopenharmony_cistatic bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, 190862306a36Sopenharmony_ci struct buffer_head *bh) 190962306a36Sopenharmony_ci{ 191062306a36Sopenharmony_ci struct ext4_map_blocks *map = &mpd->map; 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_ci /* Buffer that doesn't need mapping for writeback? */ 191362306a36Sopenharmony_ci if (!buffer_dirty(bh) || !buffer_mapped(bh) || 191462306a36Sopenharmony_ci (!buffer_delay(bh) && !buffer_unwritten(bh))) { 191562306a36Sopenharmony_ci /* So far no extent to map => we write the buffer right away */ 191662306a36Sopenharmony_ci if (map->m_len == 0) 191762306a36Sopenharmony_ci return true; 191862306a36Sopenharmony_ci return false; 191962306a36Sopenharmony_ci } 192062306a36Sopenharmony_ci 192162306a36Sopenharmony_ci /* First block in the extent? */ 192262306a36Sopenharmony_ci if (map->m_len == 0) { 192362306a36Sopenharmony_ci /* We cannot map unless handle is started... 
*/ 192462306a36Sopenharmony_ci if (!mpd->do_map) 192562306a36Sopenharmony_ci return false; 192662306a36Sopenharmony_ci map->m_lblk = lblk; 192762306a36Sopenharmony_ci map->m_len = 1; 192862306a36Sopenharmony_ci map->m_flags = bh->b_state & BH_FLAGS; 192962306a36Sopenharmony_ci return true; 193062306a36Sopenharmony_ci } 193162306a36Sopenharmony_ci 193262306a36Sopenharmony_ci /* Don't go larger than mballoc is willing to allocate */ 193362306a36Sopenharmony_ci if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) 193462306a36Sopenharmony_ci return false; 193562306a36Sopenharmony_ci 193662306a36Sopenharmony_ci /* Can we merge the block to our big extent? */ 193762306a36Sopenharmony_ci if (lblk == map->m_lblk + map->m_len && 193862306a36Sopenharmony_ci (bh->b_state & BH_FLAGS) == map->m_flags) { 193962306a36Sopenharmony_ci map->m_len++; 194062306a36Sopenharmony_ci return true; 194162306a36Sopenharmony_ci } 194262306a36Sopenharmony_ci return false; 194362306a36Sopenharmony_ci} 194462306a36Sopenharmony_ci 194562306a36Sopenharmony_ci/* 194662306a36Sopenharmony_ci * mpage_process_page_bufs - submit page buffers for IO or add them to extent 194762306a36Sopenharmony_ci * 194862306a36Sopenharmony_ci * @mpd - extent of blocks for mapping 194962306a36Sopenharmony_ci * @head - the first buffer in the page 195062306a36Sopenharmony_ci * @bh - buffer we should start processing from 195162306a36Sopenharmony_ci * @lblk - logical number of the block in the file corresponding to @bh 195262306a36Sopenharmony_ci * 195362306a36Sopenharmony_ci * Walk through page buffers from @bh upto @head (exclusive) and either submit 195462306a36Sopenharmony_ci * the page for IO if all buffers in this page were mapped and there's no 195562306a36Sopenharmony_ci * accumulated extent of buffers to map or add buffers in the page to the 195662306a36Sopenharmony_ci * extent of buffers to map. 
The function returns 1 if the caller can continue 195762306a36Sopenharmony_ci * by processing the next page, 0 if it should stop adding buffers to the 195862306a36Sopenharmony_ci * extent to map because we cannot extend it anymore. It can also return value 195962306a36Sopenharmony_ci * < 0 in case of error during IO submission. 196062306a36Sopenharmony_ci */ 196162306a36Sopenharmony_cistatic int mpage_process_page_bufs(struct mpage_da_data *mpd, 196262306a36Sopenharmony_ci struct buffer_head *head, 196362306a36Sopenharmony_ci struct buffer_head *bh, 196462306a36Sopenharmony_ci ext4_lblk_t lblk) 196562306a36Sopenharmony_ci{ 196662306a36Sopenharmony_ci struct inode *inode = mpd->inode; 196762306a36Sopenharmony_ci int err; 196862306a36Sopenharmony_ci ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1) 196962306a36Sopenharmony_ci >> inode->i_blkbits; 197062306a36Sopenharmony_ci 197162306a36Sopenharmony_ci if (ext4_verity_in_progress(inode)) 197262306a36Sopenharmony_ci blocks = EXT_MAX_BLOCKS; 197362306a36Sopenharmony_ci 197462306a36Sopenharmony_ci do { 197562306a36Sopenharmony_ci BUG_ON(buffer_locked(bh)); 197662306a36Sopenharmony_ci 197762306a36Sopenharmony_ci if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) { 197862306a36Sopenharmony_ci /* Found extent to map? */ 197962306a36Sopenharmony_ci if (mpd->map.m_len) 198062306a36Sopenharmony_ci return 0; 198162306a36Sopenharmony_ci /* Buffer needs mapping and handle is not started? */ 198262306a36Sopenharmony_ci if (!mpd->do_map) 198362306a36Sopenharmony_ci return 0; 198462306a36Sopenharmony_ci /* Everything mapped so far and we hit EOF */ 198562306a36Sopenharmony_ci break; 198662306a36Sopenharmony_ci } 198762306a36Sopenharmony_ci } while (lblk++, (bh = bh->b_this_page) != head); 198862306a36Sopenharmony_ci /* So far everything mapped? Submit the page for IO. 
*/ 198962306a36Sopenharmony_ci if (mpd->map.m_len == 0) { 199062306a36Sopenharmony_ci err = mpage_submit_folio(mpd, head->b_folio); 199162306a36Sopenharmony_ci if (err < 0) 199262306a36Sopenharmony_ci return err; 199362306a36Sopenharmony_ci mpage_folio_done(mpd, head->b_folio); 199462306a36Sopenharmony_ci } 199562306a36Sopenharmony_ci if (lblk >= blocks) { 199662306a36Sopenharmony_ci mpd->scanned_until_end = 1; 199762306a36Sopenharmony_ci return 0; 199862306a36Sopenharmony_ci } 199962306a36Sopenharmony_ci return 1; 200062306a36Sopenharmony_ci} 200162306a36Sopenharmony_ci 200262306a36Sopenharmony_ci/* 200362306a36Sopenharmony_ci * mpage_process_folio - update folio buffers corresponding to changed extent 200462306a36Sopenharmony_ci * and may submit fully mapped page for IO 200562306a36Sopenharmony_ci * @mpd: description of extent to map, on return next extent to map 200662306a36Sopenharmony_ci * @folio: Contains these buffers. 200762306a36Sopenharmony_ci * @m_lblk: logical block mapping. 200862306a36Sopenharmony_ci * @m_pblk: corresponding physical mapping. 200962306a36Sopenharmony_ci * @map_bh: determines on return whether this page requires any further 201062306a36Sopenharmony_ci * mapping or not. 201162306a36Sopenharmony_ci * 201262306a36Sopenharmony_ci * Scan given folio buffers corresponding to changed extent and update buffer 201362306a36Sopenharmony_ci * state according to new extent state. 201462306a36Sopenharmony_ci * We map delalloc buffers to their physical location, clear unwritten bits. 201562306a36Sopenharmony_ci * If the given folio is not fully mapped, we update @mpd to the next extent in 201662306a36Sopenharmony_ci * the given folio that needs mapping & return @map_bh as true. 
201762306a36Sopenharmony_ci */ 201862306a36Sopenharmony_cistatic int mpage_process_folio(struct mpage_da_data *mpd, struct folio *folio, 201962306a36Sopenharmony_ci ext4_lblk_t *m_lblk, ext4_fsblk_t *m_pblk, 202062306a36Sopenharmony_ci bool *map_bh) 202162306a36Sopenharmony_ci{ 202262306a36Sopenharmony_ci struct buffer_head *head, *bh; 202362306a36Sopenharmony_ci ext4_io_end_t *io_end = mpd->io_submit.io_end; 202462306a36Sopenharmony_ci ext4_lblk_t lblk = *m_lblk; 202562306a36Sopenharmony_ci ext4_fsblk_t pblock = *m_pblk; 202662306a36Sopenharmony_ci int err = 0; 202762306a36Sopenharmony_ci int blkbits = mpd->inode->i_blkbits; 202862306a36Sopenharmony_ci ssize_t io_end_size = 0; 202962306a36Sopenharmony_ci struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end); 203062306a36Sopenharmony_ci 203162306a36Sopenharmony_ci bh = head = folio_buffers(folio); 203262306a36Sopenharmony_ci do { 203362306a36Sopenharmony_ci if (lblk < mpd->map.m_lblk) 203462306a36Sopenharmony_ci continue; 203562306a36Sopenharmony_ci if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { 203662306a36Sopenharmony_ci /* 203762306a36Sopenharmony_ci * Buffer after end of mapped extent. 203862306a36Sopenharmony_ci * Find next buffer in the folio to map. 
203962306a36Sopenharmony_ci */ 204062306a36Sopenharmony_ci mpd->map.m_len = 0; 204162306a36Sopenharmony_ci mpd->map.m_flags = 0; 204262306a36Sopenharmony_ci io_end_vec->size += io_end_size; 204362306a36Sopenharmony_ci 204462306a36Sopenharmony_ci err = mpage_process_page_bufs(mpd, head, bh, lblk); 204562306a36Sopenharmony_ci if (err > 0) 204662306a36Sopenharmony_ci err = 0; 204762306a36Sopenharmony_ci if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) { 204862306a36Sopenharmony_ci io_end_vec = ext4_alloc_io_end_vec(io_end); 204962306a36Sopenharmony_ci if (IS_ERR(io_end_vec)) { 205062306a36Sopenharmony_ci err = PTR_ERR(io_end_vec); 205162306a36Sopenharmony_ci goto out; 205262306a36Sopenharmony_ci } 205362306a36Sopenharmony_ci io_end_vec->offset = (loff_t)mpd->map.m_lblk << blkbits; 205462306a36Sopenharmony_ci } 205562306a36Sopenharmony_ci *map_bh = true; 205662306a36Sopenharmony_ci goto out; 205762306a36Sopenharmony_ci } 205862306a36Sopenharmony_ci if (buffer_delay(bh)) { 205962306a36Sopenharmony_ci clear_buffer_delay(bh); 206062306a36Sopenharmony_ci bh->b_blocknr = pblock++; 206162306a36Sopenharmony_ci } 206262306a36Sopenharmony_ci clear_buffer_unwritten(bh); 206362306a36Sopenharmony_ci io_end_size += (1 << blkbits); 206462306a36Sopenharmony_ci } while (lblk++, (bh = bh->b_this_page) != head); 206562306a36Sopenharmony_ci 206662306a36Sopenharmony_ci io_end_vec->size += io_end_size; 206762306a36Sopenharmony_ci *map_bh = false; 206862306a36Sopenharmony_ciout: 206962306a36Sopenharmony_ci *m_lblk = lblk; 207062306a36Sopenharmony_ci *m_pblk = pblock; 207162306a36Sopenharmony_ci return err; 207262306a36Sopenharmony_ci} 207362306a36Sopenharmony_ci 207462306a36Sopenharmony_ci/* 207562306a36Sopenharmony_ci * mpage_map_buffers - update buffers corresponding to changed extent and 207662306a36Sopenharmony_ci * submit fully mapped pages for IO 207762306a36Sopenharmony_ci * 207862306a36Sopenharmony_ci * @mpd - description of extent to map, on return next extent to map 
207962306a36Sopenharmony_ci * 208062306a36Sopenharmony_ci * Scan buffers corresponding to changed extent (we expect corresponding pages 208162306a36Sopenharmony_ci * to be already locked) and update buffer state according to new extent state. 208262306a36Sopenharmony_ci * We map delalloc buffers to their physical location, clear unwritten bits, 208362306a36Sopenharmony_ci * and mark buffers as uninit when we perform writes to unwritten extents 208462306a36Sopenharmony_ci * and do extent conversion after IO is finished. If the last page is not fully 208562306a36Sopenharmony_ci * mapped, we update @map to the next extent in the last page that needs 208662306a36Sopenharmony_ci * mapping. Otherwise we submit the page for IO. 208762306a36Sopenharmony_ci */ 208862306a36Sopenharmony_cistatic int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) 208962306a36Sopenharmony_ci{ 209062306a36Sopenharmony_ci struct folio_batch fbatch; 209162306a36Sopenharmony_ci unsigned nr, i; 209262306a36Sopenharmony_ci struct inode *inode = mpd->inode; 209362306a36Sopenharmony_ci int bpp_bits = PAGE_SHIFT - inode->i_blkbits; 209462306a36Sopenharmony_ci pgoff_t start, end; 209562306a36Sopenharmony_ci ext4_lblk_t lblk; 209662306a36Sopenharmony_ci ext4_fsblk_t pblock; 209762306a36Sopenharmony_ci int err; 209862306a36Sopenharmony_ci bool map_bh = false; 209962306a36Sopenharmony_ci 210062306a36Sopenharmony_ci start = mpd->map.m_lblk >> bpp_bits; 210162306a36Sopenharmony_ci end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; 210262306a36Sopenharmony_ci lblk = start << bpp_bits; 210362306a36Sopenharmony_ci pblock = mpd->map.m_pblk; 210462306a36Sopenharmony_ci 210562306a36Sopenharmony_ci folio_batch_init(&fbatch); 210662306a36Sopenharmony_ci while (start <= end) { 210762306a36Sopenharmony_ci nr = filemap_get_folios(inode->i_mapping, &start, end, &fbatch); 210862306a36Sopenharmony_ci if (nr == 0) 210962306a36Sopenharmony_ci break; 211062306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 
211162306a36Sopenharmony_ci struct folio *folio = fbatch.folios[i]; 211262306a36Sopenharmony_ci 211362306a36Sopenharmony_ci err = mpage_process_folio(mpd, folio, &lblk, &pblock, 211462306a36Sopenharmony_ci &map_bh); 211562306a36Sopenharmony_ci /* 211662306a36Sopenharmony_ci * If map_bh is true, means page may require further bh 211762306a36Sopenharmony_ci * mapping, or maybe the page was submitted for IO. 211862306a36Sopenharmony_ci * So we return to call further extent mapping. 211962306a36Sopenharmony_ci */ 212062306a36Sopenharmony_ci if (err < 0 || map_bh) 212162306a36Sopenharmony_ci goto out; 212262306a36Sopenharmony_ci /* Page fully mapped - let IO run! */ 212362306a36Sopenharmony_ci err = mpage_submit_folio(mpd, folio); 212462306a36Sopenharmony_ci if (err < 0) 212562306a36Sopenharmony_ci goto out; 212662306a36Sopenharmony_ci mpage_folio_done(mpd, folio); 212762306a36Sopenharmony_ci } 212862306a36Sopenharmony_ci folio_batch_release(&fbatch); 212962306a36Sopenharmony_ci } 213062306a36Sopenharmony_ci /* Extent fully mapped and matches with page boundary. We are done. 
*/ 213162306a36Sopenharmony_ci mpd->map.m_len = 0; 213262306a36Sopenharmony_ci mpd->map.m_flags = 0; 213362306a36Sopenharmony_ci return 0; 213462306a36Sopenharmony_ciout: 213562306a36Sopenharmony_ci folio_batch_release(&fbatch); 213662306a36Sopenharmony_ci return err; 213762306a36Sopenharmony_ci} 213862306a36Sopenharmony_ci 213962306a36Sopenharmony_cistatic int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) 214062306a36Sopenharmony_ci{ 214162306a36Sopenharmony_ci struct inode *inode = mpd->inode; 214262306a36Sopenharmony_ci struct ext4_map_blocks *map = &mpd->map; 214362306a36Sopenharmony_ci int get_blocks_flags; 214462306a36Sopenharmony_ci int err, dioread_nolock; 214562306a36Sopenharmony_ci 214662306a36Sopenharmony_ci trace_ext4_da_write_pages_extent(inode, map); 214762306a36Sopenharmony_ci /* 214862306a36Sopenharmony_ci * Call ext4_map_blocks() to allocate any delayed allocation blocks, or 214962306a36Sopenharmony_ci * to convert an unwritten extent to be initialized (in the case 215062306a36Sopenharmony_ci * where we have written into one or more preallocated blocks). It is 215162306a36Sopenharmony_ci * possible that we're going to need more metadata blocks than 215262306a36Sopenharmony_ci * previously reserved. However we must not fail because we're in 215362306a36Sopenharmony_ci * writeback and there is nothing we can do about it so it might result 215462306a36Sopenharmony_ci * in data loss. So use reserved blocks to allocate metadata if 215562306a36Sopenharmony_ci * possible. 215662306a36Sopenharmony_ci * 215762306a36Sopenharmony_ci * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if 215862306a36Sopenharmony_ci * the blocks in question are delalloc blocks. This indicates 215962306a36Sopenharmony_ci * that the blocks and quotas has already been checked when 216062306a36Sopenharmony_ci * the data was copied into the page cache. 
216162306a36Sopenharmony_ci */ 216262306a36Sopenharmony_ci get_blocks_flags = EXT4_GET_BLOCKS_CREATE | 216362306a36Sopenharmony_ci EXT4_GET_BLOCKS_METADATA_NOFAIL | 216462306a36Sopenharmony_ci EXT4_GET_BLOCKS_IO_SUBMIT; 216562306a36Sopenharmony_ci dioread_nolock = ext4_should_dioread_nolock(inode); 216662306a36Sopenharmony_ci if (dioread_nolock) 216762306a36Sopenharmony_ci get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; 216862306a36Sopenharmony_ci if (map->m_flags & BIT(BH_Delay)) 216962306a36Sopenharmony_ci get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ci err = ext4_map_blocks(handle, inode, map, get_blocks_flags); 217262306a36Sopenharmony_ci if (err < 0) 217362306a36Sopenharmony_ci return err; 217462306a36Sopenharmony_ci if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) { 217562306a36Sopenharmony_ci if (!mpd->io_submit.io_end->handle && 217662306a36Sopenharmony_ci ext4_handle_valid(handle)) { 217762306a36Sopenharmony_ci mpd->io_submit.io_end->handle = handle->h_rsv_handle; 217862306a36Sopenharmony_ci handle->h_rsv_handle = NULL; 217962306a36Sopenharmony_ci } 218062306a36Sopenharmony_ci ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); 218162306a36Sopenharmony_ci } 218262306a36Sopenharmony_ci 218362306a36Sopenharmony_ci BUG_ON(map->m_len == 0); 218462306a36Sopenharmony_ci return 0; 218562306a36Sopenharmony_ci} 218662306a36Sopenharmony_ci 218762306a36Sopenharmony_ci/* 218862306a36Sopenharmony_ci * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length 218962306a36Sopenharmony_ci * mpd->len and submit pages underlying it for IO 219062306a36Sopenharmony_ci * 219162306a36Sopenharmony_ci * @handle - handle for journal operations 219262306a36Sopenharmony_ci * @mpd - extent to map 219362306a36Sopenharmony_ci * @give_up_on_write - we set this to true iff there is a fatal error and there 219462306a36Sopenharmony_ci * is no hope of writing the data. 
The caller should discard 219562306a36Sopenharmony_ci * dirty pages to avoid infinite loops. 219662306a36Sopenharmony_ci * 219762306a36Sopenharmony_ci * The function maps extent starting at mpd->lblk of length mpd->len. If it is 219862306a36Sopenharmony_ci * delayed, blocks are allocated, if it is unwritten, we may need to convert 219962306a36Sopenharmony_ci * them to initialized or split the described range from larger unwritten 220062306a36Sopenharmony_ci * extent. Note that we need not map all the described range since allocation 220162306a36Sopenharmony_ci * can return less blocks or the range is covered by more unwritten extents. We 220262306a36Sopenharmony_ci * cannot map more because we are limited by reserved transaction credits. On 220362306a36Sopenharmony_ci * the other hand we always make sure that the last touched page is fully 220462306a36Sopenharmony_ci * mapped so that it can be written out (and thus forward progress is 220562306a36Sopenharmony_ci * guaranteed). After mapping we submit all mapped pages for IO. 
220662306a36Sopenharmony_ci */ 220762306a36Sopenharmony_cistatic int mpage_map_and_submit_extent(handle_t *handle, 220862306a36Sopenharmony_ci struct mpage_da_data *mpd, 220962306a36Sopenharmony_ci bool *give_up_on_write) 221062306a36Sopenharmony_ci{ 221162306a36Sopenharmony_ci struct inode *inode = mpd->inode; 221262306a36Sopenharmony_ci struct ext4_map_blocks *map = &mpd->map; 221362306a36Sopenharmony_ci int err; 221462306a36Sopenharmony_ci loff_t disksize; 221562306a36Sopenharmony_ci int progress = 0; 221662306a36Sopenharmony_ci ext4_io_end_t *io_end = mpd->io_submit.io_end; 221762306a36Sopenharmony_ci struct ext4_io_end_vec *io_end_vec; 221862306a36Sopenharmony_ci 221962306a36Sopenharmony_ci io_end_vec = ext4_alloc_io_end_vec(io_end); 222062306a36Sopenharmony_ci if (IS_ERR(io_end_vec)) 222162306a36Sopenharmony_ci return PTR_ERR(io_end_vec); 222262306a36Sopenharmony_ci io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits; 222362306a36Sopenharmony_ci do { 222462306a36Sopenharmony_ci err = mpage_map_one_extent(handle, mpd); 222562306a36Sopenharmony_ci if (err < 0) { 222662306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 222762306a36Sopenharmony_ci 222862306a36Sopenharmony_ci if (ext4_forced_shutdown(sb)) 222962306a36Sopenharmony_ci goto invalidate_dirty_pages; 223062306a36Sopenharmony_ci /* 223162306a36Sopenharmony_ci * Let the uper layers retry transient errors. 223262306a36Sopenharmony_ci * In the case of ENOSPC, if ext4_count_free_blocks() 223362306a36Sopenharmony_ci * is non-zero, a commit should free up blocks. 
223462306a36Sopenharmony_ci */ 223562306a36Sopenharmony_ci if ((err == -ENOMEM) || 223662306a36Sopenharmony_ci (err == -ENOSPC && ext4_count_free_clusters(sb))) { 223762306a36Sopenharmony_ci if (progress) 223862306a36Sopenharmony_ci goto update_disksize; 223962306a36Sopenharmony_ci return err; 224062306a36Sopenharmony_ci } 224162306a36Sopenharmony_ci ext4_msg(sb, KERN_CRIT, 224262306a36Sopenharmony_ci "Delayed block allocation failed for " 224362306a36Sopenharmony_ci "inode %lu at logical offset %llu with" 224462306a36Sopenharmony_ci " max blocks %u with error %d", 224562306a36Sopenharmony_ci inode->i_ino, 224662306a36Sopenharmony_ci (unsigned long long)map->m_lblk, 224762306a36Sopenharmony_ci (unsigned)map->m_len, -err); 224862306a36Sopenharmony_ci ext4_msg(sb, KERN_CRIT, 224962306a36Sopenharmony_ci "This should not happen!! Data will " 225062306a36Sopenharmony_ci "be lost\n"); 225162306a36Sopenharmony_ci if (err == -ENOSPC) 225262306a36Sopenharmony_ci ext4_print_free_blocks(inode); 225362306a36Sopenharmony_ci invalidate_dirty_pages: 225462306a36Sopenharmony_ci *give_up_on_write = true; 225562306a36Sopenharmony_ci return err; 225662306a36Sopenharmony_ci } 225762306a36Sopenharmony_ci progress = 1; 225862306a36Sopenharmony_ci /* 225962306a36Sopenharmony_ci * Update buffer state, submit mapped pages, and get us new 226062306a36Sopenharmony_ci * extent to map 226162306a36Sopenharmony_ci */ 226262306a36Sopenharmony_ci err = mpage_map_and_submit_buffers(mpd); 226362306a36Sopenharmony_ci if (err < 0) 226462306a36Sopenharmony_ci goto update_disksize; 226562306a36Sopenharmony_ci } while (map->m_len); 226662306a36Sopenharmony_ci 226762306a36Sopenharmony_ciupdate_disksize: 226862306a36Sopenharmony_ci /* 226962306a36Sopenharmony_ci * Update on-disk size after IO is submitted. Races with 227062306a36Sopenharmony_ci * truncate are avoided by checking i_size under i_data_sem. 
227162306a36Sopenharmony_ci */ 227262306a36Sopenharmony_ci disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; 227362306a36Sopenharmony_ci if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { 227462306a36Sopenharmony_ci int err2; 227562306a36Sopenharmony_ci loff_t i_size; 227662306a36Sopenharmony_ci 227762306a36Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 227862306a36Sopenharmony_ci i_size = i_size_read(inode); 227962306a36Sopenharmony_ci if (disksize > i_size) 228062306a36Sopenharmony_ci disksize = i_size; 228162306a36Sopenharmony_ci if (disksize > EXT4_I(inode)->i_disksize) 228262306a36Sopenharmony_ci EXT4_I(inode)->i_disksize = disksize; 228362306a36Sopenharmony_ci up_write(&EXT4_I(inode)->i_data_sem); 228462306a36Sopenharmony_ci err2 = ext4_mark_inode_dirty(handle, inode); 228562306a36Sopenharmony_ci if (err2) { 228662306a36Sopenharmony_ci ext4_error_err(inode->i_sb, -err2, 228762306a36Sopenharmony_ci "Failed to mark inode %lu dirty", 228862306a36Sopenharmony_ci inode->i_ino); 228962306a36Sopenharmony_ci } 229062306a36Sopenharmony_ci if (!err) 229162306a36Sopenharmony_ci err = err2; 229262306a36Sopenharmony_ci } 229362306a36Sopenharmony_ci return err; 229462306a36Sopenharmony_ci} 229562306a36Sopenharmony_ci 229662306a36Sopenharmony_ci/* 229762306a36Sopenharmony_ci * Calculate the total number of credits to reserve for one writepages 229862306a36Sopenharmony_ci * iteration. This is called from ext4_writepages(). We map an extent of 229962306a36Sopenharmony_ci * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping 230062306a36Sopenharmony_ci * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + 230162306a36Sopenharmony_ci * bpp - 1 blocks in bpp different extents. 
230262306a36Sopenharmony_ci */ 230362306a36Sopenharmony_cistatic int ext4_da_writepages_trans_blocks(struct inode *inode) 230462306a36Sopenharmony_ci{ 230562306a36Sopenharmony_ci int bpp = ext4_journal_blocks_per_page(inode); 230662306a36Sopenharmony_ci 230762306a36Sopenharmony_ci return ext4_meta_trans_blocks(inode, 230862306a36Sopenharmony_ci MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp); 230962306a36Sopenharmony_ci} 231062306a36Sopenharmony_ci 231162306a36Sopenharmony_cistatic int ext4_journal_folio_buffers(handle_t *handle, struct folio *folio, 231262306a36Sopenharmony_ci size_t len) 231362306a36Sopenharmony_ci{ 231462306a36Sopenharmony_ci struct buffer_head *page_bufs = folio_buffers(folio); 231562306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 231662306a36Sopenharmony_ci int ret, err; 231762306a36Sopenharmony_ci 231862306a36Sopenharmony_ci ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, 231962306a36Sopenharmony_ci NULL, do_journal_get_write_access); 232062306a36Sopenharmony_ci err = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len, 232162306a36Sopenharmony_ci NULL, write_end_fn); 232262306a36Sopenharmony_ci if (ret == 0) 232362306a36Sopenharmony_ci ret = err; 232462306a36Sopenharmony_ci err = ext4_jbd2_inode_add_write(handle, inode, folio_pos(folio), len); 232562306a36Sopenharmony_ci if (ret == 0) 232662306a36Sopenharmony_ci ret = err; 232762306a36Sopenharmony_ci EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; 232862306a36Sopenharmony_ci 232962306a36Sopenharmony_ci return ret; 233062306a36Sopenharmony_ci} 233162306a36Sopenharmony_ci 233262306a36Sopenharmony_cistatic int mpage_journal_page_buffers(handle_t *handle, 233362306a36Sopenharmony_ci struct mpage_da_data *mpd, 233462306a36Sopenharmony_ci struct folio *folio) 233562306a36Sopenharmony_ci{ 233662306a36Sopenharmony_ci struct inode *inode = mpd->inode; 233762306a36Sopenharmony_ci loff_t size = i_size_read(inode); 233862306a36Sopenharmony_ci size_t len = 
folio_size(folio); 233962306a36Sopenharmony_ci 234062306a36Sopenharmony_ci folio_clear_checked(folio); 234162306a36Sopenharmony_ci mpd->wbc->nr_to_write--; 234262306a36Sopenharmony_ci 234362306a36Sopenharmony_ci if (folio_pos(folio) + len > size && 234462306a36Sopenharmony_ci !ext4_verity_in_progress(inode)) 234562306a36Sopenharmony_ci len = size - folio_pos(folio); 234662306a36Sopenharmony_ci 234762306a36Sopenharmony_ci return ext4_journal_folio_buffers(handle, folio, len); 234862306a36Sopenharmony_ci} 234962306a36Sopenharmony_ci 235062306a36Sopenharmony_ci/* 235162306a36Sopenharmony_ci * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages 235262306a36Sopenharmony_ci * needing mapping, submit mapped pages 235362306a36Sopenharmony_ci * 235462306a36Sopenharmony_ci * @mpd - where to look for pages 235562306a36Sopenharmony_ci * 235662306a36Sopenharmony_ci * Walk dirty pages in the mapping. If they are fully mapped, submit them for 235762306a36Sopenharmony_ci * IO immediately. If we cannot map blocks, we submit just already mapped 235862306a36Sopenharmony_ci * buffers in the page for IO and keep page dirty. When we can map blocks and 235962306a36Sopenharmony_ci * we find a page which isn't mapped we start accumulating extent of buffers 236062306a36Sopenharmony_ci * underlying these pages that needs mapping (formed by either delayed or 236162306a36Sopenharmony_ci * unwritten buffers). We also lock the pages containing these buffers. The 236262306a36Sopenharmony_ci * extent found is returned in @mpd structure (starting at mpd->lblk with 236362306a36Sopenharmony_ci * length mpd->len blocks). 236462306a36Sopenharmony_ci * 236562306a36Sopenharmony_ci * Note that this function can attach bios to one io_end structure which are 236662306a36Sopenharmony_ci * neither logically nor physically contiguous. 
Although it may seem as an 236762306a36Sopenharmony_ci * unnecessary complication, it is actually inevitable in blocksize < pagesize 236862306a36Sopenharmony_ci * case as we need to track IO to all buffers underlying a page in one io_end. 236962306a36Sopenharmony_ci */ 237062306a36Sopenharmony_cistatic int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) 237162306a36Sopenharmony_ci{ 237262306a36Sopenharmony_ci struct address_space *mapping = mpd->inode->i_mapping; 237362306a36Sopenharmony_ci struct folio_batch fbatch; 237462306a36Sopenharmony_ci unsigned int nr_folios; 237562306a36Sopenharmony_ci pgoff_t index = mpd->first_page; 237662306a36Sopenharmony_ci pgoff_t end = mpd->last_page; 237762306a36Sopenharmony_ci xa_mark_t tag; 237862306a36Sopenharmony_ci int i, err = 0; 237962306a36Sopenharmony_ci int blkbits = mpd->inode->i_blkbits; 238062306a36Sopenharmony_ci ext4_lblk_t lblk; 238162306a36Sopenharmony_ci struct buffer_head *head; 238262306a36Sopenharmony_ci handle_t *handle = NULL; 238362306a36Sopenharmony_ci int bpp = ext4_journal_blocks_per_page(mpd->inode); 238462306a36Sopenharmony_ci 238562306a36Sopenharmony_ci if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) 238662306a36Sopenharmony_ci tag = PAGECACHE_TAG_TOWRITE; 238762306a36Sopenharmony_ci else 238862306a36Sopenharmony_ci tag = PAGECACHE_TAG_DIRTY; 238962306a36Sopenharmony_ci 239062306a36Sopenharmony_ci mpd->map.m_len = 0; 239162306a36Sopenharmony_ci mpd->next_page = index; 239262306a36Sopenharmony_ci if (ext4_should_journal_data(mpd->inode)) { 239362306a36Sopenharmony_ci handle = ext4_journal_start(mpd->inode, EXT4_HT_WRITE_PAGE, 239462306a36Sopenharmony_ci bpp); 239562306a36Sopenharmony_ci if (IS_ERR(handle)) 239662306a36Sopenharmony_ci return PTR_ERR(handle); 239762306a36Sopenharmony_ci } 239862306a36Sopenharmony_ci folio_batch_init(&fbatch); 239962306a36Sopenharmony_ci while (index <= end) { 240062306a36Sopenharmony_ci nr_folios = filemap_get_folios_tag(mapping, &index, 
end, 240162306a36Sopenharmony_ci tag, &fbatch); 240262306a36Sopenharmony_ci if (nr_folios == 0) 240362306a36Sopenharmony_ci break; 240462306a36Sopenharmony_ci 240562306a36Sopenharmony_ci for (i = 0; i < nr_folios; i++) { 240662306a36Sopenharmony_ci struct folio *folio = fbatch.folios[i]; 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci /* 240962306a36Sopenharmony_ci * Accumulated enough dirty pages? This doesn't apply 241062306a36Sopenharmony_ci * to WB_SYNC_ALL mode. For integrity sync we have to 241162306a36Sopenharmony_ci * keep going because someone may be concurrently 241262306a36Sopenharmony_ci * dirtying pages, and we might have synced a lot of 241362306a36Sopenharmony_ci * newly appeared dirty pages, but have not synced all 241462306a36Sopenharmony_ci * of the old dirty pages. 241562306a36Sopenharmony_ci */ 241662306a36Sopenharmony_ci if (mpd->wbc->sync_mode == WB_SYNC_NONE && 241762306a36Sopenharmony_ci mpd->wbc->nr_to_write <= 241862306a36Sopenharmony_ci mpd->map.m_len >> (PAGE_SHIFT - blkbits)) 241962306a36Sopenharmony_ci goto out; 242062306a36Sopenharmony_ci 242162306a36Sopenharmony_ci /* If we can't merge this page, we are done. 
*/ 242262306a36Sopenharmony_ci if (mpd->map.m_len > 0 && mpd->next_page != folio->index) 242362306a36Sopenharmony_ci goto out; 242462306a36Sopenharmony_ci 242562306a36Sopenharmony_ci if (handle) { 242662306a36Sopenharmony_ci err = ext4_journal_ensure_credits(handle, bpp, 242762306a36Sopenharmony_ci 0); 242862306a36Sopenharmony_ci if (err < 0) 242962306a36Sopenharmony_ci goto out; 243062306a36Sopenharmony_ci } 243162306a36Sopenharmony_ci 243262306a36Sopenharmony_ci folio_lock(folio); 243362306a36Sopenharmony_ci /* 243462306a36Sopenharmony_ci * If the page is no longer dirty, or its mapping no 243562306a36Sopenharmony_ci * longer corresponds to inode we are writing (which 243662306a36Sopenharmony_ci * means it has been truncated or invalidated), or the 243762306a36Sopenharmony_ci * page is already under writeback and we are not doing 243862306a36Sopenharmony_ci * a data integrity writeback, skip the page 243962306a36Sopenharmony_ci */ 244062306a36Sopenharmony_ci if (!folio_test_dirty(folio) || 244162306a36Sopenharmony_ci (folio_test_writeback(folio) && 244262306a36Sopenharmony_ci (mpd->wbc->sync_mode == WB_SYNC_NONE)) || 244362306a36Sopenharmony_ci unlikely(folio->mapping != mapping)) { 244462306a36Sopenharmony_ci folio_unlock(folio); 244562306a36Sopenharmony_ci continue; 244662306a36Sopenharmony_ci } 244762306a36Sopenharmony_ci 244862306a36Sopenharmony_ci folio_wait_writeback(folio); 244962306a36Sopenharmony_ci BUG_ON(folio_test_writeback(folio)); 245062306a36Sopenharmony_ci 245162306a36Sopenharmony_ci /* 245262306a36Sopenharmony_ci * Should never happen but for buggy code in 245362306a36Sopenharmony_ci * other subsystems that call 245462306a36Sopenharmony_ci * set_page_dirty() without properly warning 245562306a36Sopenharmony_ci * the file system first. See [1] for more 245662306a36Sopenharmony_ci * information. 
245762306a36Sopenharmony_ci * 245862306a36Sopenharmony_ci * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz 245962306a36Sopenharmony_ci */ 246062306a36Sopenharmony_ci if (!folio_buffers(folio)) { 246162306a36Sopenharmony_ci ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", folio->index); 246262306a36Sopenharmony_ci folio_clear_dirty(folio); 246362306a36Sopenharmony_ci folio_unlock(folio); 246462306a36Sopenharmony_ci continue; 246562306a36Sopenharmony_ci } 246662306a36Sopenharmony_ci 246762306a36Sopenharmony_ci if (mpd->map.m_len == 0) 246862306a36Sopenharmony_ci mpd->first_page = folio->index; 246962306a36Sopenharmony_ci mpd->next_page = folio_next_index(folio); 247062306a36Sopenharmony_ci /* 247162306a36Sopenharmony_ci * Writeout when we cannot modify metadata is simple. 247262306a36Sopenharmony_ci * Just submit the page. For data=journal mode we 247362306a36Sopenharmony_ci * first handle writeout of the page for checkpoint and 247462306a36Sopenharmony_ci * only after that handle delayed page dirtying. This 247562306a36Sopenharmony_ci * makes sure current data is checkpointed to the final 247662306a36Sopenharmony_ci * location before possibly journalling it again which 247762306a36Sopenharmony_ci * is desirable when the page is frequently dirtied 247862306a36Sopenharmony_ci * through a pin. 247962306a36Sopenharmony_ci */ 248062306a36Sopenharmony_ci if (!mpd->can_map) { 248162306a36Sopenharmony_ci err = mpage_submit_folio(mpd, folio); 248262306a36Sopenharmony_ci if (err < 0) 248362306a36Sopenharmony_ci goto out; 248462306a36Sopenharmony_ci /* Pending dirtying of journalled data? 
*/ 248562306a36Sopenharmony_ci if (folio_test_checked(folio)) { 248662306a36Sopenharmony_ci err = mpage_journal_page_buffers(handle, 248762306a36Sopenharmony_ci mpd, folio); 248862306a36Sopenharmony_ci if (err < 0) 248962306a36Sopenharmony_ci goto out; 249062306a36Sopenharmony_ci mpd->journalled_more_data = 1; 249162306a36Sopenharmony_ci } 249262306a36Sopenharmony_ci mpage_folio_done(mpd, folio); 249362306a36Sopenharmony_ci } else { 249462306a36Sopenharmony_ci /* Add all dirty buffers to mpd */ 249562306a36Sopenharmony_ci lblk = ((ext4_lblk_t)folio->index) << 249662306a36Sopenharmony_ci (PAGE_SHIFT - blkbits); 249762306a36Sopenharmony_ci head = folio_buffers(folio); 249862306a36Sopenharmony_ci err = mpage_process_page_bufs(mpd, head, head, 249962306a36Sopenharmony_ci lblk); 250062306a36Sopenharmony_ci if (err <= 0) 250162306a36Sopenharmony_ci goto out; 250262306a36Sopenharmony_ci err = 0; 250362306a36Sopenharmony_ci } 250462306a36Sopenharmony_ci } 250562306a36Sopenharmony_ci folio_batch_release(&fbatch); 250662306a36Sopenharmony_ci cond_resched(); 250762306a36Sopenharmony_ci } 250862306a36Sopenharmony_ci mpd->scanned_until_end = 1; 250962306a36Sopenharmony_ci if (handle) 251062306a36Sopenharmony_ci ext4_journal_stop(handle); 251162306a36Sopenharmony_ci return 0; 251262306a36Sopenharmony_ciout: 251362306a36Sopenharmony_ci folio_batch_release(&fbatch); 251462306a36Sopenharmony_ci if (handle) 251562306a36Sopenharmony_ci ext4_journal_stop(handle); 251662306a36Sopenharmony_ci return err; 251762306a36Sopenharmony_ci} 251862306a36Sopenharmony_ci 251962306a36Sopenharmony_cistatic int ext4_do_writepages(struct mpage_da_data *mpd) 252062306a36Sopenharmony_ci{ 252162306a36Sopenharmony_ci struct writeback_control *wbc = mpd->wbc; 252262306a36Sopenharmony_ci pgoff_t writeback_index = 0; 252362306a36Sopenharmony_ci long nr_to_write = wbc->nr_to_write; 252462306a36Sopenharmony_ci int range_whole = 0; 252562306a36Sopenharmony_ci int cycled = 1; 252662306a36Sopenharmony_ci handle_t 
*handle = NULL; 252762306a36Sopenharmony_ci struct inode *inode = mpd->inode; 252862306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 252962306a36Sopenharmony_ci int needed_blocks, rsv_blocks = 0, ret = 0; 253062306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 253162306a36Sopenharmony_ci struct blk_plug plug; 253262306a36Sopenharmony_ci bool give_up_on_write = false; 253362306a36Sopenharmony_ci 253462306a36Sopenharmony_ci trace_ext4_writepages(inode, wbc); 253562306a36Sopenharmony_ci 253662306a36Sopenharmony_ci /* 253762306a36Sopenharmony_ci * No pages to write? This is mainly a kludge to avoid starting 253862306a36Sopenharmony_ci * a transaction for special inodes like journal inode on last iput() 253962306a36Sopenharmony_ci * because that could violate lock ordering on umount 254062306a36Sopenharmony_ci */ 254162306a36Sopenharmony_ci if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) 254262306a36Sopenharmony_ci goto out_writepages; 254362306a36Sopenharmony_ci 254462306a36Sopenharmony_ci /* 254562306a36Sopenharmony_ci * If the filesystem has aborted, it is read-only, so return 254662306a36Sopenharmony_ci * right away instead of dumping stack traces later on that 254762306a36Sopenharmony_ci * will obscure the real source of the problem. We test 254862306a36Sopenharmony_ci * fs shutdown state instead of sb->s_flag's SB_RDONLY because 254962306a36Sopenharmony_ci * the latter could be true if the filesystem is mounted 255062306a36Sopenharmony_ci * read-only, and in that case, ext4_writepages should 255162306a36Sopenharmony_ci * *never* be called, so if that ever happens, we would want 255262306a36Sopenharmony_ci * the stack trace. 
255362306a36Sopenharmony_ci */ 255462306a36Sopenharmony_ci if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) { 255562306a36Sopenharmony_ci ret = -EROFS; 255662306a36Sopenharmony_ci goto out_writepages; 255762306a36Sopenharmony_ci } 255862306a36Sopenharmony_ci 255962306a36Sopenharmony_ci /* 256062306a36Sopenharmony_ci * If we have inline data and arrive here, it means that 256162306a36Sopenharmony_ci * we will soon create the block for the 1st page, so 256262306a36Sopenharmony_ci * we'd better clear the inline data here. 256362306a36Sopenharmony_ci */ 256462306a36Sopenharmony_ci if (ext4_has_inline_data(inode)) { 256562306a36Sopenharmony_ci /* Just inode will be modified... */ 256662306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); 256762306a36Sopenharmony_ci if (IS_ERR(handle)) { 256862306a36Sopenharmony_ci ret = PTR_ERR(handle); 256962306a36Sopenharmony_ci goto out_writepages; 257062306a36Sopenharmony_ci } 257162306a36Sopenharmony_ci BUG_ON(ext4_test_inode_state(inode, 257262306a36Sopenharmony_ci EXT4_STATE_MAY_INLINE_DATA)); 257362306a36Sopenharmony_ci ext4_destroy_inline_data(handle, inode); 257462306a36Sopenharmony_ci ext4_journal_stop(handle); 257562306a36Sopenharmony_ci } 257662306a36Sopenharmony_ci 257762306a36Sopenharmony_ci /* 257862306a36Sopenharmony_ci * data=journal mode does not do delalloc so we just need to writeout / 257962306a36Sopenharmony_ci * journal already mapped buffers. On the other hand we need to commit 258062306a36Sopenharmony_ci * transaction to make data stable. We expect all the data to be 258162306a36Sopenharmony_ci * already in the journal (the only exception are DMA pinned pages 258262306a36Sopenharmony_ci * dirtied behind our back) so we commit transaction here and run the 258362306a36Sopenharmony_ci * writeback loop to checkpoint them. 
The checkpointing is not actually 258462306a36Sopenharmony_ci * necessary to make data persistent *but* quite a few places (extent 258562306a36Sopenharmony_ci * shifting operations, fsverity, ...) depend on being able to drop 258662306a36Sopenharmony_ci * pagecache pages after calling filemap_write_and_wait() and for that 258762306a36Sopenharmony_ci * checkpointing needs to happen. 258862306a36Sopenharmony_ci */ 258962306a36Sopenharmony_ci if (ext4_should_journal_data(inode)) { 259062306a36Sopenharmony_ci mpd->can_map = 0; 259162306a36Sopenharmony_ci if (wbc->sync_mode == WB_SYNC_ALL) 259262306a36Sopenharmony_ci ext4_fc_commit(sbi->s_journal, 259362306a36Sopenharmony_ci EXT4_I(inode)->i_datasync_tid); 259462306a36Sopenharmony_ci } 259562306a36Sopenharmony_ci mpd->journalled_more_data = 0; 259662306a36Sopenharmony_ci 259762306a36Sopenharmony_ci if (ext4_should_dioread_nolock(inode)) { 259862306a36Sopenharmony_ci /* 259962306a36Sopenharmony_ci * We may need to convert up to one extent per block in 260062306a36Sopenharmony_ci * the page and we may dirty the inode. 
260162306a36Sopenharmony_ci */ 260262306a36Sopenharmony_ci rsv_blocks = 1 + ext4_chunk_trans_blocks(inode, 260362306a36Sopenharmony_ci PAGE_SIZE >> inode->i_blkbits); 260462306a36Sopenharmony_ci } 260562306a36Sopenharmony_ci 260662306a36Sopenharmony_ci if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 260762306a36Sopenharmony_ci range_whole = 1; 260862306a36Sopenharmony_ci 260962306a36Sopenharmony_ci if (wbc->range_cyclic) { 261062306a36Sopenharmony_ci writeback_index = mapping->writeback_index; 261162306a36Sopenharmony_ci if (writeback_index) 261262306a36Sopenharmony_ci cycled = 0; 261362306a36Sopenharmony_ci mpd->first_page = writeback_index; 261462306a36Sopenharmony_ci mpd->last_page = -1; 261562306a36Sopenharmony_ci } else { 261662306a36Sopenharmony_ci mpd->first_page = wbc->range_start >> PAGE_SHIFT; 261762306a36Sopenharmony_ci mpd->last_page = wbc->range_end >> PAGE_SHIFT; 261862306a36Sopenharmony_ci } 261962306a36Sopenharmony_ci 262062306a36Sopenharmony_ci ext4_io_submit_init(&mpd->io_submit, wbc); 262162306a36Sopenharmony_ciretry: 262262306a36Sopenharmony_ci if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 262362306a36Sopenharmony_ci tag_pages_for_writeback(mapping, mpd->first_page, 262462306a36Sopenharmony_ci mpd->last_page); 262562306a36Sopenharmony_ci blk_start_plug(&plug); 262662306a36Sopenharmony_ci 262762306a36Sopenharmony_ci /* 262862306a36Sopenharmony_ci * First writeback pages that don't need mapping - we can avoid 262962306a36Sopenharmony_ci * starting a transaction unnecessarily and also avoid being blocked 263062306a36Sopenharmony_ci * in the block layer on device congestion while having transaction 263162306a36Sopenharmony_ci * started. 
263262306a36Sopenharmony_ci */ 263362306a36Sopenharmony_ci mpd->do_map = 0; 263462306a36Sopenharmony_ci mpd->scanned_until_end = 0; 263562306a36Sopenharmony_ci mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); 263662306a36Sopenharmony_ci if (!mpd->io_submit.io_end) { 263762306a36Sopenharmony_ci ret = -ENOMEM; 263862306a36Sopenharmony_ci goto unplug; 263962306a36Sopenharmony_ci } 264062306a36Sopenharmony_ci ret = mpage_prepare_extent_to_map(mpd); 264162306a36Sopenharmony_ci /* Unlock pages we didn't use */ 264262306a36Sopenharmony_ci mpage_release_unused_pages(mpd, false); 264362306a36Sopenharmony_ci /* Submit prepared bio */ 264462306a36Sopenharmony_ci ext4_io_submit(&mpd->io_submit); 264562306a36Sopenharmony_ci ext4_put_io_end_defer(mpd->io_submit.io_end); 264662306a36Sopenharmony_ci mpd->io_submit.io_end = NULL; 264762306a36Sopenharmony_ci if (ret < 0) 264862306a36Sopenharmony_ci goto unplug; 264962306a36Sopenharmony_ci 265062306a36Sopenharmony_ci while (!mpd->scanned_until_end && wbc->nr_to_write > 0) { 265162306a36Sopenharmony_ci /* For each extent of pages we use new io_end */ 265262306a36Sopenharmony_ci mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); 265362306a36Sopenharmony_ci if (!mpd->io_submit.io_end) { 265462306a36Sopenharmony_ci ret = -ENOMEM; 265562306a36Sopenharmony_ci break; 265662306a36Sopenharmony_ci } 265762306a36Sopenharmony_ci 265862306a36Sopenharmony_ci WARN_ON_ONCE(!mpd->can_map); 265962306a36Sopenharmony_ci /* 266062306a36Sopenharmony_ci * We have two constraints: We find one extent to map and we 266162306a36Sopenharmony_ci * must always write out whole page (makes a difference when 266262306a36Sopenharmony_ci * blocksize < pagesize) so that we don't block on IO when we 266362306a36Sopenharmony_ci * try to write out the rest of the page. Journalled mode is 266462306a36Sopenharmony_ci * not supported by delalloc. 
266562306a36Sopenharmony_ci */ 266662306a36Sopenharmony_ci BUG_ON(ext4_should_journal_data(inode)); 266762306a36Sopenharmony_ci needed_blocks = ext4_da_writepages_trans_blocks(inode); 266862306a36Sopenharmony_ci 266962306a36Sopenharmony_ci /* start a new transaction */ 267062306a36Sopenharmony_ci handle = ext4_journal_start_with_reserve(inode, 267162306a36Sopenharmony_ci EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks); 267262306a36Sopenharmony_ci if (IS_ERR(handle)) { 267362306a36Sopenharmony_ci ret = PTR_ERR(handle); 267462306a36Sopenharmony_ci ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " 267562306a36Sopenharmony_ci "%ld pages, ino %lu; err %d", __func__, 267662306a36Sopenharmony_ci wbc->nr_to_write, inode->i_ino, ret); 267762306a36Sopenharmony_ci /* Release allocated io_end */ 267862306a36Sopenharmony_ci ext4_put_io_end(mpd->io_submit.io_end); 267962306a36Sopenharmony_ci mpd->io_submit.io_end = NULL; 268062306a36Sopenharmony_ci break; 268162306a36Sopenharmony_ci } 268262306a36Sopenharmony_ci mpd->do_map = 1; 268362306a36Sopenharmony_ci 268462306a36Sopenharmony_ci trace_ext4_da_write_pages(inode, mpd->first_page, wbc); 268562306a36Sopenharmony_ci ret = mpage_prepare_extent_to_map(mpd); 268662306a36Sopenharmony_ci if (!ret && mpd->map.m_len) 268762306a36Sopenharmony_ci ret = mpage_map_and_submit_extent(handle, mpd, 268862306a36Sopenharmony_ci &give_up_on_write); 268962306a36Sopenharmony_ci /* 269062306a36Sopenharmony_ci * Caution: If the handle is synchronous, 269162306a36Sopenharmony_ci * ext4_journal_stop() can wait for transaction commit 269262306a36Sopenharmony_ci * to finish which may depend on writeback of pages to 269362306a36Sopenharmony_ci * complete or on page lock to be released. 
In that 269462306a36Sopenharmony_ci * case, we have to wait until after we have 269562306a36Sopenharmony_ci * submitted all the IO, released page locks we hold, 269662306a36Sopenharmony_ci * and dropped io_end reference (for extent conversion 269762306a36Sopenharmony_ci * to be able to complete) before stopping the handle. 269862306a36Sopenharmony_ci */ 269962306a36Sopenharmony_ci if (!ext4_handle_valid(handle) || handle->h_sync == 0) { 270062306a36Sopenharmony_ci ext4_journal_stop(handle); 270162306a36Sopenharmony_ci handle = NULL; 270262306a36Sopenharmony_ci mpd->do_map = 0; 270362306a36Sopenharmony_ci } 270462306a36Sopenharmony_ci /* Unlock pages we didn't use */ 270562306a36Sopenharmony_ci mpage_release_unused_pages(mpd, give_up_on_write); 270662306a36Sopenharmony_ci /* Submit prepared bio */ 270762306a36Sopenharmony_ci ext4_io_submit(&mpd->io_submit); 270862306a36Sopenharmony_ci 270962306a36Sopenharmony_ci /* 271062306a36Sopenharmony_ci * Drop our io_end reference we got from init. We have 271162306a36Sopenharmony_ci * to be careful and use deferred io_end finishing if 271262306a36Sopenharmony_ci * we are still holding the transaction as we can 271362306a36Sopenharmony_ci * release the last reference to io_end which may end 271462306a36Sopenharmony_ci * up doing unwritten extent conversion. 
271562306a36Sopenharmony_ci */ 271662306a36Sopenharmony_ci if (handle) { 271762306a36Sopenharmony_ci ext4_put_io_end_defer(mpd->io_submit.io_end); 271862306a36Sopenharmony_ci ext4_journal_stop(handle); 271962306a36Sopenharmony_ci } else 272062306a36Sopenharmony_ci ext4_put_io_end(mpd->io_submit.io_end); 272162306a36Sopenharmony_ci mpd->io_submit.io_end = NULL; 272262306a36Sopenharmony_ci 272362306a36Sopenharmony_ci if (ret == -ENOSPC && sbi->s_journal) { 272462306a36Sopenharmony_ci /* 272562306a36Sopenharmony_ci * Commit the transaction which would 272662306a36Sopenharmony_ci * free blocks released in the transaction 272762306a36Sopenharmony_ci * and try again 272862306a36Sopenharmony_ci */ 272962306a36Sopenharmony_ci jbd2_journal_force_commit_nested(sbi->s_journal); 273062306a36Sopenharmony_ci ret = 0; 273162306a36Sopenharmony_ci continue; 273262306a36Sopenharmony_ci } 273362306a36Sopenharmony_ci /* Fatal error - ENOMEM, EIO... */ 273462306a36Sopenharmony_ci if (ret) 273562306a36Sopenharmony_ci break; 273662306a36Sopenharmony_ci } 273762306a36Sopenharmony_ciunplug: 273862306a36Sopenharmony_ci blk_finish_plug(&plug); 273962306a36Sopenharmony_ci if (!ret && !cycled && wbc->nr_to_write > 0) { 274062306a36Sopenharmony_ci cycled = 1; 274162306a36Sopenharmony_ci mpd->last_page = writeback_index - 1; 274262306a36Sopenharmony_ci mpd->first_page = 0; 274362306a36Sopenharmony_ci goto retry; 274462306a36Sopenharmony_ci } 274562306a36Sopenharmony_ci 274662306a36Sopenharmony_ci /* Update index */ 274762306a36Sopenharmony_ci if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 274862306a36Sopenharmony_ci /* 274962306a36Sopenharmony_ci * Set the writeback_index so that range_cyclic 275062306a36Sopenharmony_ci * mode will write it back later 275162306a36Sopenharmony_ci */ 275262306a36Sopenharmony_ci mapping->writeback_index = mpd->first_page; 275362306a36Sopenharmony_ci 275462306a36Sopenharmony_ciout_writepages: 275562306a36Sopenharmony_ci 
trace_ext4_writepages_result(inode, wbc, ret, 275662306a36Sopenharmony_ci nr_to_write - wbc->nr_to_write); 275762306a36Sopenharmony_ci return ret; 275862306a36Sopenharmony_ci} 275962306a36Sopenharmony_ci 276062306a36Sopenharmony_cistatic int ext4_writepages(struct address_space *mapping, 276162306a36Sopenharmony_ci struct writeback_control *wbc) 276262306a36Sopenharmony_ci{ 276362306a36Sopenharmony_ci struct super_block *sb = mapping->host->i_sb; 276462306a36Sopenharmony_ci struct mpage_da_data mpd = { 276562306a36Sopenharmony_ci .inode = mapping->host, 276662306a36Sopenharmony_ci .wbc = wbc, 276762306a36Sopenharmony_ci .can_map = 1, 276862306a36Sopenharmony_ci }; 276962306a36Sopenharmony_ci int ret; 277062306a36Sopenharmony_ci int alloc_ctx; 277162306a36Sopenharmony_ci 277262306a36Sopenharmony_ci if (unlikely(ext4_forced_shutdown(sb))) 277362306a36Sopenharmony_ci return -EIO; 277462306a36Sopenharmony_ci 277562306a36Sopenharmony_ci alloc_ctx = ext4_writepages_down_read(sb); 277662306a36Sopenharmony_ci ret = ext4_do_writepages(&mpd); 277762306a36Sopenharmony_ci /* 277862306a36Sopenharmony_ci * For data=journal writeback we could have come across pages marked 277962306a36Sopenharmony_ci * for delayed dirtying (PageChecked) which were just added to the 278062306a36Sopenharmony_ci * running transaction. Try once more to get them to stable storage. 
278162306a36Sopenharmony_ci */ 278262306a36Sopenharmony_ci if (!ret && mpd.journalled_more_data) 278362306a36Sopenharmony_ci ret = ext4_do_writepages(&mpd); 278462306a36Sopenharmony_ci ext4_writepages_up_read(sb, alloc_ctx); 278562306a36Sopenharmony_ci 278662306a36Sopenharmony_ci return ret; 278762306a36Sopenharmony_ci} 278862306a36Sopenharmony_ci 278962306a36Sopenharmony_ciint ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode) 279062306a36Sopenharmony_ci{ 279162306a36Sopenharmony_ci struct writeback_control wbc = { 279262306a36Sopenharmony_ci .sync_mode = WB_SYNC_ALL, 279362306a36Sopenharmony_ci .nr_to_write = LONG_MAX, 279462306a36Sopenharmony_ci .range_start = jinode->i_dirty_start, 279562306a36Sopenharmony_ci .range_end = jinode->i_dirty_end, 279662306a36Sopenharmony_ci }; 279762306a36Sopenharmony_ci struct mpage_da_data mpd = { 279862306a36Sopenharmony_ci .inode = jinode->i_vfs_inode, 279962306a36Sopenharmony_ci .wbc = &wbc, 280062306a36Sopenharmony_ci .can_map = 0, 280162306a36Sopenharmony_ci }; 280262306a36Sopenharmony_ci return ext4_do_writepages(&mpd); 280362306a36Sopenharmony_ci} 280462306a36Sopenharmony_ci 280562306a36Sopenharmony_cistatic int ext4_dax_writepages(struct address_space *mapping, 280662306a36Sopenharmony_ci struct writeback_control *wbc) 280762306a36Sopenharmony_ci{ 280862306a36Sopenharmony_ci int ret; 280962306a36Sopenharmony_ci long nr_to_write = wbc->nr_to_write; 281062306a36Sopenharmony_ci struct inode *inode = mapping->host; 281162306a36Sopenharmony_ci int alloc_ctx; 281262306a36Sopenharmony_ci 281362306a36Sopenharmony_ci if (unlikely(ext4_forced_shutdown(inode->i_sb))) 281462306a36Sopenharmony_ci return -EIO; 281562306a36Sopenharmony_ci 281662306a36Sopenharmony_ci alloc_ctx = ext4_writepages_down_read(inode->i_sb); 281762306a36Sopenharmony_ci trace_ext4_writepages(inode, wbc); 281862306a36Sopenharmony_ci 281962306a36Sopenharmony_ci ret = dax_writeback_mapping_range(mapping, 282062306a36Sopenharmony_ci 
EXT4_SB(inode->i_sb)->s_daxdev, wbc); 282162306a36Sopenharmony_ci trace_ext4_writepages_result(inode, wbc, ret, 282262306a36Sopenharmony_ci nr_to_write - wbc->nr_to_write); 282362306a36Sopenharmony_ci ext4_writepages_up_read(inode->i_sb, alloc_ctx); 282462306a36Sopenharmony_ci return ret; 282562306a36Sopenharmony_ci} 282662306a36Sopenharmony_ci 282762306a36Sopenharmony_cistatic int ext4_nonda_switch(struct super_block *sb) 282862306a36Sopenharmony_ci{ 282962306a36Sopenharmony_ci s64 free_clusters, dirty_clusters; 283062306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(sb); 283162306a36Sopenharmony_ci 283262306a36Sopenharmony_ci /* 283362306a36Sopenharmony_ci * switch to non delalloc mode if we are running low 283462306a36Sopenharmony_ci * on free block. The free block accounting via percpu 283562306a36Sopenharmony_ci * counters can get slightly wrong with percpu_counter_batch getting 283662306a36Sopenharmony_ci * accumulated on each CPU without updating global counters 283762306a36Sopenharmony_ci * Delalloc need an accurate free block accounting. So switch 283862306a36Sopenharmony_ci * to non delalloc when we are near to error range. 283962306a36Sopenharmony_ci */ 284062306a36Sopenharmony_ci free_clusters = 284162306a36Sopenharmony_ci percpu_counter_read_positive(&sbi->s_freeclusters_counter); 284262306a36Sopenharmony_ci dirty_clusters = 284362306a36Sopenharmony_ci percpu_counter_read_positive(&sbi->s_dirtyclusters_counter); 284462306a36Sopenharmony_ci /* 284562306a36Sopenharmony_ci * Start pushing delalloc when 1/2 of free blocks are dirty. 
284662306a36Sopenharmony_ci */ 284762306a36Sopenharmony_ci if (dirty_clusters && (free_clusters < 2 * dirty_clusters)) 284862306a36Sopenharmony_ci try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); 284962306a36Sopenharmony_ci 285062306a36Sopenharmony_ci if (2 * free_clusters < 3 * dirty_clusters || 285162306a36Sopenharmony_ci free_clusters < (dirty_clusters + EXT4_FREECLUSTERS_WATERMARK)) { 285262306a36Sopenharmony_ci /* 285362306a36Sopenharmony_ci * free block count is less than 150% of dirty blocks 285462306a36Sopenharmony_ci * or free blocks is less than watermark 285562306a36Sopenharmony_ci */ 285662306a36Sopenharmony_ci return 1; 285762306a36Sopenharmony_ci } 285862306a36Sopenharmony_ci return 0; 285962306a36Sopenharmony_ci} 286062306a36Sopenharmony_ci 286162306a36Sopenharmony_cistatic int ext4_da_write_begin(struct file *file, struct address_space *mapping, 286262306a36Sopenharmony_ci loff_t pos, unsigned len, 286362306a36Sopenharmony_ci struct page **pagep, void **fsdata) 286462306a36Sopenharmony_ci{ 286562306a36Sopenharmony_ci int ret, retries = 0; 286662306a36Sopenharmony_ci struct folio *folio; 286762306a36Sopenharmony_ci pgoff_t index; 286862306a36Sopenharmony_ci struct inode *inode = mapping->host; 286962306a36Sopenharmony_ci 287062306a36Sopenharmony_ci if (unlikely(ext4_forced_shutdown(inode->i_sb))) 287162306a36Sopenharmony_ci return -EIO; 287262306a36Sopenharmony_ci 287362306a36Sopenharmony_ci index = pos >> PAGE_SHIFT; 287462306a36Sopenharmony_ci 287562306a36Sopenharmony_ci if (ext4_nonda_switch(inode->i_sb) || ext4_verity_in_progress(inode)) { 287662306a36Sopenharmony_ci *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; 287762306a36Sopenharmony_ci return ext4_write_begin(file, mapping, pos, 287862306a36Sopenharmony_ci len, pagep, fsdata); 287962306a36Sopenharmony_ci } 288062306a36Sopenharmony_ci *fsdata = (void *)0; 288162306a36Sopenharmony_ci trace_ext4_da_write_begin(inode, pos, len); 288262306a36Sopenharmony_ci 288362306a36Sopenharmony_ci if 
(ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 288462306a36Sopenharmony_ci ret = ext4_da_write_inline_data_begin(mapping, inode, pos, len, 288562306a36Sopenharmony_ci pagep, fsdata); 288662306a36Sopenharmony_ci if (ret < 0) 288762306a36Sopenharmony_ci return ret; 288862306a36Sopenharmony_ci if (ret == 1) 288962306a36Sopenharmony_ci return 0; 289062306a36Sopenharmony_ci } 289162306a36Sopenharmony_ci 289262306a36Sopenharmony_ciretry: 289362306a36Sopenharmony_ci folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN, 289462306a36Sopenharmony_ci mapping_gfp_mask(mapping)); 289562306a36Sopenharmony_ci if (IS_ERR(folio)) 289662306a36Sopenharmony_ci return PTR_ERR(folio); 289762306a36Sopenharmony_ci 289862306a36Sopenharmony_ci /* In case writeback began while the folio was unlocked */ 289962306a36Sopenharmony_ci folio_wait_stable(folio); 290062306a36Sopenharmony_ci 290162306a36Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION 290262306a36Sopenharmony_ci ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep); 290362306a36Sopenharmony_ci#else 290462306a36Sopenharmony_ci ret = __block_write_begin(&folio->page, pos, len, ext4_da_get_block_prep); 290562306a36Sopenharmony_ci#endif 290662306a36Sopenharmony_ci if (ret < 0) { 290762306a36Sopenharmony_ci folio_unlock(folio); 290862306a36Sopenharmony_ci folio_put(folio); 290962306a36Sopenharmony_ci /* 291062306a36Sopenharmony_ci * block_write_begin may have instantiated a few blocks 291162306a36Sopenharmony_ci * outside i_size. Trim these off again. Don't need 291262306a36Sopenharmony_ci * i_size_read because we hold inode lock. 
291362306a36Sopenharmony_ci */ 291462306a36Sopenharmony_ci if (pos + len > inode->i_size) 291562306a36Sopenharmony_ci ext4_truncate_failed_write(inode); 291662306a36Sopenharmony_ci 291762306a36Sopenharmony_ci if (ret == -ENOSPC && 291862306a36Sopenharmony_ci ext4_should_retry_alloc(inode->i_sb, &retries)) 291962306a36Sopenharmony_ci goto retry; 292062306a36Sopenharmony_ci return ret; 292162306a36Sopenharmony_ci } 292262306a36Sopenharmony_ci 292362306a36Sopenharmony_ci *pagep = &folio->page; 292462306a36Sopenharmony_ci return ret; 292562306a36Sopenharmony_ci} 292662306a36Sopenharmony_ci 292762306a36Sopenharmony_ci/* 292862306a36Sopenharmony_ci * Check if we should update i_disksize 292962306a36Sopenharmony_ci * when write to the end of file but not require block allocation 293062306a36Sopenharmony_ci */ 293162306a36Sopenharmony_cistatic int ext4_da_should_update_i_disksize(struct folio *folio, 293262306a36Sopenharmony_ci unsigned long offset) 293362306a36Sopenharmony_ci{ 293462306a36Sopenharmony_ci struct buffer_head *bh; 293562306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 293662306a36Sopenharmony_ci unsigned int idx; 293762306a36Sopenharmony_ci int i; 293862306a36Sopenharmony_ci 293962306a36Sopenharmony_ci bh = folio_buffers(folio); 294062306a36Sopenharmony_ci idx = offset >> inode->i_blkbits; 294162306a36Sopenharmony_ci 294262306a36Sopenharmony_ci for (i = 0; i < idx; i++) 294362306a36Sopenharmony_ci bh = bh->b_this_page; 294462306a36Sopenharmony_ci 294562306a36Sopenharmony_ci if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh)) 294662306a36Sopenharmony_ci return 0; 294762306a36Sopenharmony_ci return 1; 294862306a36Sopenharmony_ci} 294962306a36Sopenharmony_ci 295062306a36Sopenharmony_cistatic int ext4_da_do_write_end(struct address_space *mapping, 295162306a36Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 295262306a36Sopenharmony_ci struct page *page) 295362306a36Sopenharmony_ci{ 295462306a36Sopenharmony_ci struct 
inode *inode = mapping->host; 295562306a36Sopenharmony_ci loff_t old_size = inode->i_size; 295662306a36Sopenharmony_ci bool disksize_changed = false; 295762306a36Sopenharmony_ci loff_t new_i_size; 295862306a36Sopenharmony_ci 295962306a36Sopenharmony_ci /* 296062306a36Sopenharmony_ci * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES 296162306a36Sopenharmony_ci * flag, which all that's needed to trigger page writeback. 296262306a36Sopenharmony_ci */ 296362306a36Sopenharmony_ci copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL); 296462306a36Sopenharmony_ci new_i_size = pos + copied; 296562306a36Sopenharmony_ci 296662306a36Sopenharmony_ci /* 296762306a36Sopenharmony_ci * It's important to update i_size while still holding page lock, 296862306a36Sopenharmony_ci * because page writeout could otherwise come in and zero beyond 296962306a36Sopenharmony_ci * i_size. 297062306a36Sopenharmony_ci * 297162306a36Sopenharmony_ci * Since we are holding inode lock, we are sure i_disksize <= 297262306a36Sopenharmony_ci * i_size. We also know that if i_disksize < i_size, there are 297362306a36Sopenharmony_ci * delalloc writes pending in the range up to i_size. If the end of 297462306a36Sopenharmony_ci * the current write is <= i_size, there's no need to touch 297562306a36Sopenharmony_ci * i_disksize since writeback will push i_disksize up to i_size 297662306a36Sopenharmony_ci * eventually. If the end of the current write is > i_size and 297762306a36Sopenharmony_ci * inside an allocated block which ext4_da_should_update_i_disksize() 297862306a36Sopenharmony_ci * checked, we need to update i_disksize here as certain 297962306a36Sopenharmony_ci * ext4_writepages() paths not allocating blocks and update i_disksize. 
298062306a36Sopenharmony_ci */ 298162306a36Sopenharmony_ci if (new_i_size > inode->i_size) { 298262306a36Sopenharmony_ci unsigned long end; 298362306a36Sopenharmony_ci 298462306a36Sopenharmony_ci i_size_write(inode, new_i_size); 298562306a36Sopenharmony_ci end = (new_i_size - 1) & (PAGE_SIZE - 1); 298662306a36Sopenharmony_ci if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) { 298762306a36Sopenharmony_ci ext4_update_i_disksize(inode, new_i_size); 298862306a36Sopenharmony_ci disksize_changed = true; 298962306a36Sopenharmony_ci } 299062306a36Sopenharmony_ci } 299162306a36Sopenharmony_ci 299262306a36Sopenharmony_ci unlock_page(page); 299362306a36Sopenharmony_ci put_page(page); 299462306a36Sopenharmony_ci 299562306a36Sopenharmony_ci if (old_size < pos) 299662306a36Sopenharmony_ci pagecache_isize_extended(inode, old_size, pos); 299762306a36Sopenharmony_ci 299862306a36Sopenharmony_ci if (disksize_changed) { 299962306a36Sopenharmony_ci handle_t *handle; 300062306a36Sopenharmony_ci 300162306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 300262306a36Sopenharmony_ci if (IS_ERR(handle)) 300362306a36Sopenharmony_ci return PTR_ERR(handle); 300462306a36Sopenharmony_ci ext4_mark_inode_dirty(handle, inode); 300562306a36Sopenharmony_ci ext4_journal_stop(handle); 300662306a36Sopenharmony_ci } 300762306a36Sopenharmony_ci 300862306a36Sopenharmony_ci return copied; 300962306a36Sopenharmony_ci} 301062306a36Sopenharmony_ci 301162306a36Sopenharmony_cistatic int ext4_da_write_end(struct file *file, 301262306a36Sopenharmony_ci struct address_space *mapping, 301362306a36Sopenharmony_ci loff_t pos, unsigned len, unsigned copied, 301462306a36Sopenharmony_ci struct page *page, void *fsdata) 301562306a36Sopenharmony_ci{ 301662306a36Sopenharmony_ci struct inode *inode = mapping->host; 301762306a36Sopenharmony_ci int write_mode = (int)(unsigned long)fsdata; 301862306a36Sopenharmony_ci struct folio *folio = page_folio(page); 301962306a36Sopenharmony_ci 
302062306a36Sopenharmony_ci if (write_mode == FALL_BACK_TO_NONDELALLOC) 302162306a36Sopenharmony_ci return ext4_write_end(file, mapping, pos, 302262306a36Sopenharmony_ci len, copied, &folio->page, fsdata); 302362306a36Sopenharmony_ci 302462306a36Sopenharmony_ci trace_ext4_da_write_end(inode, pos, len, copied); 302562306a36Sopenharmony_ci 302662306a36Sopenharmony_ci if (write_mode != CONVERT_INLINE_DATA && 302762306a36Sopenharmony_ci ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && 302862306a36Sopenharmony_ci ext4_has_inline_data(inode)) 302962306a36Sopenharmony_ci return ext4_write_inline_data_end(inode, pos, len, copied, 303062306a36Sopenharmony_ci folio); 303162306a36Sopenharmony_ci 303262306a36Sopenharmony_ci if (unlikely(copied < len) && !PageUptodate(page)) 303362306a36Sopenharmony_ci copied = 0; 303462306a36Sopenharmony_ci 303562306a36Sopenharmony_ci return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page); 303662306a36Sopenharmony_ci} 303762306a36Sopenharmony_ci 303862306a36Sopenharmony_ci/* 303962306a36Sopenharmony_ci * Force all delayed allocation blocks to be allocated for a given inode. 304062306a36Sopenharmony_ci */ 304162306a36Sopenharmony_ciint ext4_alloc_da_blocks(struct inode *inode) 304262306a36Sopenharmony_ci{ 304362306a36Sopenharmony_ci trace_ext4_alloc_da_blocks(inode); 304462306a36Sopenharmony_ci 304562306a36Sopenharmony_ci if (!EXT4_I(inode)->i_reserved_data_blocks) 304662306a36Sopenharmony_ci return 0; 304762306a36Sopenharmony_ci 304862306a36Sopenharmony_ci /* 304962306a36Sopenharmony_ci * We do something simple for now. The filemap_flush() will 305062306a36Sopenharmony_ci * also start triggering a write of the data blocks, which is 305162306a36Sopenharmony_ci * not strictly speaking necessary (and for users of 305262306a36Sopenharmony_ci * laptop_mode, not even desirable). 
However, to do otherwise 305362306a36Sopenharmony_ci * would require replicating code paths in: 305462306a36Sopenharmony_ci * 305562306a36Sopenharmony_ci * ext4_writepages() -> 305662306a36Sopenharmony_ci * write_cache_pages() ---> (via passed in callback function) 305762306a36Sopenharmony_ci * __mpage_da_writepage() --> 305862306a36Sopenharmony_ci * mpage_add_bh_to_extent() 305962306a36Sopenharmony_ci * mpage_da_map_blocks() 306062306a36Sopenharmony_ci * 306162306a36Sopenharmony_ci * The problem is that write_cache_pages(), located in 306262306a36Sopenharmony_ci * mm/page-writeback.c, marks pages clean in preparation for 306362306a36Sopenharmony_ci * doing I/O, which is not desirable if we're not planning on 306462306a36Sopenharmony_ci * doing I/O at all. 306562306a36Sopenharmony_ci * 306662306a36Sopenharmony_ci * We could call write_cache_pages(), and then redirty all of 306762306a36Sopenharmony_ci * the pages by calling redirty_page_for_writepage() but that 306862306a36Sopenharmony_ci * would be ugly in the extreme. So instead we would need to 306962306a36Sopenharmony_ci * replicate parts of the code in the above functions, 307062306a36Sopenharmony_ci * simplifying them because we wouldn't actually intend to 307162306a36Sopenharmony_ci * write out the pages, but rather only collect contiguous 307262306a36Sopenharmony_ci * logical block extents, call the multi-block allocator, and 307362306a36Sopenharmony_ci * then update the buffer heads with the block allocations. 307462306a36Sopenharmony_ci * 307562306a36Sopenharmony_ci * For now, though, we'll cheat by calling filemap_flush(), 307662306a36Sopenharmony_ci * which will map the blocks, and start the I/O, but not 307762306a36Sopenharmony_ci * actually wait for the I/O to complete. 307862306a36Sopenharmony_ci */ 307962306a36Sopenharmony_ci return filemap_flush(inode->i_mapping); 308062306a36Sopenharmony_ci} 308162306a36Sopenharmony_ci 308262306a36Sopenharmony_ci/* 308362306a36Sopenharmony_ci * bmap() is special. 
It gets used by applications such as lilo and by 308462306a36Sopenharmony_ci * the swapper to find the on-disk block of a specific piece of data. 308562306a36Sopenharmony_ci * 308662306a36Sopenharmony_ci * Naturally, this is dangerous if the block concerned is still in the 308762306a36Sopenharmony_ci * journal. If somebody makes a swapfile on an ext4 data-journaling 308862306a36Sopenharmony_ci * filesystem and enables swap, then they may get a nasty shock when the 308962306a36Sopenharmony_ci * data getting swapped to that swapfile suddenly gets overwritten by 309062306a36Sopenharmony_ci * the original zero's written out previously to the journal and 309162306a36Sopenharmony_ci * awaiting writeback in the kernel's buffer cache. 309262306a36Sopenharmony_ci * 309362306a36Sopenharmony_ci * So, if we see any bmap calls here on a modified, data-journaled file, 309462306a36Sopenharmony_ci * take extra steps to flush any blocks which might be in the cache. 309562306a36Sopenharmony_ci */ 309662306a36Sopenharmony_cistatic sector_t ext4_bmap(struct address_space *mapping, sector_t block) 309762306a36Sopenharmony_ci{ 309862306a36Sopenharmony_ci struct inode *inode = mapping->host; 309962306a36Sopenharmony_ci sector_t ret = 0; 310062306a36Sopenharmony_ci 310162306a36Sopenharmony_ci inode_lock_shared(inode); 310262306a36Sopenharmony_ci /* 310362306a36Sopenharmony_ci * We can get here for an inline file via the FIBMAP ioctl 310462306a36Sopenharmony_ci */ 310562306a36Sopenharmony_ci if (ext4_has_inline_data(inode)) 310662306a36Sopenharmony_ci goto out; 310762306a36Sopenharmony_ci 310862306a36Sopenharmony_ci if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && 310962306a36Sopenharmony_ci (test_opt(inode->i_sb, DELALLOC) || 311062306a36Sopenharmony_ci ext4_should_journal_data(inode))) { 311162306a36Sopenharmony_ci /* 311262306a36Sopenharmony_ci * With delalloc or journalled data we want to sync the file so 311362306a36Sopenharmony_ci * that we can make sure we allocate blocks for 
file and data 311462306a36Sopenharmony_ci * is in place for the user to see it 311562306a36Sopenharmony_ci */ 311662306a36Sopenharmony_ci filemap_write_and_wait(mapping); 311762306a36Sopenharmony_ci } 311862306a36Sopenharmony_ci 311962306a36Sopenharmony_ci ret = iomap_bmap(mapping, block, &ext4_iomap_ops); 312062306a36Sopenharmony_ci 312162306a36Sopenharmony_ciout: 312262306a36Sopenharmony_ci inode_unlock_shared(inode); 312362306a36Sopenharmony_ci return ret; 312462306a36Sopenharmony_ci} 312562306a36Sopenharmony_ci 312662306a36Sopenharmony_cistatic int ext4_read_folio(struct file *file, struct folio *folio) 312762306a36Sopenharmony_ci{ 312862306a36Sopenharmony_ci int ret = -EAGAIN; 312962306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 313062306a36Sopenharmony_ci 313162306a36Sopenharmony_ci trace_ext4_read_folio(inode, folio); 313262306a36Sopenharmony_ci 313362306a36Sopenharmony_ci if (ext4_has_inline_data(inode)) 313462306a36Sopenharmony_ci ret = ext4_readpage_inline(inode, folio); 313562306a36Sopenharmony_ci 313662306a36Sopenharmony_ci if (ret == -EAGAIN) 313762306a36Sopenharmony_ci return ext4_mpage_readpages(inode, NULL, folio); 313862306a36Sopenharmony_ci 313962306a36Sopenharmony_ci return ret; 314062306a36Sopenharmony_ci} 314162306a36Sopenharmony_ci 314262306a36Sopenharmony_cistatic void ext4_readahead(struct readahead_control *rac) 314362306a36Sopenharmony_ci{ 314462306a36Sopenharmony_ci struct inode *inode = rac->mapping->host; 314562306a36Sopenharmony_ci 314662306a36Sopenharmony_ci /* If the file has inline data, no need to do readahead. 
*/ 314762306a36Sopenharmony_ci if (ext4_has_inline_data(inode)) 314862306a36Sopenharmony_ci return; 314962306a36Sopenharmony_ci 315062306a36Sopenharmony_ci ext4_mpage_readpages(inode, rac, NULL); 315162306a36Sopenharmony_ci} 315262306a36Sopenharmony_ci 315362306a36Sopenharmony_cistatic void ext4_invalidate_folio(struct folio *folio, size_t offset, 315462306a36Sopenharmony_ci size_t length) 315562306a36Sopenharmony_ci{ 315662306a36Sopenharmony_ci trace_ext4_invalidate_folio(folio, offset, length); 315762306a36Sopenharmony_ci 315862306a36Sopenharmony_ci /* No journalling happens on data buffers when this function is used */ 315962306a36Sopenharmony_ci WARN_ON(folio_buffers(folio) && buffer_jbd(folio_buffers(folio))); 316062306a36Sopenharmony_ci 316162306a36Sopenharmony_ci block_invalidate_folio(folio, offset, length); 316262306a36Sopenharmony_ci} 316362306a36Sopenharmony_ci 316462306a36Sopenharmony_cistatic int __ext4_journalled_invalidate_folio(struct folio *folio, 316562306a36Sopenharmony_ci size_t offset, size_t length) 316662306a36Sopenharmony_ci{ 316762306a36Sopenharmony_ci journal_t *journal = EXT4_JOURNAL(folio->mapping->host); 316862306a36Sopenharmony_ci 316962306a36Sopenharmony_ci trace_ext4_journalled_invalidate_folio(folio, offset, length); 317062306a36Sopenharmony_ci 317162306a36Sopenharmony_ci /* 317262306a36Sopenharmony_ci * If it's a full truncate we just forget about the pending dirtying 317362306a36Sopenharmony_ci */ 317462306a36Sopenharmony_ci if (offset == 0 && length == folio_size(folio)) 317562306a36Sopenharmony_ci folio_clear_checked(folio); 317662306a36Sopenharmony_ci 317762306a36Sopenharmony_ci return jbd2_journal_invalidate_folio(journal, folio, offset, length); 317862306a36Sopenharmony_ci} 317962306a36Sopenharmony_ci 318062306a36Sopenharmony_ci/* Wrapper for aops... 
*/ 318162306a36Sopenharmony_cistatic void ext4_journalled_invalidate_folio(struct folio *folio, 318262306a36Sopenharmony_ci size_t offset, 318362306a36Sopenharmony_ci size_t length) 318462306a36Sopenharmony_ci{ 318562306a36Sopenharmony_ci WARN_ON(__ext4_journalled_invalidate_folio(folio, offset, length) < 0); 318662306a36Sopenharmony_ci} 318762306a36Sopenharmony_ci 318862306a36Sopenharmony_cistatic bool ext4_release_folio(struct folio *folio, gfp_t wait) 318962306a36Sopenharmony_ci{ 319062306a36Sopenharmony_ci struct inode *inode = folio->mapping->host; 319162306a36Sopenharmony_ci journal_t *journal = EXT4_JOURNAL(inode); 319262306a36Sopenharmony_ci 319362306a36Sopenharmony_ci trace_ext4_release_folio(inode, folio); 319462306a36Sopenharmony_ci 319562306a36Sopenharmony_ci /* Page has dirty journalled data -> cannot release */ 319662306a36Sopenharmony_ci if (folio_test_checked(folio)) 319762306a36Sopenharmony_ci return false; 319862306a36Sopenharmony_ci if (journal) 319962306a36Sopenharmony_ci return jbd2_journal_try_to_free_buffers(journal, folio); 320062306a36Sopenharmony_ci else 320162306a36Sopenharmony_ci return try_to_free_buffers(folio); 320262306a36Sopenharmony_ci} 320362306a36Sopenharmony_ci 320462306a36Sopenharmony_cistatic bool ext4_inode_datasync_dirty(struct inode *inode) 320562306a36Sopenharmony_ci{ 320662306a36Sopenharmony_ci journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 320762306a36Sopenharmony_ci 320862306a36Sopenharmony_ci if (journal) { 320962306a36Sopenharmony_ci if (jbd2_transaction_committed(journal, 321062306a36Sopenharmony_ci EXT4_I(inode)->i_datasync_tid)) 321162306a36Sopenharmony_ci return false; 321262306a36Sopenharmony_ci if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT)) 321362306a36Sopenharmony_ci return !list_empty(&EXT4_I(inode)->i_fc_list); 321462306a36Sopenharmony_ci return true; 321562306a36Sopenharmony_ci } 321662306a36Sopenharmony_ci 321762306a36Sopenharmony_ci /* Any metadata buffers to write? 
*/ 321862306a36Sopenharmony_ci if (!list_empty(&inode->i_mapping->private_list)) 321962306a36Sopenharmony_ci return true; 322062306a36Sopenharmony_ci return inode->i_state & I_DIRTY_DATASYNC; 322162306a36Sopenharmony_ci} 322262306a36Sopenharmony_ci 322362306a36Sopenharmony_cistatic void ext4_set_iomap(struct inode *inode, struct iomap *iomap, 322462306a36Sopenharmony_ci struct ext4_map_blocks *map, loff_t offset, 322562306a36Sopenharmony_ci loff_t length, unsigned int flags) 322662306a36Sopenharmony_ci{ 322762306a36Sopenharmony_ci u8 blkbits = inode->i_blkbits; 322862306a36Sopenharmony_ci 322962306a36Sopenharmony_ci /* 323062306a36Sopenharmony_ci * Writes that span EOF might trigger an I/O size update on completion, 323162306a36Sopenharmony_ci * so consider them to be dirty for the purpose of O_DSYNC, even if 323262306a36Sopenharmony_ci * there is no other metadata changes being made or are pending. 323362306a36Sopenharmony_ci */ 323462306a36Sopenharmony_ci iomap->flags = 0; 323562306a36Sopenharmony_ci if (ext4_inode_datasync_dirty(inode) || 323662306a36Sopenharmony_ci offset + length > i_size_read(inode)) 323762306a36Sopenharmony_ci iomap->flags |= IOMAP_F_DIRTY; 323862306a36Sopenharmony_ci 323962306a36Sopenharmony_ci if (map->m_flags & EXT4_MAP_NEW) 324062306a36Sopenharmony_ci iomap->flags |= IOMAP_F_NEW; 324162306a36Sopenharmony_ci 324262306a36Sopenharmony_ci if (flags & IOMAP_DAX) 324362306a36Sopenharmony_ci iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev; 324462306a36Sopenharmony_ci else 324562306a36Sopenharmony_ci iomap->bdev = inode->i_sb->s_bdev; 324662306a36Sopenharmony_ci iomap->offset = (u64) map->m_lblk << blkbits; 324762306a36Sopenharmony_ci iomap->length = (u64) map->m_len << blkbits; 324862306a36Sopenharmony_ci 324962306a36Sopenharmony_ci if ((map->m_flags & EXT4_MAP_MAPPED) && 325062306a36Sopenharmony_ci !ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 325162306a36Sopenharmony_ci iomap->flags |= IOMAP_F_MERGED; 325262306a36Sopenharmony_ci 
325362306a36Sopenharmony_ci /* 325462306a36Sopenharmony_ci * Flags passed to ext4_map_blocks() for direct I/O writes can result 325562306a36Sopenharmony_ci * in m_flags having both EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits 325662306a36Sopenharmony_ci * set. In order for any allocated unwritten extents to be converted 325762306a36Sopenharmony_ci * into written extents correctly within the ->end_io() handler, we 325862306a36Sopenharmony_ci * need to ensure that the iomap->type is set appropriately. Hence, the 325962306a36Sopenharmony_ci * reason why we need to check whether the EXT4_MAP_UNWRITTEN bit has 326062306a36Sopenharmony_ci * been set first. 326162306a36Sopenharmony_ci */ 326262306a36Sopenharmony_ci if (map->m_flags & EXT4_MAP_UNWRITTEN) { 326362306a36Sopenharmony_ci iomap->type = IOMAP_UNWRITTEN; 326462306a36Sopenharmony_ci iomap->addr = (u64) map->m_pblk << blkbits; 326562306a36Sopenharmony_ci if (flags & IOMAP_DAX) 326662306a36Sopenharmony_ci iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off; 326762306a36Sopenharmony_ci } else if (map->m_flags & EXT4_MAP_MAPPED) { 326862306a36Sopenharmony_ci iomap->type = IOMAP_MAPPED; 326962306a36Sopenharmony_ci iomap->addr = (u64) map->m_pblk << blkbits; 327062306a36Sopenharmony_ci if (flags & IOMAP_DAX) 327162306a36Sopenharmony_ci iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off; 327262306a36Sopenharmony_ci } else { 327362306a36Sopenharmony_ci iomap->type = IOMAP_HOLE; 327462306a36Sopenharmony_ci iomap->addr = IOMAP_NULL_ADDR; 327562306a36Sopenharmony_ci } 327662306a36Sopenharmony_ci} 327762306a36Sopenharmony_ci 327862306a36Sopenharmony_cistatic int ext4_iomap_alloc(struct inode *inode, struct ext4_map_blocks *map, 327962306a36Sopenharmony_ci unsigned int flags) 328062306a36Sopenharmony_ci{ 328162306a36Sopenharmony_ci handle_t *handle; 328262306a36Sopenharmony_ci u8 blkbits = inode->i_blkbits; 328362306a36Sopenharmony_ci int ret, dio_credits, m_flags = 0, retries = 0; 328462306a36Sopenharmony_ci 
328562306a36Sopenharmony_ci /* 328662306a36Sopenharmony_ci * Trim the mapping request to the maximum value that we can map at 328762306a36Sopenharmony_ci * once for direct I/O. 328862306a36Sopenharmony_ci */ 328962306a36Sopenharmony_ci if (map->m_len > DIO_MAX_BLOCKS) 329062306a36Sopenharmony_ci map->m_len = DIO_MAX_BLOCKS; 329162306a36Sopenharmony_ci dio_credits = ext4_chunk_trans_blocks(inode, map->m_len); 329262306a36Sopenharmony_ci 329362306a36Sopenharmony_ciretry: 329462306a36Sopenharmony_ci /* 329562306a36Sopenharmony_ci * Either we allocate blocks and then don't get an unwritten extent, so 329662306a36Sopenharmony_ci * in that case we have reserved enough credits. Or, the blocks are 329762306a36Sopenharmony_ci * already allocated and unwritten. In that case, the extent conversion 329862306a36Sopenharmony_ci * fits into the credits as well. 329962306a36Sopenharmony_ci */ 330062306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits); 330162306a36Sopenharmony_ci if (IS_ERR(handle)) 330262306a36Sopenharmony_ci return PTR_ERR(handle); 330362306a36Sopenharmony_ci 330462306a36Sopenharmony_ci /* 330562306a36Sopenharmony_ci * DAX and direct I/O are the only two operations that are currently 330662306a36Sopenharmony_ci * supported with IOMAP_WRITE. 330762306a36Sopenharmony_ci */ 330862306a36Sopenharmony_ci WARN_ON(!(flags & (IOMAP_DAX | IOMAP_DIRECT))); 330962306a36Sopenharmony_ci if (flags & IOMAP_DAX) 331062306a36Sopenharmony_ci m_flags = EXT4_GET_BLOCKS_CREATE_ZERO; 331162306a36Sopenharmony_ci /* 331262306a36Sopenharmony_ci * We use i_size instead of i_disksize here because delalloc writeback 331362306a36Sopenharmony_ci * can complete at any point during the I/O and subsequently push the 331462306a36Sopenharmony_ci * i_disksize out to i_size. This could be beyond where direct I/O is 331562306a36Sopenharmony_ci * happening and thus expose allocated blocks to direct I/O reads. 
331662306a36Sopenharmony_ci */ 331762306a36Sopenharmony_ci else if (((loff_t)map->m_lblk << blkbits) >= i_size_read(inode)) 331862306a36Sopenharmony_ci m_flags = EXT4_GET_BLOCKS_CREATE; 331962306a36Sopenharmony_ci else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 332062306a36Sopenharmony_ci m_flags = EXT4_GET_BLOCKS_IO_CREATE_EXT; 332162306a36Sopenharmony_ci 332262306a36Sopenharmony_ci ret = ext4_map_blocks(handle, inode, map, m_flags); 332362306a36Sopenharmony_ci 332462306a36Sopenharmony_ci /* 332562306a36Sopenharmony_ci * We cannot fill holes in indirect tree based inodes as that could 332662306a36Sopenharmony_ci * expose stale data in the case of a crash. Use the magic error code 332762306a36Sopenharmony_ci * to fallback to buffered I/O. 332862306a36Sopenharmony_ci */ 332962306a36Sopenharmony_ci if (!m_flags && !ret) 333062306a36Sopenharmony_ci ret = -ENOTBLK; 333162306a36Sopenharmony_ci 333262306a36Sopenharmony_ci ext4_journal_stop(handle); 333362306a36Sopenharmony_ci if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 333462306a36Sopenharmony_ci goto retry; 333562306a36Sopenharmony_ci 333662306a36Sopenharmony_ci return ret; 333762306a36Sopenharmony_ci} 333862306a36Sopenharmony_ci 333962306a36Sopenharmony_ci 334062306a36Sopenharmony_cistatic int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, 334162306a36Sopenharmony_ci unsigned flags, struct iomap *iomap, struct iomap *srcmap) 334262306a36Sopenharmony_ci{ 334362306a36Sopenharmony_ci int ret; 334462306a36Sopenharmony_ci struct ext4_map_blocks map; 334562306a36Sopenharmony_ci u8 blkbits = inode->i_blkbits; 334662306a36Sopenharmony_ci 334762306a36Sopenharmony_ci if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) 334862306a36Sopenharmony_ci return -EINVAL; 334962306a36Sopenharmony_ci 335062306a36Sopenharmony_ci if (WARN_ON_ONCE(ext4_has_inline_data(inode))) 335162306a36Sopenharmony_ci return -ERANGE; 335262306a36Sopenharmony_ci 335362306a36Sopenharmony_ci /* 
335462306a36Sopenharmony_ci * Calculate the first and last logical blocks respectively. 335562306a36Sopenharmony_ci */ 335662306a36Sopenharmony_ci map.m_lblk = offset >> blkbits; 335762306a36Sopenharmony_ci map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits, 335862306a36Sopenharmony_ci EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1; 335962306a36Sopenharmony_ci 336062306a36Sopenharmony_ci if (flags & IOMAP_WRITE) { 336162306a36Sopenharmony_ci /* 336262306a36Sopenharmony_ci * We check here if the blocks are already allocated, then we 336362306a36Sopenharmony_ci * don't need to start a journal txn and we can directly return 336462306a36Sopenharmony_ci * the mapping information. This could boost performance 336562306a36Sopenharmony_ci * especially in multi-threaded overwrite requests. 336662306a36Sopenharmony_ci */ 336762306a36Sopenharmony_ci if (offset + length <= i_size_read(inode)) { 336862306a36Sopenharmony_ci ret = ext4_map_blocks(NULL, inode, &map, 0); 336962306a36Sopenharmony_ci if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED)) 337062306a36Sopenharmony_ci goto out; 337162306a36Sopenharmony_ci } 337262306a36Sopenharmony_ci ret = ext4_iomap_alloc(inode, &map, flags); 337362306a36Sopenharmony_ci } else { 337462306a36Sopenharmony_ci ret = ext4_map_blocks(NULL, inode, &map, 0); 337562306a36Sopenharmony_ci } 337662306a36Sopenharmony_ci 337762306a36Sopenharmony_ci if (ret < 0) 337862306a36Sopenharmony_ci return ret; 337962306a36Sopenharmony_ciout: 338062306a36Sopenharmony_ci /* 338162306a36Sopenharmony_ci * When inline encryption is enabled, sometimes I/O to an encrypted file 338262306a36Sopenharmony_ci * has to be broken up to guarantee DUN contiguity. Handle this by 338362306a36Sopenharmony_ci * limiting the length of the mapping returned. 
338462306a36Sopenharmony_ci */ 338562306a36Sopenharmony_ci map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len); 338662306a36Sopenharmony_ci 338762306a36Sopenharmony_ci ext4_set_iomap(inode, iomap, &map, offset, length, flags); 338862306a36Sopenharmony_ci 338962306a36Sopenharmony_ci return 0; 339062306a36Sopenharmony_ci} 339162306a36Sopenharmony_ci 339262306a36Sopenharmony_cistatic int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset, 339362306a36Sopenharmony_ci loff_t length, unsigned flags, struct iomap *iomap, 339462306a36Sopenharmony_ci struct iomap *srcmap) 339562306a36Sopenharmony_ci{ 339662306a36Sopenharmony_ci int ret; 339762306a36Sopenharmony_ci 339862306a36Sopenharmony_ci /* 339962306a36Sopenharmony_ci * Even for writes we don't need to allocate blocks, so just pretend 340062306a36Sopenharmony_ci * we are reading to save overhead of starting a transaction. 340162306a36Sopenharmony_ci */ 340262306a36Sopenharmony_ci flags &= ~IOMAP_WRITE; 340362306a36Sopenharmony_ci ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap); 340462306a36Sopenharmony_ci WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED); 340562306a36Sopenharmony_ci return ret; 340662306a36Sopenharmony_ci} 340762306a36Sopenharmony_ci 340862306a36Sopenharmony_cistatic int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, 340962306a36Sopenharmony_ci ssize_t written, unsigned flags, struct iomap *iomap) 341062306a36Sopenharmony_ci{ 341162306a36Sopenharmony_ci /* 341262306a36Sopenharmony_ci * Check to see whether an error occurred while writing out the data to 341362306a36Sopenharmony_ci * the allocated blocks. If so, return the magic error code so that we 341462306a36Sopenharmony_ci * fallback to buffered I/O and attempt to complete the remainder of 341562306a36Sopenharmony_ci * the I/O. Any blocks that may have been allocated in preparation for 341662306a36Sopenharmony_ci * the direct I/O will be reused during buffered I/O. 
341762306a36Sopenharmony_ci */ 341862306a36Sopenharmony_ci if (flags & (IOMAP_WRITE | IOMAP_DIRECT) && written == 0) 341962306a36Sopenharmony_ci return -ENOTBLK; 342062306a36Sopenharmony_ci 342162306a36Sopenharmony_ci return 0; 342262306a36Sopenharmony_ci} 342362306a36Sopenharmony_ci 342462306a36Sopenharmony_ciconst struct iomap_ops ext4_iomap_ops = { 342562306a36Sopenharmony_ci .iomap_begin = ext4_iomap_begin, 342662306a36Sopenharmony_ci .iomap_end = ext4_iomap_end, 342762306a36Sopenharmony_ci}; 342862306a36Sopenharmony_ci 342962306a36Sopenharmony_ciconst struct iomap_ops ext4_iomap_overwrite_ops = { 343062306a36Sopenharmony_ci .iomap_begin = ext4_iomap_overwrite_begin, 343162306a36Sopenharmony_ci .iomap_end = ext4_iomap_end, 343262306a36Sopenharmony_ci}; 343362306a36Sopenharmony_ci 343462306a36Sopenharmony_cistatic bool ext4_iomap_is_delalloc(struct inode *inode, 343562306a36Sopenharmony_ci struct ext4_map_blocks *map) 343662306a36Sopenharmony_ci{ 343762306a36Sopenharmony_ci struct extent_status es; 343862306a36Sopenharmony_ci ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1; 343962306a36Sopenharmony_ci 344062306a36Sopenharmony_ci ext4_es_find_extent_range(inode, &ext4_es_is_delayed, 344162306a36Sopenharmony_ci map->m_lblk, end, &es); 344262306a36Sopenharmony_ci 344362306a36Sopenharmony_ci if (!es.es_len || es.es_lblk > end) 344462306a36Sopenharmony_ci return false; 344562306a36Sopenharmony_ci 344662306a36Sopenharmony_ci if (es.es_lblk > map->m_lblk) { 344762306a36Sopenharmony_ci map->m_len = es.es_lblk - map->m_lblk; 344862306a36Sopenharmony_ci return false; 344962306a36Sopenharmony_ci } 345062306a36Sopenharmony_ci 345162306a36Sopenharmony_ci offset = map->m_lblk - es.es_lblk; 345262306a36Sopenharmony_ci map->m_len = es.es_len - offset; 345362306a36Sopenharmony_ci 345462306a36Sopenharmony_ci return true; 345562306a36Sopenharmony_ci} 345662306a36Sopenharmony_ci 345762306a36Sopenharmony_cistatic int ext4_iomap_begin_report(struct inode *inode, loff_t 
offset, 345862306a36Sopenharmony_ci loff_t length, unsigned int flags, 345962306a36Sopenharmony_ci struct iomap *iomap, struct iomap *srcmap) 346062306a36Sopenharmony_ci{ 346162306a36Sopenharmony_ci int ret; 346262306a36Sopenharmony_ci bool delalloc = false; 346362306a36Sopenharmony_ci struct ext4_map_blocks map; 346462306a36Sopenharmony_ci u8 blkbits = inode->i_blkbits; 346562306a36Sopenharmony_ci 346662306a36Sopenharmony_ci if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK) 346762306a36Sopenharmony_ci return -EINVAL; 346862306a36Sopenharmony_ci 346962306a36Sopenharmony_ci if (ext4_has_inline_data(inode)) { 347062306a36Sopenharmony_ci ret = ext4_inline_data_iomap(inode, iomap); 347162306a36Sopenharmony_ci if (ret != -EAGAIN) { 347262306a36Sopenharmony_ci if (ret == 0 && offset >= iomap->length) 347362306a36Sopenharmony_ci ret = -ENOENT; 347462306a36Sopenharmony_ci return ret; 347562306a36Sopenharmony_ci } 347662306a36Sopenharmony_ci } 347762306a36Sopenharmony_ci 347862306a36Sopenharmony_ci /* 347962306a36Sopenharmony_ci * Calculate the first and last logical block respectively. 348062306a36Sopenharmony_ci */ 348162306a36Sopenharmony_ci map.m_lblk = offset >> blkbits; 348262306a36Sopenharmony_ci map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits, 348362306a36Sopenharmony_ci EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1; 348462306a36Sopenharmony_ci 348562306a36Sopenharmony_ci /* 348662306a36Sopenharmony_ci * Fiemap callers may call for offset beyond s_bitmap_maxbytes. 348762306a36Sopenharmony_ci * So handle it here itself instead of querying ext4_map_blocks(). 348862306a36Sopenharmony_ci * Since ext4_map_blocks() will warn about it and will return 348962306a36Sopenharmony_ci * -EIO error. 
349062306a36Sopenharmony_ci */ 349162306a36Sopenharmony_ci if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 349262306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 349362306a36Sopenharmony_ci 349462306a36Sopenharmony_ci if (offset >= sbi->s_bitmap_maxbytes) { 349562306a36Sopenharmony_ci map.m_flags = 0; 349662306a36Sopenharmony_ci goto set_iomap; 349762306a36Sopenharmony_ci } 349862306a36Sopenharmony_ci } 349962306a36Sopenharmony_ci 350062306a36Sopenharmony_ci ret = ext4_map_blocks(NULL, inode, &map, 0); 350162306a36Sopenharmony_ci if (ret < 0) 350262306a36Sopenharmony_ci return ret; 350362306a36Sopenharmony_ci if (ret == 0) 350462306a36Sopenharmony_ci delalloc = ext4_iomap_is_delalloc(inode, &map); 350562306a36Sopenharmony_ci 350662306a36Sopenharmony_ciset_iomap: 350762306a36Sopenharmony_ci ext4_set_iomap(inode, iomap, &map, offset, length, flags); 350862306a36Sopenharmony_ci if (delalloc && iomap->type == IOMAP_HOLE) 350962306a36Sopenharmony_ci iomap->type = IOMAP_DELALLOC; 351062306a36Sopenharmony_ci 351162306a36Sopenharmony_ci return 0; 351262306a36Sopenharmony_ci} 351362306a36Sopenharmony_ci 351462306a36Sopenharmony_ciconst struct iomap_ops ext4_iomap_report_ops = { 351562306a36Sopenharmony_ci .iomap_begin = ext4_iomap_begin_report, 351662306a36Sopenharmony_ci}; 351762306a36Sopenharmony_ci 351862306a36Sopenharmony_ci/* 351962306a36Sopenharmony_ci * For data=journal mode, folio should be marked dirty only when it was 352062306a36Sopenharmony_ci * writeably mapped. When that happens, it was already attached to the 352162306a36Sopenharmony_ci * transaction and marked as jbddirty (we take care of this in 352262306a36Sopenharmony_ci * ext4_page_mkwrite()). On transaction commit, we writeprotect page mappings 352362306a36Sopenharmony_ci * so we should have nothing to do here, except for the case when someone 352462306a36Sopenharmony_ci * had the page pinned and dirtied the page through this pin (e.g. 
by doing 352562306a36Sopenharmony_ci * direct IO to it). In that case we'd need to attach buffers here to the 352662306a36Sopenharmony_ci * transaction but we cannot due to lock ordering. We cannot just dirty the 352762306a36Sopenharmony_ci * folio and leave attached buffers clean, because the buffers' dirty state is 352862306a36Sopenharmony_ci * "definitive". We cannot just set the buffers dirty or jbddirty because all 352962306a36Sopenharmony_ci * the journalling code will explode. So what we do is to mark the folio 353062306a36Sopenharmony_ci * "pending dirty" and next time ext4_writepages() is called, attach buffers 353162306a36Sopenharmony_ci * to the transaction appropriately. 353262306a36Sopenharmony_ci */ 353362306a36Sopenharmony_cistatic bool ext4_journalled_dirty_folio(struct address_space *mapping, 353462306a36Sopenharmony_ci struct folio *folio) 353562306a36Sopenharmony_ci{ 353662306a36Sopenharmony_ci WARN_ON_ONCE(!folio_buffers(folio)); 353762306a36Sopenharmony_ci if (folio_maybe_dma_pinned(folio)) 353862306a36Sopenharmony_ci folio_set_checked(folio); 353962306a36Sopenharmony_ci return filemap_dirty_folio(mapping, folio); 354062306a36Sopenharmony_ci} 354162306a36Sopenharmony_ci 354262306a36Sopenharmony_cistatic bool ext4_dirty_folio(struct address_space *mapping, struct folio *folio) 354362306a36Sopenharmony_ci{ 354462306a36Sopenharmony_ci WARN_ON_ONCE(!folio_test_locked(folio) && !folio_test_dirty(folio)); 354562306a36Sopenharmony_ci WARN_ON_ONCE(!folio_buffers(folio)); 354662306a36Sopenharmony_ci return block_dirty_folio(mapping, folio); 354762306a36Sopenharmony_ci} 354862306a36Sopenharmony_ci 354962306a36Sopenharmony_cistatic int ext4_iomap_swap_activate(struct swap_info_struct *sis, 355062306a36Sopenharmony_ci struct file *file, sector_t *span) 355162306a36Sopenharmony_ci{ 355262306a36Sopenharmony_ci return iomap_swapfile_activate(sis, file, span, 355362306a36Sopenharmony_ci &ext4_iomap_report_ops); 355462306a36Sopenharmony_ci} 
355562306a36Sopenharmony_ci 355662306a36Sopenharmony_cistatic const struct address_space_operations ext4_aops = { 355762306a36Sopenharmony_ci .read_folio = ext4_read_folio, 355862306a36Sopenharmony_ci .readahead = ext4_readahead, 355962306a36Sopenharmony_ci .writepages = ext4_writepages, 356062306a36Sopenharmony_ci .write_begin = ext4_write_begin, 356162306a36Sopenharmony_ci .write_end = ext4_write_end, 356262306a36Sopenharmony_ci .dirty_folio = ext4_dirty_folio, 356362306a36Sopenharmony_ci .bmap = ext4_bmap, 356462306a36Sopenharmony_ci .invalidate_folio = ext4_invalidate_folio, 356562306a36Sopenharmony_ci .release_folio = ext4_release_folio, 356662306a36Sopenharmony_ci .direct_IO = noop_direct_IO, 356762306a36Sopenharmony_ci .migrate_folio = buffer_migrate_folio, 356862306a36Sopenharmony_ci .is_partially_uptodate = block_is_partially_uptodate, 356962306a36Sopenharmony_ci .error_remove_page = generic_error_remove_page, 357062306a36Sopenharmony_ci .swap_activate = ext4_iomap_swap_activate, 357162306a36Sopenharmony_ci}; 357262306a36Sopenharmony_ci 357362306a36Sopenharmony_cistatic const struct address_space_operations ext4_journalled_aops = { 357462306a36Sopenharmony_ci .read_folio = ext4_read_folio, 357562306a36Sopenharmony_ci .readahead = ext4_readahead, 357662306a36Sopenharmony_ci .writepages = ext4_writepages, 357762306a36Sopenharmony_ci .write_begin = ext4_write_begin, 357862306a36Sopenharmony_ci .write_end = ext4_journalled_write_end, 357962306a36Sopenharmony_ci .dirty_folio = ext4_journalled_dirty_folio, 358062306a36Sopenharmony_ci .bmap = ext4_bmap, 358162306a36Sopenharmony_ci .invalidate_folio = ext4_journalled_invalidate_folio, 358262306a36Sopenharmony_ci .release_folio = ext4_release_folio, 358362306a36Sopenharmony_ci .direct_IO = noop_direct_IO, 358462306a36Sopenharmony_ci .migrate_folio = buffer_migrate_folio_norefs, 358562306a36Sopenharmony_ci .is_partially_uptodate = block_is_partially_uptodate, 358662306a36Sopenharmony_ci .error_remove_page = 
generic_error_remove_page, 358762306a36Sopenharmony_ci .swap_activate = ext4_iomap_swap_activate, 358862306a36Sopenharmony_ci}; 358962306a36Sopenharmony_ci 359062306a36Sopenharmony_cistatic const struct address_space_operations ext4_da_aops = { 359162306a36Sopenharmony_ci .read_folio = ext4_read_folio, 359262306a36Sopenharmony_ci .readahead = ext4_readahead, 359362306a36Sopenharmony_ci .writepages = ext4_writepages, 359462306a36Sopenharmony_ci .write_begin = ext4_da_write_begin, 359562306a36Sopenharmony_ci .write_end = ext4_da_write_end, 359662306a36Sopenharmony_ci .dirty_folio = ext4_dirty_folio, 359762306a36Sopenharmony_ci .bmap = ext4_bmap, 359862306a36Sopenharmony_ci .invalidate_folio = ext4_invalidate_folio, 359962306a36Sopenharmony_ci .release_folio = ext4_release_folio, 360062306a36Sopenharmony_ci .direct_IO = noop_direct_IO, 360162306a36Sopenharmony_ci .migrate_folio = buffer_migrate_folio, 360262306a36Sopenharmony_ci .is_partially_uptodate = block_is_partially_uptodate, 360362306a36Sopenharmony_ci .error_remove_page = generic_error_remove_page, 360462306a36Sopenharmony_ci .swap_activate = ext4_iomap_swap_activate, 360562306a36Sopenharmony_ci}; 360662306a36Sopenharmony_ci 360762306a36Sopenharmony_cistatic const struct address_space_operations ext4_dax_aops = { 360862306a36Sopenharmony_ci .writepages = ext4_dax_writepages, 360962306a36Sopenharmony_ci .direct_IO = noop_direct_IO, 361062306a36Sopenharmony_ci .dirty_folio = noop_dirty_folio, 361162306a36Sopenharmony_ci .bmap = ext4_bmap, 361262306a36Sopenharmony_ci .swap_activate = ext4_iomap_swap_activate, 361362306a36Sopenharmony_ci}; 361462306a36Sopenharmony_ci 361562306a36Sopenharmony_civoid ext4_set_aops(struct inode *inode) 361662306a36Sopenharmony_ci{ 361762306a36Sopenharmony_ci switch (ext4_inode_journal_mode(inode)) { 361862306a36Sopenharmony_ci case EXT4_INODE_ORDERED_DATA_MODE: 361962306a36Sopenharmony_ci case EXT4_INODE_WRITEBACK_DATA_MODE: 362062306a36Sopenharmony_ci break; 
362162306a36Sopenharmony_ci case EXT4_INODE_JOURNAL_DATA_MODE: 362262306a36Sopenharmony_ci inode->i_mapping->a_ops = &ext4_journalled_aops; 362362306a36Sopenharmony_ci return; 362462306a36Sopenharmony_ci default: 362562306a36Sopenharmony_ci BUG(); 362662306a36Sopenharmony_ci } 362762306a36Sopenharmony_ci if (IS_DAX(inode)) 362862306a36Sopenharmony_ci inode->i_mapping->a_ops = &ext4_dax_aops; 362962306a36Sopenharmony_ci else if (test_opt(inode->i_sb, DELALLOC)) 363062306a36Sopenharmony_ci inode->i_mapping->a_ops = &ext4_da_aops; 363162306a36Sopenharmony_ci else 363262306a36Sopenharmony_ci inode->i_mapping->a_ops = &ext4_aops; 363362306a36Sopenharmony_ci} 363462306a36Sopenharmony_ci 363562306a36Sopenharmony_cistatic int __ext4_block_zero_page_range(handle_t *handle, 363662306a36Sopenharmony_ci struct address_space *mapping, loff_t from, loff_t length) 363762306a36Sopenharmony_ci{ 363862306a36Sopenharmony_ci ext4_fsblk_t index = from >> PAGE_SHIFT; 363962306a36Sopenharmony_ci unsigned offset = from & (PAGE_SIZE-1); 364062306a36Sopenharmony_ci unsigned blocksize, pos; 364162306a36Sopenharmony_ci ext4_lblk_t iblock; 364262306a36Sopenharmony_ci struct inode *inode = mapping->host; 364362306a36Sopenharmony_ci struct buffer_head *bh; 364462306a36Sopenharmony_ci struct folio *folio; 364562306a36Sopenharmony_ci int err = 0; 364662306a36Sopenharmony_ci 364762306a36Sopenharmony_ci folio = __filemap_get_folio(mapping, from >> PAGE_SHIFT, 364862306a36Sopenharmony_ci FGP_LOCK | FGP_ACCESSED | FGP_CREAT, 364962306a36Sopenharmony_ci mapping_gfp_constraint(mapping, ~__GFP_FS)); 365062306a36Sopenharmony_ci if (IS_ERR(folio)) 365162306a36Sopenharmony_ci return PTR_ERR(folio); 365262306a36Sopenharmony_ci 365362306a36Sopenharmony_ci blocksize = inode->i_sb->s_blocksize; 365462306a36Sopenharmony_ci 365562306a36Sopenharmony_ci iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits); 365662306a36Sopenharmony_ci 365762306a36Sopenharmony_ci bh = folio_buffers(folio); 
365862306a36Sopenharmony_ci if (!bh) { 365962306a36Sopenharmony_ci create_empty_buffers(&folio->page, blocksize, 0); 366062306a36Sopenharmony_ci bh = folio_buffers(folio); 366162306a36Sopenharmony_ci } 366262306a36Sopenharmony_ci 366362306a36Sopenharmony_ci /* Find the buffer that contains "offset" */ 366462306a36Sopenharmony_ci pos = blocksize; 366562306a36Sopenharmony_ci while (offset >= pos) { 366662306a36Sopenharmony_ci bh = bh->b_this_page; 366762306a36Sopenharmony_ci iblock++; 366862306a36Sopenharmony_ci pos += blocksize; 366962306a36Sopenharmony_ci } 367062306a36Sopenharmony_ci if (buffer_freed(bh)) { 367162306a36Sopenharmony_ci BUFFER_TRACE(bh, "freed: skip"); 367262306a36Sopenharmony_ci goto unlock; 367362306a36Sopenharmony_ci } 367462306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 367562306a36Sopenharmony_ci BUFFER_TRACE(bh, "unmapped"); 367662306a36Sopenharmony_ci ext4_get_block(inode, iblock, bh, 0); 367762306a36Sopenharmony_ci /* unmapped? It's a hole - nothing to do */ 367862306a36Sopenharmony_ci if (!buffer_mapped(bh)) { 367962306a36Sopenharmony_ci BUFFER_TRACE(bh, "still unmapped"); 368062306a36Sopenharmony_ci goto unlock; 368162306a36Sopenharmony_ci } 368262306a36Sopenharmony_ci } 368362306a36Sopenharmony_ci 368462306a36Sopenharmony_ci /* Ok, it's mapped. Make sure it's up-to-date */ 368562306a36Sopenharmony_ci if (folio_test_uptodate(folio)) 368662306a36Sopenharmony_ci set_buffer_uptodate(bh); 368762306a36Sopenharmony_ci 368862306a36Sopenharmony_ci if (!buffer_uptodate(bh)) { 368962306a36Sopenharmony_ci err = ext4_read_bh_lock(bh, 0, true); 369062306a36Sopenharmony_ci if (err) 369162306a36Sopenharmony_ci goto unlock; 369262306a36Sopenharmony_ci if (fscrypt_inode_uses_fs_layer_crypto(inode)) { 369362306a36Sopenharmony_ci /* We expect the key to be set. 
*/ 369462306a36Sopenharmony_ci BUG_ON(!fscrypt_has_encryption_key(inode)); 369562306a36Sopenharmony_ci err = fscrypt_decrypt_pagecache_blocks(folio, 369662306a36Sopenharmony_ci blocksize, 369762306a36Sopenharmony_ci bh_offset(bh)); 369862306a36Sopenharmony_ci if (err) { 369962306a36Sopenharmony_ci clear_buffer_uptodate(bh); 370062306a36Sopenharmony_ci goto unlock; 370162306a36Sopenharmony_ci } 370262306a36Sopenharmony_ci } 370362306a36Sopenharmony_ci } 370462306a36Sopenharmony_ci if (ext4_should_journal_data(inode)) { 370562306a36Sopenharmony_ci BUFFER_TRACE(bh, "get write access"); 370662306a36Sopenharmony_ci err = ext4_journal_get_write_access(handle, inode->i_sb, bh, 370762306a36Sopenharmony_ci EXT4_JTR_NONE); 370862306a36Sopenharmony_ci if (err) 370962306a36Sopenharmony_ci goto unlock; 371062306a36Sopenharmony_ci } 371162306a36Sopenharmony_ci folio_zero_range(folio, offset, length); 371262306a36Sopenharmony_ci BUFFER_TRACE(bh, "zeroed end of block"); 371362306a36Sopenharmony_ci 371462306a36Sopenharmony_ci if (ext4_should_journal_data(inode)) { 371562306a36Sopenharmony_ci err = ext4_dirty_journalled_data(handle, bh); 371662306a36Sopenharmony_ci } else { 371762306a36Sopenharmony_ci err = 0; 371862306a36Sopenharmony_ci mark_buffer_dirty(bh); 371962306a36Sopenharmony_ci if (ext4_should_order_data(inode)) 372062306a36Sopenharmony_ci err = ext4_jbd2_inode_add_write(handle, inode, from, 372162306a36Sopenharmony_ci length); 372262306a36Sopenharmony_ci } 372362306a36Sopenharmony_ci 372462306a36Sopenharmony_ciunlock: 372562306a36Sopenharmony_ci folio_unlock(folio); 372662306a36Sopenharmony_ci folio_put(folio); 372762306a36Sopenharmony_ci return err; 372862306a36Sopenharmony_ci} 372962306a36Sopenharmony_ci 373062306a36Sopenharmony_ci/* 373162306a36Sopenharmony_ci * ext4_block_zero_page_range() zeros out a mapping of length 'length' 373262306a36Sopenharmony_ci * starting from file offset 'from'. 
The range to be zero'd must 373362306a36Sopenharmony_ci * be contained with in one block. If the specified range exceeds 373462306a36Sopenharmony_ci * the end of the block it will be shortened to end of the block 373562306a36Sopenharmony_ci * that corresponds to 'from' 373662306a36Sopenharmony_ci */ 373762306a36Sopenharmony_cistatic int ext4_block_zero_page_range(handle_t *handle, 373862306a36Sopenharmony_ci struct address_space *mapping, loff_t from, loff_t length) 373962306a36Sopenharmony_ci{ 374062306a36Sopenharmony_ci struct inode *inode = mapping->host; 374162306a36Sopenharmony_ci unsigned offset = from & (PAGE_SIZE-1); 374262306a36Sopenharmony_ci unsigned blocksize = inode->i_sb->s_blocksize; 374362306a36Sopenharmony_ci unsigned max = blocksize - (offset & (blocksize - 1)); 374462306a36Sopenharmony_ci 374562306a36Sopenharmony_ci /* 374662306a36Sopenharmony_ci * correct length if it does not fall between 374762306a36Sopenharmony_ci * 'from' and the end of the block 374862306a36Sopenharmony_ci */ 374962306a36Sopenharmony_ci if (length > max || length < 0) 375062306a36Sopenharmony_ci length = max; 375162306a36Sopenharmony_ci 375262306a36Sopenharmony_ci if (IS_DAX(inode)) { 375362306a36Sopenharmony_ci return dax_zero_range(inode, from, length, NULL, 375462306a36Sopenharmony_ci &ext4_iomap_ops); 375562306a36Sopenharmony_ci } 375662306a36Sopenharmony_ci return __ext4_block_zero_page_range(handle, mapping, from, length); 375762306a36Sopenharmony_ci} 375862306a36Sopenharmony_ci 375962306a36Sopenharmony_ci/* 376062306a36Sopenharmony_ci * ext4_block_truncate_page() zeroes out a mapping from file offset `from' 376162306a36Sopenharmony_ci * up to the end of the block which corresponds to `from'. 376262306a36Sopenharmony_ci * This required during truncate. We need to physically zero the tail end 376362306a36Sopenharmony_ci * of that block so it doesn't yield old data if the file is later grown. 
376462306a36Sopenharmony_ci */ 376562306a36Sopenharmony_cistatic int ext4_block_truncate_page(handle_t *handle, 376662306a36Sopenharmony_ci struct address_space *mapping, loff_t from) 376762306a36Sopenharmony_ci{ 376862306a36Sopenharmony_ci unsigned offset = from & (PAGE_SIZE-1); 376962306a36Sopenharmony_ci unsigned length; 377062306a36Sopenharmony_ci unsigned blocksize; 377162306a36Sopenharmony_ci struct inode *inode = mapping->host; 377262306a36Sopenharmony_ci 377362306a36Sopenharmony_ci /* If we are processing an encrypted inode during orphan list handling */ 377462306a36Sopenharmony_ci if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode)) 377562306a36Sopenharmony_ci return 0; 377662306a36Sopenharmony_ci 377762306a36Sopenharmony_ci blocksize = inode->i_sb->s_blocksize; 377862306a36Sopenharmony_ci length = blocksize - (offset & (blocksize - 1)); 377962306a36Sopenharmony_ci 378062306a36Sopenharmony_ci return ext4_block_zero_page_range(handle, mapping, from, length); 378162306a36Sopenharmony_ci} 378262306a36Sopenharmony_ci 378362306a36Sopenharmony_ciint ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, 378462306a36Sopenharmony_ci loff_t lstart, loff_t length) 378562306a36Sopenharmony_ci{ 378662306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 378762306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 378862306a36Sopenharmony_ci unsigned partial_start, partial_end; 378962306a36Sopenharmony_ci ext4_fsblk_t start, end; 379062306a36Sopenharmony_ci loff_t byte_end = (lstart + length - 1); 379162306a36Sopenharmony_ci int err = 0; 379262306a36Sopenharmony_ci 379362306a36Sopenharmony_ci partial_start = lstart & (sb->s_blocksize - 1); 379462306a36Sopenharmony_ci partial_end = byte_end & (sb->s_blocksize - 1); 379562306a36Sopenharmony_ci 379662306a36Sopenharmony_ci start = lstart >> sb->s_blocksize_bits; 379762306a36Sopenharmony_ci end = byte_end >> sb->s_blocksize_bits; 379862306a36Sopenharmony_ci 
379962306a36Sopenharmony_ci /* Handle partial zero within the single block */ 380062306a36Sopenharmony_ci if (start == end && 380162306a36Sopenharmony_ci (partial_start || (partial_end != sb->s_blocksize - 1))) { 380262306a36Sopenharmony_ci err = ext4_block_zero_page_range(handle, mapping, 380362306a36Sopenharmony_ci lstart, length); 380462306a36Sopenharmony_ci return err; 380562306a36Sopenharmony_ci } 380662306a36Sopenharmony_ci /* Handle partial zero out on the start of the range */ 380762306a36Sopenharmony_ci if (partial_start) { 380862306a36Sopenharmony_ci err = ext4_block_zero_page_range(handle, mapping, 380962306a36Sopenharmony_ci lstart, sb->s_blocksize); 381062306a36Sopenharmony_ci if (err) 381162306a36Sopenharmony_ci return err; 381262306a36Sopenharmony_ci } 381362306a36Sopenharmony_ci /* Handle partial zero out on the end of the range */ 381462306a36Sopenharmony_ci if (partial_end != sb->s_blocksize - 1) 381562306a36Sopenharmony_ci err = ext4_block_zero_page_range(handle, mapping, 381662306a36Sopenharmony_ci byte_end - partial_end, 381762306a36Sopenharmony_ci partial_end + 1); 381862306a36Sopenharmony_ci return err; 381962306a36Sopenharmony_ci} 382062306a36Sopenharmony_ci 382162306a36Sopenharmony_ciint ext4_can_truncate(struct inode *inode) 382262306a36Sopenharmony_ci{ 382362306a36Sopenharmony_ci if (S_ISREG(inode->i_mode)) 382462306a36Sopenharmony_ci return 1; 382562306a36Sopenharmony_ci if (S_ISDIR(inode->i_mode)) 382662306a36Sopenharmony_ci return 1; 382762306a36Sopenharmony_ci if (S_ISLNK(inode->i_mode)) 382862306a36Sopenharmony_ci return !ext4_inode_is_fast_symlink(inode); 382962306a36Sopenharmony_ci return 0; 383062306a36Sopenharmony_ci} 383162306a36Sopenharmony_ci 383262306a36Sopenharmony_ci/* 383362306a36Sopenharmony_ci * We have to make sure i_disksize gets properly updated before we truncate 383462306a36Sopenharmony_ci * page cache due to hole punching or zero range. 
Otherwise i_disksize update 383562306a36Sopenharmony_ci * can get lost as it may have been postponed to submission of writeback but 383662306a36Sopenharmony_ci * that will never happen after we truncate page cache. 383762306a36Sopenharmony_ci */ 383862306a36Sopenharmony_ciint ext4_update_disksize_before_punch(struct inode *inode, loff_t offset, 383962306a36Sopenharmony_ci loff_t len) 384062306a36Sopenharmony_ci{ 384162306a36Sopenharmony_ci handle_t *handle; 384262306a36Sopenharmony_ci int ret; 384362306a36Sopenharmony_ci 384462306a36Sopenharmony_ci loff_t size = i_size_read(inode); 384562306a36Sopenharmony_ci 384662306a36Sopenharmony_ci WARN_ON(!inode_is_locked(inode)); 384762306a36Sopenharmony_ci if (offset > size || offset + len < size) 384862306a36Sopenharmony_ci return 0; 384962306a36Sopenharmony_ci 385062306a36Sopenharmony_ci if (EXT4_I(inode)->i_disksize >= size) 385162306a36Sopenharmony_ci return 0; 385262306a36Sopenharmony_ci 385362306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_MISC, 1); 385462306a36Sopenharmony_ci if (IS_ERR(handle)) 385562306a36Sopenharmony_ci return PTR_ERR(handle); 385662306a36Sopenharmony_ci ext4_update_i_disksize(inode, size); 385762306a36Sopenharmony_ci ret = ext4_mark_inode_dirty(handle, inode); 385862306a36Sopenharmony_ci ext4_journal_stop(handle); 385962306a36Sopenharmony_ci 386062306a36Sopenharmony_ci return ret; 386162306a36Sopenharmony_ci} 386262306a36Sopenharmony_ci 386362306a36Sopenharmony_cistatic void ext4_wait_dax_page(struct inode *inode) 386462306a36Sopenharmony_ci{ 386562306a36Sopenharmony_ci filemap_invalidate_unlock(inode->i_mapping); 386662306a36Sopenharmony_ci schedule(); 386762306a36Sopenharmony_ci filemap_invalidate_lock(inode->i_mapping); 386862306a36Sopenharmony_ci} 386962306a36Sopenharmony_ci 387062306a36Sopenharmony_ciint ext4_break_layouts(struct inode *inode) 387162306a36Sopenharmony_ci{ 387262306a36Sopenharmony_ci struct page *page; 387362306a36Sopenharmony_ci int error; 
387462306a36Sopenharmony_ci 387562306a36Sopenharmony_ci if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock))) 387662306a36Sopenharmony_ci return -EINVAL; 387762306a36Sopenharmony_ci 387862306a36Sopenharmony_ci do { 387962306a36Sopenharmony_ci page = dax_layout_busy_page(inode->i_mapping); 388062306a36Sopenharmony_ci if (!page) 388162306a36Sopenharmony_ci return 0; 388262306a36Sopenharmony_ci 388362306a36Sopenharmony_ci error = ___wait_var_event(&page->_refcount, 388462306a36Sopenharmony_ci atomic_read(&page->_refcount) == 1, 388562306a36Sopenharmony_ci TASK_INTERRUPTIBLE, 0, 0, 388662306a36Sopenharmony_ci ext4_wait_dax_page(inode)); 388762306a36Sopenharmony_ci } while (error == 0); 388862306a36Sopenharmony_ci 388962306a36Sopenharmony_ci return error; 389062306a36Sopenharmony_ci} 389162306a36Sopenharmony_ci 389262306a36Sopenharmony_ci/* 389362306a36Sopenharmony_ci * ext4_punch_hole: punches a hole in a file by releasing the blocks 389462306a36Sopenharmony_ci * associated with the given offset and length 389562306a36Sopenharmony_ci * 389662306a36Sopenharmony_ci * @inode: File inode 389762306a36Sopenharmony_ci * @offset: The offset where the hole will begin 389862306a36Sopenharmony_ci * @len: The length of the hole 389962306a36Sopenharmony_ci * 390062306a36Sopenharmony_ci * Returns: 0 on success or negative on failure 390162306a36Sopenharmony_ci */ 390262306a36Sopenharmony_ci 390362306a36Sopenharmony_ciint ext4_punch_hole(struct file *file, loff_t offset, loff_t length) 390462306a36Sopenharmony_ci{ 390562306a36Sopenharmony_ci struct inode *inode = file_inode(file); 390662306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 390762306a36Sopenharmony_ci ext4_lblk_t first_block, stop_block; 390862306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 390962306a36Sopenharmony_ci loff_t first_block_offset, last_block_offset, max_length; 391062306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 
391162306a36Sopenharmony_ci handle_t *handle; 391262306a36Sopenharmony_ci unsigned int credits; 391362306a36Sopenharmony_ci int ret = 0, ret2 = 0; 391462306a36Sopenharmony_ci 391562306a36Sopenharmony_ci trace_ext4_punch_hole(inode, offset, length, 0); 391662306a36Sopenharmony_ci 391762306a36Sopenharmony_ci /* 391862306a36Sopenharmony_ci * Write out all dirty pages to avoid race conditions 391962306a36Sopenharmony_ci * Then release them. 392062306a36Sopenharmony_ci */ 392162306a36Sopenharmony_ci if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { 392262306a36Sopenharmony_ci ret = filemap_write_and_wait_range(mapping, offset, 392362306a36Sopenharmony_ci offset + length - 1); 392462306a36Sopenharmony_ci if (ret) 392562306a36Sopenharmony_ci return ret; 392662306a36Sopenharmony_ci } 392762306a36Sopenharmony_ci 392862306a36Sopenharmony_ci inode_lock(inode); 392962306a36Sopenharmony_ci 393062306a36Sopenharmony_ci /* No need to punch hole beyond i_size */ 393162306a36Sopenharmony_ci if (offset >= inode->i_size) 393262306a36Sopenharmony_ci goto out_mutex; 393362306a36Sopenharmony_ci 393462306a36Sopenharmony_ci /* 393562306a36Sopenharmony_ci * If the hole extends beyond i_size, set the hole 393662306a36Sopenharmony_ci * to end after the page that contains i_size 393762306a36Sopenharmony_ci */ 393862306a36Sopenharmony_ci if (offset + length > inode->i_size) { 393962306a36Sopenharmony_ci length = inode->i_size + 394062306a36Sopenharmony_ci PAGE_SIZE - (inode->i_size & (PAGE_SIZE - 1)) - 394162306a36Sopenharmony_ci offset; 394262306a36Sopenharmony_ci } 394362306a36Sopenharmony_ci 394462306a36Sopenharmony_ci /* 394562306a36Sopenharmony_ci * For punch hole the length + offset needs to be within one block 394662306a36Sopenharmony_ci * before last range. Adjust the length if it goes beyond that limit. 
394762306a36Sopenharmony_ci */ 394862306a36Sopenharmony_ci max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize; 394962306a36Sopenharmony_ci if (offset + length > max_length) 395062306a36Sopenharmony_ci length = max_length - offset; 395162306a36Sopenharmony_ci 395262306a36Sopenharmony_ci if (offset & (sb->s_blocksize - 1) || 395362306a36Sopenharmony_ci (offset + length) & (sb->s_blocksize - 1)) { 395462306a36Sopenharmony_ci /* 395562306a36Sopenharmony_ci * Attach jinode to inode for jbd2 if we do any zeroing of 395662306a36Sopenharmony_ci * partial block 395762306a36Sopenharmony_ci */ 395862306a36Sopenharmony_ci ret = ext4_inode_attach_jinode(inode); 395962306a36Sopenharmony_ci if (ret < 0) 396062306a36Sopenharmony_ci goto out_mutex; 396162306a36Sopenharmony_ci 396262306a36Sopenharmony_ci } 396362306a36Sopenharmony_ci 396462306a36Sopenharmony_ci /* Wait all existing dio workers, newcomers will block on i_rwsem */ 396562306a36Sopenharmony_ci inode_dio_wait(inode); 396662306a36Sopenharmony_ci 396762306a36Sopenharmony_ci ret = file_modified(file); 396862306a36Sopenharmony_ci if (ret) 396962306a36Sopenharmony_ci goto out_mutex; 397062306a36Sopenharmony_ci 397162306a36Sopenharmony_ci /* 397262306a36Sopenharmony_ci * Prevent page faults from reinstantiating pages we have released from 397362306a36Sopenharmony_ci * page cache. 
397462306a36Sopenharmony_ci */ 397562306a36Sopenharmony_ci filemap_invalidate_lock(mapping); 397662306a36Sopenharmony_ci 397762306a36Sopenharmony_ci ret = ext4_break_layouts(inode); 397862306a36Sopenharmony_ci if (ret) 397962306a36Sopenharmony_ci goto out_dio; 398062306a36Sopenharmony_ci 398162306a36Sopenharmony_ci first_block_offset = round_up(offset, sb->s_blocksize); 398262306a36Sopenharmony_ci last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; 398362306a36Sopenharmony_ci 398462306a36Sopenharmony_ci /* Now release the pages and zero block aligned part of pages*/ 398562306a36Sopenharmony_ci if (last_block_offset > first_block_offset) { 398662306a36Sopenharmony_ci ret = ext4_update_disksize_before_punch(inode, offset, length); 398762306a36Sopenharmony_ci if (ret) 398862306a36Sopenharmony_ci goto out_dio; 398962306a36Sopenharmony_ci truncate_pagecache_range(inode, first_block_offset, 399062306a36Sopenharmony_ci last_block_offset); 399162306a36Sopenharmony_ci } 399262306a36Sopenharmony_ci 399362306a36Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 399462306a36Sopenharmony_ci credits = ext4_writepage_trans_blocks(inode); 399562306a36Sopenharmony_ci else 399662306a36Sopenharmony_ci credits = ext4_blocks_for_truncate(inode); 399762306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); 399862306a36Sopenharmony_ci if (IS_ERR(handle)) { 399962306a36Sopenharmony_ci ret = PTR_ERR(handle); 400062306a36Sopenharmony_ci ext4_std_error(sb, ret); 400162306a36Sopenharmony_ci goto out_dio; 400262306a36Sopenharmony_ci } 400362306a36Sopenharmony_ci 400462306a36Sopenharmony_ci ret = ext4_zero_partial_blocks(handle, inode, offset, 400562306a36Sopenharmony_ci length); 400662306a36Sopenharmony_ci if (ret) 400762306a36Sopenharmony_ci goto out_stop; 400862306a36Sopenharmony_ci 400962306a36Sopenharmony_ci first_block = (offset + sb->s_blocksize - 1) >> 401062306a36Sopenharmony_ci EXT4_BLOCK_SIZE_BITS(sb); 
401162306a36Sopenharmony_ci stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb); 401262306a36Sopenharmony_ci 401362306a36Sopenharmony_ci /* If there are blocks to remove, do it */ 401462306a36Sopenharmony_ci if (stop_block > first_block) { 401562306a36Sopenharmony_ci 401662306a36Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 401762306a36Sopenharmony_ci ext4_discard_preallocations(inode, 0); 401862306a36Sopenharmony_ci 401962306a36Sopenharmony_ci ext4_es_remove_extent(inode, first_block, 402062306a36Sopenharmony_ci stop_block - first_block); 402162306a36Sopenharmony_ci 402262306a36Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 402362306a36Sopenharmony_ci ret = ext4_ext_remove_space(inode, first_block, 402462306a36Sopenharmony_ci stop_block - 1); 402562306a36Sopenharmony_ci else 402662306a36Sopenharmony_ci ret = ext4_ind_remove_space(handle, inode, first_block, 402762306a36Sopenharmony_ci stop_block); 402862306a36Sopenharmony_ci 402962306a36Sopenharmony_ci up_write(&EXT4_I(inode)->i_data_sem); 403062306a36Sopenharmony_ci } 403162306a36Sopenharmony_ci ext4_fc_track_range(handle, inode, first_block, stop_block); 403262306a36Sopenharmony_ci if (IS_SYNC(inode)) 403362306a36Sopenharmony_ci ext4_handle_sync(handle); 403462306a36Sopenharmony_ci 403562306a36Sopenharmony_ci inode->i_mtime = inode_set_ctime_current(inode); 403662306a36Sopenharmony_ci ret2 = ext4_mark_inode_dirty(handle, inode); 403762306a36Sopenharmony_ci if (unlikely(ret2)) 403862306a36Sopenharmony_ci ret = ret2; 403962306a36Sopenharmony_ci if (ret >= 0) 404062306a36Sopenharmony_ci ext4_update_inode_fsync_trans(handle, inode, 1); 404162306a36Sopenharmony_ciout_stop: 404262306a36Sopenharmony_ci ext4_journal_stop(handle); 404362306a36Sopenharmony_ciout_dio: 404462306a36Sopenharmony_ci filemap_invalidate_unlock(mapping); 404562306a36Sopenharmony_ciout_mutex: 404662306a36Sopenharmony_ci inode_unlock(inode); 404762306a36Sopenharmony_ci return ret; 404862306a36Sopenharmony_ci} 
404962306a36Sopenharmony_ci 405062306a36Sopenharmony_ciint ext4_inode_attach_jinode(struct inode *inode) 405162306a36Sopenharmony_ci{ 405262306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 405362306a36Sopenharmony_ci struct jbd2_inode *jinode; 405462306a36Sopenharmony_ci 405562306a36Sopenharmony_ci if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal) 405662306a36Sopenharmony_ci return 0; 405762306a36Sopenharmony_ci 405862306a36Sopenharmony_ci jinode = jbd2_alloc_inode(GFP_KERNEL); 405962306a36Sopenharmony_ci spin_lock(&inode->i_lock); 406062306a36Sopenharmony_ci if (!ei->jinode) { 406162306a36Sopenharmony_ci if (!jinode) { 406262306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 406362306a36Sopenharmony_ci return -ENOMEM; 406462306a36Sopenharmony_ci } 406562306a36Sopenharmony_ci ei->jinode = jinode; 406662306a36Sopenharmony_ci jbd2_journal_init_jbd_inode(ei->jinode, inode); 406762306a36Sopenharmony_ci jinode = NULL; 406862306a36Sopenharmony_ci } 406962306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 407062306a36Sopenharmony_ci if (unlikely(jinode != NULL)) 407162306a36Sopenharmony_ci jbd2_free_inode(jinode); 407262306a36Sopenharmony_ci return 0; 407362306a36Sopenharmony_ci} 407462306a36Sopenharmony_ci 407562306a36Sopenharmony_ci/* 407662306a36Sopenharmony_ci * ext4_truncate() 407762306a36Sopenharmony_ci * 407862306a36Sopenharmony_ci * We block out ext4_get_block() block instantiations across the entire 407962306a36Sopenharmony_ci * transaction, and VFS/VM ensures that ext4_truncate() cannot run 408062306a36Sopenharmony_ci * simultaneously on behalf of the same inode. 408162306a36Sopenharmony_ci * 408262306a36Sopenharmony_ci * As we work through the truncate and commit bits of it to the journal there 408362306a36Sopenharmony_ci * is one core, guiding principle: the file's tree must always be consistent on 408462306a36Sopenharmony_ci * disk. We must be able to restart the truncate after a crash. 
 *
 * The file's tree may be transiently inconsistent in memory (although it
 * probably isn't), but whenever we close off and commit a journal transaction,
 * the contents of (the filesystem + the journal) must be consistent and
 * restartable.  It's pretty simple, really: bottom up, right to left (although
 * left-to-right works OK too).
 *
 * Note that at recovery time, journal replay occurs *before* the restart of
 * truncate against the orphan inode list.
 *
 * The committed inode has the new, desired i_size (which is the same as
 * i_disksize in this case).  After a crash, ext4_orphan_cleanup() will see
 * that this inode's truncate did not complete and it will again call
 * ext4_truncate() to have another go.  So there will be instantiated blocks
 * to the right of the truncation point in a crashed ext4 filesystem.  But
 * that's fine - as long as they are linked from the inode, the post-crash
 * ext4_truncate() run will find them and release them.
 *
 * Returns 0 on success (including the cases where there is nothing to do
 * because ext4_can_truncate() refuses or the data stayed inline), or the
 * first negative errno hit while attaching the jinode, starting the journal
 * handle, adding the orphan record, truncating the extent tree or marking
 * the inode dirty.
 */
int ext4_truncate(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	unsigned int credits;
	int err = 0, err2;
	handle_t *handle;
	struct address_space *mapping = inode->i_mapping;

	/*
	 * There is a possibility that we're either freeing the inode
	 * or it's a completely new inode. In those cases we might not
	 * have i_rwsem locked because it's not necessary.
	 */
	if (!(inode->i_state & (I_NEW|I_FREEING)))
		WARN_ON(!inode_is_locked(inode));
	trace_ext4_truncate_enter(inode);

	if (!ext4_can_truncate(inode))
		goto out_trace;

	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
		ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);

	if (ext4_has_inline_data(inode)) {
		int has_inline = 1;

		/*
		 * Stop here on error, or when the helper reports that the
		 * inode still has inline data after the truncate.
		 */
		err = ext4_inline_data_truncate(inode, &has_inline);
		if (err || has_inline)
			goto out_trace;
	}

	/* If we zero-out tail of the page, we have to create jinode for jbd2 */
	if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
		err = ext4_inode_attach_jinode(inode);
		if (err)
			goto out_trace;
	}

	/* Journal credit estimate depends on the block-mapping format. */
	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		credits = ext4_writepage_trans_blocks(inode);
	else
		credits = ext4_blocks_for_truncate(inode);

	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto out_trace;
	}

	/*
	 * New size is not block aligned: handle the partial last block.
	 * The helper's return value is not checked here.
	 */
	if (inode->i_size & (inode->i_sb->s_blocksize - 1))
		ext4_block_truncate_page(handle, mapping, inode->i_size);

	/*
	 * We add the inode to the orphan list, so that if this
	 * truncate spans multiple transactions, and we crash, we will
	 * resume the truncate when the filesystem recovers.  It also
	 * marks the inode dirty, to catch the new size.
	 *
	 * Implication: the file must always be in a sane, consistent
	 * truncatable state while each transaction commits.
	 */
	err = ext4_orphan_add(handle, inode);
	if (err)
		goto out_stop;

	/* EXT4_I(inode) is the same object as 'ei' used for the unlock. */
	down_write(&EXT4_I(inode)->i_data_sem);

	ext4_discard_preallocations(inode, 0);

	/* Only the extent path reports an error; 'err' is still 0 otherwise. */
	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		err = ext4_ext_truncate(handle, inode);
	else
		ext4_ind_truncate(handle, inode);

	up_write(&ei->i_data_sem);
	if (err)
		goto out_stop;

	if (IS_SYNC(inode))
		ext4_handle_sync(handle);

out_stop:
	/*
	 * If this was a simple ftruncate() and the file will remain alive,
	 * then we need to clear up the orphan record which we created above.
	 * However, if this was a real unlink then we were called by
	 * ext4_evict_inode(), and we allow that function to clean up the
	 * orphan info for us.
	 */
	if (inode->i_nlink)
		ext4_orphan_del(handle, inode);

	inode->i_mtime = inode_set_ctime_current(inode);
	/* Keep the first error: a dirty-marking failure only counts if err is 0. */
	err2 = ext4_mark_inode_dirty(handle, inode);
	if (unlikely(err2 && !err))
		err = err2;
	ext4_journal_stop(handle);

out_trace:
	trace_ext4_truncate_exit(inode);
	return err;
}

/*
 * Read the inode's version counter without modifying it.  Inodes flagged
 * EXT4_EA_INODE_FL are read with inode_peek_iversion_raw(); every other
 * inode goes through inode_peek_iversion().
 */
static inline u64 ext4_inode_peek_iversion(const struct inode *inode)
{
	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
		return inode_peek_iversion_raw(inode);
	else
		return inode_peek_iversion(inode);
}

/*
 * Encode the in-memory inode->i_blocks into raw_inode->i_blocks_lo and
 * raw_inode->i_blocks_high, updating the EXT4_INODE_HUGE_FILE inode flag to
 * match the encoding chosen:
 *
 *  - value fits in 32 bits: stored in i_blocks_lo, high part zeroed, flag
 *    cleared;
 *  - value fits in 48 bits (huge_file feature required): split across
 *    lo/high, flag cleared;
 *  - larger (huge_file feature required): flag set and the value is stored
 *    shifted right by (i_blkbits - 9), i.e. in filesystem-block units.
 *
 * Returns 0 on success, or -EFSCORRUPTED when the value needs more than
 * 32 bits but the filesystem lacks the huge_file feature.
 */
static int ext4_inode_blocks_set(struct ext4_inode *raw_inode,
				 struct ext4_inode_info *ei)
{
	struct inode *inode = &(ei->vfs_inode);
	u64 i_blocks = READ_ONCE(inode->i_blocks);
	struct super_block *sb = inode->i_sb;

	if (i_blocks <= ~0U) {
		/*
		 * i_blocks can be represented in a 32 bit variable
		 * as multiple of 512 bytes
		 */
		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
		raw_inode->i_blocks_high = 0;
		ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
		return 0;
	}

	/*
	 * This should never happen since sb->s_maxbytes should not have
	 * allowed this, sb->s_maxbytes was set according to the huge_file
	 * feature in ext4_fill_super().
	 */
	if (!ext4_has_feature_huge_file(sb))
		return -EFSCORRUPTED;

	if (i_blocks <= 0xffffffffffffULL) {
		/*
		 * i_blocks can be represented in a 48 bit variable
		 * as multiple of 512 bytes
		 */
		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
		ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
	} else {
		ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
		/* i_block is stored in file system block size */
		i_blocks = i_blocks >> (inode->i_blkbits - 9);
		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
	}
	return 0;
}

/*
 * Serialise the in-memory inode (@inode and its ext4_inode_info) into the
 * on-disk layout at @raw_inode, finishing with ext4_inode_csum_set().
 *
 * Errors do not short-circuit the function: every field is still written
 * and the checksum is still set.  The value returned is the first error
 * seen, either from ext4_inode_blocks_set() or -EFSCORRUPTED when the inode
 * carries a non-default project id on a filesystem without the project
 * feature; 0 otherwise.
 */
static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	uid_t i_uid;
	gid_t i_gid;
	projid_t i_projid;
	int block;
	int err;

	/* 'err' is deliberately carried to the end rather than returned now. */
	err = ext4_inode_blocks_set(raw_inode, ei);

	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
	i_uid = i_uid_read(inode);
	i_gid = i_gid_read(inode);
	i_projid = from_kprojid(&init_user_ns, ei->i_projid);
	if (!(test_opt(inode->i_sb, NO_UID32))) {
		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
		/*
		 * Fix up interoperability with old kernels. Otherwise,
		 * old inodes get re-used with the upper 16 bits of the
		 * uid/gid intact.
		 */
		if (ei->i_dtime && list_empty(&ei->i_orphan)) {
			raw_inode->i_uid_high = 0;
			raw_inode->i_gid_high = 0;
		} else {
			raw_inode->i_uid_high =
				cpu_to_le16(high_16_bits(i_uid));
			raw_inode->i_gid_high =
				cpu_to_le16(high_16_bits(i_gid));
		}
	} else {
		/* NO_UID32: only the 16-bit fields are used, high halves zeroed. */
		raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
		raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid));
		raw_inode->i_uid_high = 0;
		raw_inode->i_gid_high = 0;
	}
	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);

	EXT4_INODE_SET_CTIME(inode, raw_inode);
	EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
	EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
	EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);

	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
	raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
	/* i_file_acl_high is only written when HURD_COMPAT is not set. */
	if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
		raw_inode->i_file_acl_high =
			cpu_to_le16(ei->i_file_acl >> 32);
	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
	ext4_isize_set(raw_inode, ei->i_disksize);

	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
		/*
		 * Device nodes keep their device number in i_block[]: the old
		 * encoding goes in slot 0, the new encoding in slot 1.
		 */
		if (old_valid_dev(inode->i_rdev)) {
			raw_inode->i_block[0] =
				cpu_to_le32(old_encode_dev(inode->i_rdev));
			raw_inode->i_block[1] = 0;
		} else {
			raw_inode->i_block[0] = 0;
			raw_inode->i_block[1] =
				cpu_to_le32(new_encode_dev(inode->i_rdev));
			raw_inode->i_block[2] = 0;
		}
	} else if (!ext4_has_inline_data(inode)) {
		/* Inodes with inline data leave i_block[] untouched here. */
		for (block = 0; block < EXT4_N_BLOCKS; block++)
			raw_inode->i_block[block] = ei->i_data[block];
	}

	if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
		u64 ivers = ext4_inode_peek_iversion(inode);

		/* Low 32 bits always; high 32 bits only if the field fits. */
		raw_inode->i_disk_version = cpu_to_le32(ivers);
		if (ei->i_extra_isize) {
			if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
				raw_inode->i_version_hi =
					cpu_to_le32(ivers >> 32);
			raw_inode->i_extra_isize =
				cpu_to_le16(ei->i_extra_isize);
		}
	}

	/* Preserve an earlier error; otherwise flag the inconsistent projid. */
	if (i_projid != EXT4_DEF_PROJID &&
	    !ext4_has_feature_project(inode->i_sb))
		err = err ?: -EFSCORRUPTED;

	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
	    EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
		raw_inode->i_projid = cpu_to_le32(i_projid);

	ext4_inode_csum_set(inode, raw_inode, ei);
	return err;
}

/*
 * ext4_get_inode_loc returns with an extra refcount against the inode's
 * underlying buffer_head on success.  If we pass 'inode' and it does not
 * have in-inode xattr, we have all inode data in memory that is needed
 * to recreate the on-disk version of this inode.
 *
 * On success iloc->block_group, iloc->offset and iloc->bh describe where
 * inode number 'ino' lives in the inode table, and 0 is returned.
 *
 * Failure returns, all with iloc->bh left NULL:
 *   -EFSCORRUPTED  'ino' is outside [EXT4_ROOT_INO, s_inodes_count], or the
 *                  group's inode table block number is out of range;
 *   -EIO           the group descriptor could not be obtained, or the inode
 *                  table block could not be read (in the latter case the
 *                  block number is stored in *ret_block when ret_block is
 *                  non-NULL);
 *   -ENOMEM        sb_getblk() failed for the inode table block.
 */
static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino,
				struct inode *inode, struct ext4_iloc *iloc,
				ext4_fsblk_t *ret_block)
{
	struct ext4_group_desc	*gdp;
	struct buffer_head	*bh;
	ext4_fsblk_t		block;
	struct blk_plug		plug;
	int			inodes_per_block, inode_offset;

	iloc->bh = NULL;
	if (ino < EXT4_ROOT_INO ||
	    ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
		return -EFSCORRUPTED;

	iloc->block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
	gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
	if (!gdp)
		return -EIO;

	/*
	 * Figure out the offset within the block group inode table
	 */
	inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
	inode_offset = ((ino - 1) %
			EXT4_INODES_PER_GROUP(sb));
	iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);

	block = ext4_inode_table(sb, gdp);
	if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) ||
	    (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) {
		ext4_error(sb, "Invalid inode table block %llu in "
			   "block_group %u", block, iloc->block_group);
		return -EFSCORRUPTED;
	}
	/* Advance from the start of the table to the block holding 'ino'. */
	block += (inode_offset / inodes_per_block);

	bh = sb_getblk(sb, block);
	if (unlikely(!bh))
		return -ENOMEM;
	if (ext4_buffer_uptodate(bh))
		goto has_buffer;

	lock_buffer(bh);
	if (ext4_buffer_uptodate(bh)) {
		/* Someone brought it uptodate while we waited */
		unlock_buffer(bh);
		goto has_buffer;
	}

	/*
	 * If we have all information of the inode in memory and this
	 * is the only valid inode in the block, we need not read the
	 * block.
	 */
	if (inode && !ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
		struct buffer_head *bitmap_bh;
		int i, start;

		/*
		 * Index of the first inode sharing this table block.
		 * NOTE(review): the mask form presumes inodes_per_block is a
		 * power of two - confirm against superblock validation.
		 */
		start = inode_offset & ~(inodes_per_block - 1);

		/* Is the inode bitmap in cache? */
		bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
		if (unlikely(!bitmap_bh))
			goto make_io;

		/*
		 * If the inode bitmap isn't in cache then the
		 * optimisation may end up performing two reads instead
		 * of one, so skip it.
		 */
		if (!buffer_uptodate(bitmap_bh)) {
			brelse(bitmap_bh);
			goto make_io;
		}
		/* Stop at the first other inode in this block that is in use. */
		for (i = start; i < start + inodes_per_block; i++) {
			if (i == inode_offset)
				continue;
			if (ext4_test_bit(i, bitmap_bh->b_data))
				break;
		}
		brelse(bitmap_bh);
		if (i == start + inodes_per_block) {
			struct ext4_inode *raw_inode =
				(struct ext4_inode *) (bh->b_data + iloc->offset);

			/* all other inodes are free, so skip I/O */
			memset(bh->b_data, 0, bh->b_size);
			/*
			 * Rebuild the on-disk image from memory unless the
			 * inode is in EXT4_STATE_NEW; the return value of
			 * ext4_fill_raw_inode() is not checked here.
			 */
			if (!ext4_test_inode_state(inode, EXT4_STATE_NEW))
				ext4_fill_raw_inode(inode, raw_inode);
			set_buffer_uptodate(bh);
			unlock_buffer(bh);
			goto has_buffer;
		}
	}

make_io:
	/*
	 * If we need to do any I/O, try to pre-readahead extra
	 * blocks from the inode table.
	 */
	blk_start_plug(&plug);
	if (EXT4_SB(sb)->s_inode_readahead_blks) {
		ext4_fsblk_t b, end, table;
		unsigned num;
		__u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks;

		table = ext4_inode_table(sb, gdp);
		/* s_inode_readahead_blks is always a power of 2 */
		b = block & ~((ext4_fsblk_t) ra_blks - 1);
		/* Never start the readahead window before the inode table. */
		if (table > b)
			b = table;
		end = b + ra_blks;
		num = EXT4_INODES_PER_GROUP(sb);
		if (ext4_has_group_desc_csum(sb))
			num -= ext4_itable_unused_count(sb, gdp);
		/* 'table' now bounds the readahead by the in-use inode count. */
		table += num / inodes_per_block;
		if (end > table)
			end = table;
		/* Readahead covers blocks b through end, inclusive. */
		while (b <= end)
			ext4_sb_breadahead_unmovable(sb, b++);
	}

	/*
	 * There are other valid inodes in the buffer, this inode
	 * has in-inode xattrs, or we don't have this inode in memory.
	 * Read the block from disk.
	 */
	trace_ext4_load_inode(sb, ino);
	ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL);
	blk_finish_plug(&plug);
	wait_on_buffer(bh);
	ext4_simulate_fail_bh(sb, bh, EXT4_SIM_INODE_EIO);
	if (!buffer_uptodate(bh)) {
		/* Report the failing block to the caller, then drop our ref. */
		if (ret_block)
			*ret_block = block;
		brelse(bh);
		return -EIO;
	}
has_buffer:
	iloc->bh = bh;
	return 0;
}

/*
 * Locate @inode's on-disk slot without using the in-memory inode: NULL is
 * passed as the 'inode' argument of __ext4_get_inode_loc(), so the
 * "rebuild the block from memory" shortcut there is never taken.
 * An -EIO result is reported via ext4_error_inode_block() with the failing
 * block number.  Returns the result of __ext4_get_inode_loc() unchanged.
 */
static int __ext4_get_inode_loc_noinmem(struct inode *inode,
					struct ext4_iloc *iloc)
{
	ext4_fsblk_t err_blk = 0;
	int ret;

	ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc,
					&err_blk);

	if (ret == -EIO)
		ext4_error_inode_block(inode, err_blk, EIO,
					"unable to read itable block");

	return ret;
}

/*
 * Locate @inode's on-disk slot, passing the in-memory inode through so
 * that __ext4_get_inode_loc() may skip the read when it can reconstruct
 * the block.  An -EIO result is reported via ext4_error_inode_block().
 * Returns the result of __ext4_get_inode_loc() unchanged.
 */
int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
{
	ext4_fsblk_t err_blk = 0;
	int ret;

	ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc,
					&err_blk);

	if (ret == -EIO)
		ext4_error_inode_block(inode, err_blk, EIO,
					"unable to read itable block");

	return ret;
}


/*
 * Locate inode number @ino by superblock alone: no in-memory inode is
 * supplied and the failing block number is not collected, so nothing is
 * reported here on -EIO.
 * NOTE(review): "fc" presumably stands for fast commit - confirm with callers.
 */
int ext4_get_fc_inode_loc(struct super_block *sb, unsigned long ino,
			  struct ext4_iloc *iloc)
{
	return __ext4_get_inode_loc(sb, ino, NULL, iloc, NULL);
}

/*
 * Decide whether S_DAX should be set for @inode.  Returns false when the
 * DAX_NEVER option is set, the inode is not a regular file, its data is
 * journalled, it has inline data, it carries the ENCRYPT or VERITY inode
 * flag, or EXT4_FLAGS_BDEV_IS_DAX is not set for the superblock.  Past
 * those checks, DAX_ALWAYS yields true; otherwise the per-inode
 * EXT4_INODE_DAX flag decides.
 */
static bool ext4_should_enable_dax(struct inode *inode)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	if (test_opt2(inode->i_sb, DAX_NEVER))
		return false;
	if (!S_ISREG(inode->i_mode))
		return false;
	if (ext4_should_journal_data(inode))
		return false;
	if (ext4_has_inline_data(inode))
		return false;
	if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT))
		return false;
	if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
		return false;
	if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags))
		return false;
	if (test_opt(inode->i_sb, DAX_ALWAYS))
		return true;

	return ext4_test_inode_flag(inode, EXT4_INODE_DAX);
}

/*
 * Translate the ext4 on-disk flag word (EXT4_I(inode)->i_flags) into the
 * corresponding VFS S_* bits and install them with inode_set_flags().
 *
 * @init: when true, additionally evaluate ext4_should_enable_dax() and set
 *        S_DAX if it agrees.  Calling with @init set on an inode that is
 *        already DAX triggers a one-time warning.
 */
void ext4_set_inode_flags(struct inode *inode, bool init)
{
	unsigned int flags = EXT4_I(inode)->i_flags;
	unsigned int new_fl = 0;

	WARN_ON_ONCE(IS_DAX(inode) && init);

	if (flags & EXT4_SYNC_FL)
		new_fl |= S_SYNC;
	if (flags & EXT4_APPEND_FL)
		new_fl |= S_APPEND;
	if (flags & EXT4_IMMUTABLE_FL)
		new_fl |= S_IMMUTABLE;
	if (flags & EXT4_NOATIME_FL)
		new_fl |= S_NOATIME;
	if (flags & EXT4_DIRSYNC_FL)
		new_fl |= S_DIRSYNC;

	/* Because of the way inode_set_flags() works we must preserve S_DAX
	 * here if already set. */
	new_fl |= (inode->i_flags & S_DAX);
	if (init && ext4_should_enable_dax(inode))
		new_fl |= S_DAX;

	if (flags & EXT4_ENCRYPT_FL)
		new_fl |= S_ENCRYPTED;
	if (flags & EXT4_CASEFOLD_FL)
		new_fl |= S_CASEFOLD;
	if (flags & EXT4_VERITY_FL)
		new_fl |= S_VERITY;
	inode_set_flags(inode, new_fl,
			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
			S_ENCRYPTED|S_CASEFOLD|S_VERITY);
}

/*
 * Decode the block count stored in @raw_inode.  Without the huge_file
 * feature only i_blocks_lo is used.  With it, i_blocks_high and i_blocks_lo
 * form a 48-bit value; if the inode also carries EXT4_INODE_HUGE_FILE that
 * value is in filesystem-block units and is shifted left by
 * (i_blkbits - 9) before being returned.
 */
static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
				  struct ext4_inode_info *ei)
{
	blkcnt_t i_blocks ;
	struct inode *inode = &(ei->vfs_inode);
	struct super_block *sb = inode->i_sb;

	if (ext4_has_feature_huge_file(sb)) {
		/* we are using combined 48 bit field */
		i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
					le32_to_cpu(raw_inode->i_blocks_lo);
		if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
			/* i_blocks represent file system block size */
			return i_blocks  << (inode->i_blkbits - 9);
		} else {
			return i_blocks;
		}
	} else {
		return le32_to_cpu(raw_inode->i_blocks_lo);
	}
}

/*
 * Inspect the area that follows the fixed and extra inode fields.  When the
 * inode has xattr space and that area begins with EXT4_XATTR_MAGIC, mark
 * the inode EXT4_STATE_XATTR, look for inline data and, if found, also set
 * EXT4_STATE_MAY_INLINE_DATA; the result of ext4_find_inline_data_nolock()
 * is returned.  Otherwise i_inline_off is reset to 0 and 0 is returned.
 */
static inline int ext4_iget_extra_inode(struct inode *inode,
					 struct ext4_inode *raw_inode,
					 struct ext4_inode_info *ei)
{
	/* The magic sits right after the i_extra_isize bytes of extra fields. */
	__le32 *magic = (void *)raw_inode +
			EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;

	if (EXT4_INODE_HAS_XATTR_SPACE(inode)  &&
	    *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
		int err;

		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
		err = ext4_find_inline_data_nolock(inode);
		if (!err && ext4_has_inline_data(inode))
			ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
		return err;
	} else
		EXT4_I(inode)->i_inline_off = 0;
	return 0;
}

/*
 * Store @inode's project id in *projid.  Returns 0 on success, or
 * -EOPNOTSUPP (leaving *projid untouched) when the filesystem lacks the
 * project feature.
 */
int ext4_get_projid(struct inode *inode, kprojid_t *projid)
{
	if (!ext4_has_feature_project(inode->i_sb))
		return -EOPNOTSUPP;
	*projid = EXT4_I(inode)->i_projid;
	return 0;
}

/*
 * ext4 has self-managed i_version for ea inodes, it stores the lower 32bit of
 * refcount in i_version, so use raw values if inode has EXT4_EA_INODE_FL flag
 * set.
 */
static inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
{
	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
		inode_set_iversion_raw(inode, val);
	else
		inode_set_iversion_queried(inode, val);
}

/*
 * Sanity-check an inode against the iget flags it was requested with.
 * Returns NULL when the inode is acceptable, otherwise a static string
 * describing the mismatch:
 *  - EXT4_IGET_EA_INODE requested but the inode lacks EXT4_EA_INODE_FL, or
 *    it has in-inode xattrs / a non-zero i_file_acl;
 *  - EXT4_IGET_EA_INODE not requested but the inode has EXT4_EA_INODE_FL;
 *  - the inode is bad and EXT4_IGET_BAD was not requested.
 */
static const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)

{
	if (flags & EXT4_IGET_EA_INODE) {
		if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
			return "missing EA_INODE flag";
		if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
		    EXT4_I(inode)->i_file_acl)
			return "ea_inode with extended attributes";
	} else {
		if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
			return "unexpected EA_INODE flag";
	}
	if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
		return "unexpected bad inode w/o EXT4_IGET_BAD";
	return NULL;
}

struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
			  ext4_iget_flags flags, const char *function,
			  unsigned int line)
{
	struct ext4_iloc iloc;
	struct ext4_inode *raw_inode;
	struct ext4_inode_info *ei;
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
	struct inode *inode;
	const char *err_str;
	journal_t *journal = EXT4_SB(sb)->s_journal;
	long ret;
	loff_t size;
	int block;
	uid_t i_uid;
	gid_t i_gid;
	projid_t i_projid;

	if ((!(flags & EXT4_IGET_SPECIAL) &&
	     ((ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) ||
	      ino == le32_to_cpu(es->s_usr_quota_inum) ||
	      ino == le32_to_cpu(es->s_grp_quota_inum) ||
	      ino == le32_to_cpu(es->s_prj_quota_inum) ||
	      ino == le32_to_cpu(es->s_orphan_file_inum))) ||
	    (ino < EXT4_ROOT_INO) ||
	    (ino > le32_to_cpu(es->s_inodes_count))) {
		if (flags & EXT4_IGET_HANDLE)
			return ERR_PTR(-ESTALE);
		__ext4_error(sb, function, line, false, EFSCORRUPTED, 0,
471162306a36Sopenharmony_ci "inode #%lu: comm %s: iget: illegal inode #", 471262306a36Sopenharmony_ci ino, current->comm); 471362306a36Sopenharmony_ci return ERR_PTR(-EFSCORRUPTED); 471462306a36Sopenharmony_ci } 471562306a36Sopenharmony_ci 471662306a36Sopenharmony_ci inode = iget_locked(sb, ino); 471762306a36Sopenharmony_ci if (!inode) 471862306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 471962306a36Sopenharmony_ci if (!(inode->i_state & I_NEW)) { 472062306a36Sopenharmony_ci if ((err_str = check_igot_inode(inode, flags)) != NULL) { 472162306a36Sopenharmony_ci ext4_error_inode(inode, function, line, 0, err_str); 472262306a36Sopenharmony_ci iput(inode); 472362306a36Sopenharmony_ci return ERR_PTR(-EFSCORRUPTED); 472462306a36Sopenharmony_ci } 472562306a36Sopenharmony_ci return inode; 472662306a36Sopenharmony_ci } 472762306a36Sopenharmony_ci 472862306a36Sopenharmony_ci ei = EXT4_I(inode); 472962306a36Sopenharmony_ci iloc.bh = NULL; 473062306a36Sopenharmony_ci 473162306a36Sopenharmony_ci ret = __ext4_get_inode_loc_noinmem(inode, &iloc); 473262306a36Sopenharmony_ci if (ret < 0) 473362306a36Sopenharmony_ci goto bad_inode; 473462306a36Sopenharmony_ci raw_inode = ext4_raw_inode(&iloc); 473562306a36Sopenharmony_ci 473662306a36Sopenharmony_ci if ((flags & EXT4_IGET_HANDLE) && 473762306a36Sopenharmony_ci (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) { 473862306a36Sopenharmony_ci ret = -ESTALE; 473962306a36Sopenharmony_ci goto bad_inode; 474062306a36Sopenharmony_ci } 474162306a36Sopenharmony_ci 474262306a36Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 474362306a36Sopenharmony_ci ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 474462306a36Sopenharmony_ci if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 474562306a36Sopenharmony_ci EXT4_INODE_SIZE(inode->i_sb) || 474662306a36Sopenharmony_ci (ei->i_extra_isize & 3)) { 474762306a36Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 474862306a36Sopenharmony_ci "iget: 
bad extra_isize %u " 474962306a36Sopenharmony_ci "(inode size %u)", 475062306a36Sopenharmony_ci ei->i_extra_isize, 475162306a36Sopenharmony_ci EXT4_INODE_SIZE(inode->i_sb)); 475262306a36Sopenharmony_ci ret = -EFSCORRUPTED; 475362306a36Sopenharmony_ci goto bad_inode; 475462306a36Sopenharmony_ci } 475562306a36Sopenharmony_ci } else 475662306a36Sopenharmony_ci ei->i_extra_isize = 0; 475762306a36Sopenharmony_ci 475862306a36Sopenharmony_ci /* Precompute checksum seed for inode metadata */ 475962306a36Sopenharmony_ci if (ext4_has_metadata_csum(sb)) { 476062306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 476162306a36Sopenharmony_ci __u32 csum; 476262306a36Sopenharmony_ci __le32 inum = cpu_to_le32(inode->i_ino); 476362306a36Sopenharmony_ci __le32 gen = raw_inode->i_generation; 476462306a36Sopenharmony_ci csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum, 476562306a36Sopenharmony_ci sizeof(inum)); 476662306a36Sopenharmony_ci ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen, 476762306a36Sopenharmony_ci sizeof(gen)); 476862306a36Sopenharmony_ci } 476962306a36Sopenharmony_ci 477062306a36Sopenharmony_ci if ((!ext4_inode_csum_verify(inode, raw_inode, ei) || 477162306a36Sopenharmony_ci ext4_simulate_fail(sb, EXT4_SIM_INODE_CRC)) && 477262306a36Sopenharmony_ci (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))) { 477362306a36Sopenharmony_ci ext4_error_inode_err(inode, function, line, 0, 477462306a36Sopenharmony_ci EFSBADCRC, "iget: checksum invalid"); 477562306a36Sopenharmony_ci ret = -EFSBADCRC; 477662306a36Sopenharmony_ci goto bad_inode; 477762306a36Sopenharmony_ci } 477862306a36Sopenharmony_ci 477962306a36Sopenharmony_ci inode->i_mode = le16_to_cpu(raw_inode->i_mode); 478062306a36Sopenharmony_ci i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 478162306a36Sopenharmony_ci i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); 478262306a36Sopenharmony_ci if (ext4_has_feature_project(sb) && 478362306a36Sopenharmony_ci EXT4_INODE_SIZE(sb) > 
EXT4_GOOD_OLD_INODE_SIZE && 478462306a36Sopenharmony_ci EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) 478562306a36Sopenharmony_ci i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid); 478662306a36Sopenharmony_ci else 478762306a36Sopenharmony_ci i_projid = EXT4_DEF_PROJID; 478862306a36Sopenharmony_ci 478962306a36Sopenharmony_ci if (!(test_opt(inode->i_sb, NO_UID32))) { 479062306a36Sopenharmony_ci i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; 479162306a36Sopenharmony_ci i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; 479262306a36Sopenharmony_ci } 479362306a36Sopenharmony_ci i_uid_write(inode, i_uid); 479462306a36Sopenharmony_ci i_gid_write(inode, i_gid); 479562306a36Sopenharmony_ci ei->i_projid = make_kprojid(&init_user_ns, i_projid); 479662306a36Sopenharmony_ci set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); 479762306a36Sopenharmony_ci 479862306a36Sopenharmony_ci ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */ 479962306a36Sopenharmony_ci ei->i_inline_off = 0; 480062306a36Sopenharmony_ci ei->i_dir_start_lookup = 0; 480162306a36Sopenharmony_ci ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); 480262306a36Sopenharmony_ci /* We now have enough fields to check if the inode was active or not. 
480362306a36Sopenharmony_ci * This is needed because nfsd might try to access dead inodes 480462306a36Sopenharmony_ci * the test is that same one that e2fsck uses 480562306a36Sopenharmony_ci * NeilBrown 1999oct15 480662306a36Sopenharmony_ci */ 480762306a36Sopenharmony_ci if (inode->i_nlink == 0) { 480862306a36Sopenharmony_ci if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL || 480962306a36Sopenharmony_ci !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) && 481062306a36Sopenharmony_ci ino != EXT4_BOOT_LOADER_INO) { 481162306a36Sopenharmony_ci /* this inode is deleted or unallocated */ 481262306a36Sopenharmony_ci if (flags & EXT4_IGET_SPECIAL) { 481362306a36Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 481462306a36Sopenharmony_ci "iget: special inode unallocated"); 481562306a36Sopenharmony_ci ret = -EFSCORRUPTED; 481662306a36Sopenharmony_ci } else 481762306a36Sopenharmony_ci ret = -ESTALE; 481862306a36Sopenharmony_ci goto bad_inode; 481962306a36Sopenharmony_ci } 482062306a36Sopenharmony_ci /* The only unlinked inodes we let through here have 482162306a36Sopenharmony_ci * valid i_mode and are being read by the orphan 482262306a36Sopenharmony_ci * recovery code: that's fine, we're about to complete 482362306a36Sopenharmony_ci * the process of deleting those. 482462306a36Sopenharmony_ci * OR it is the EXT4_BOOT_LOADER_INO which is 482562306a36Sopenharmony_ci * not initialized on a new filesystem. 
*/ 482662306a36Sopenharmony_ci } 482762306a36Sopenharmony_ci ei->i_flags = le32_to_cpu(raw_inode->i_flags); 482862306a36Sopenharmony_ci ext4_set_inode_flags(inode, true); 482962306a36Sopenharmony_ci inode->i_blocks = ext4_inode_blocks(raw_inode, ei); 483062306a36Sopenharmony_ci ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo); 483162306a36Sopenharmony_ci if (ext4_has_feature_64bit(sb)) 483262306a36Sopenharmony_ci ei->i_file_acl |= 483362306a36Sopenharmony_ci ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; 483462306a36Sopenharmony_ci inode->i_size = ext4_isize(sb, raw_inode); 483562306a36Sopenharmony_ci if ((size = i_size_read(inode)) < 0) { 483662306a36Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 483762306a36Sopenharmony_ci "iget: bad i_size value: %lld", size); 483862306a36Sopenharmony_ci ret = -EFSCORRUPTED; 483962306a36Sopenharmony_ci goto bad_inode; 484062306a36Sopenharmony_ci } 484162306a36Sopenharmony_ci /* 484262306a36Sopenharmony_ci * If dir_index is not enabled but there's dir with INDEX flag set, 484362306a36Sopenharmony_ci * we'd normally treat htree data as empty space. But with metadata 484462306a36Sopenharmony_ci * checksumming that corrupts checksums so forbid that. 
484562306a36Sopenharmony_ci */ 484662306a36Sopenharmony_ci if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) && 484762306a36Sopenharmony_ci ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { 484862306a36Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 484962306a36Sopenharmony_ci "iget: Dir with htree data on filesystem without dir_index feature."); 485062306a36Sopenharmony_ci ret = -EFSCORRUPTED; 485162306a36Sopenharmony_ci goto bad_inode; 485262306a36Sopenharmony_ci } 485362306a36Sopenharmony_ci ei->i_disksize = inode->i_size; 485462306a36Sopenharmony_ci#ifdef CONFIG_QUOTA 485562306a36Sopenharmony_ci ei->i_reserved_quota = 0; 485662306a36Sopenharmony_ci#endif 485762306a36Sopenharmony_ci inode->i_generation = le32_to_cpu(raw_inode->i_generation); 485862306a36Sopenharmony_ci ei->i_block_group = iloc.block_group; 485962306a36Sopenharmony_ci ei->i_last_alloc_group = ~0; 486062306a36Sopenharmony_ci /* 486162306a36Sopenharmony_ci * NOTE! The in-memory inode i_data array is in little-endian order 486262306a36Sopenharmony_ci * even on big-endian machines: we do NOT byteswap the block numbers! 486362306a36Sopenharmony_ci */ 486462306a36Sopenharmony_ci for (block = 0; block < EXT4_N_BLOCKS; block++) 486562306a36Sopenharmony_ci ei->i_data[block] = raw_inode->i_block[block]; 486662306a36Sopenharmony_ci INIT_LIST_HEAD(&ei->i_orphan); 486762306a36Sopenharmony_ci ext4_fc_init_inode(&ei->vfs_inode); 486862306a36Sopenharmony_ci 486962306a36Sopenharmony_ci /* 487062306a36Sopenharmony_ci * Set transaction id's of transactions that have to be committed 487162306a36Sopenharmony_ci * to finish f[data]sync. We set them to currently running transaction 487262306a36Sopenharmony_ci * as we cannot be sure that the inode or some of its metadata isn't 487362306a36Sopenharmony_ci * part of the transaction - the inode could have been reclaimed and 487462306a36Sopenharmony_ci * now it is reread from disk. 
487562306a36Sopenharmony_ci */ 487662306a36Sopenharmony_ci if (journal) { 487762306a36Sopenharmony_ci transaction_t *transaction; 487862306a36Sopenharmony_ci tid_t tid; 487962306a36Sopenharmony_ci 488062306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 488162306a36Sopenharmony_ci if (journal->j_running_transaction) 488262306a36Sopenharmony_ci transaction = journal->j_running_transaction; 488362306a36Sopenharmony_ci else 488462306a36Sopenharmony_ci transaction = journal->j_committing_transaction; 488562306a36Sopenharmony_ci if (transaction) 488662306a36Sopenharmony_ci tid = transaction->t_tid; 488762306a36Sopenharmony_ci else 488862306a36Sopenharmony_ci tid = journal->j_commit_sequence; 488962306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 489062306a36Sopenharmony_ci ei->i_sync_tid = tid; 489162306a36Sopenharmony_ci ei->i_datasync_tid = tid; 489262306a36Sopenharmony_ci } 489362306a36Sopenharmony_ci 489462306a36Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 489562306a36Sopenharmony_ci if (ei->i_extra_isize == 0) { 489662306a36Sopenharmony_ci /* The extra space is currently unused. Use it. 
*/ 489762306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct ext4_inode) & 3); 489862306a36Sopenharmony_ci ei->i_extra_isize = sizeof(struct ext4_inode) - 489962306a36Sopenharmony_ci EXT4_GOOD_OLD_INODE_SIZE; 490062306a36Sopenharmony_ci } else { 490162306a36Sopenharmony_ci ret = ext4_iget_extra_inode(inode, raw_inode, ei); 490262306a36Sopenharmony_ci if (ret) 490362306a36Sopenharmony_ci goto bad_inode; 490462306a36Sopenharmony_ci } 490562306a36Sopenharmony_ci } 490662306a36Sopenharmony_ci 490762306a36Sopenharmony_ci EXT4_INODE_GET_CTIME(inode, raw_inode); 490862306a36Sopenharmony_ci EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode); 490962306a36Sopenharmony_ci EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); 491062306a36Sopenharmony_ci EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); 491162306a36Sopenharmony_ci 491262306a36Sopenharmony_ci if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { 491362306a36Sopenharmony_ci u64 ivers = le32_to_cpu(raw_inode->i_disk_version); 491462306a36Sopenharmony_ci 491562306a36Sopenharmony_ci if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 491662306a36Sopenharmony_ci if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) 491762306a36Sopenharmony_ci ivers |= 491862306a36Sopenharmony_ci (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; 491962306a36Sopenharmony_ci } 492062306a36Sopenharmony_ci ext4_inode_set_iversion_queried(inode, ivers); 492162306a36Sopenharmony_ci } 492262306a36Sopenharmony_ci 492362306a36Sopenharmony_ci ret = 0; 492462306a36Sopenharmony_ci if (ei->i_file_acl && 492562306a36Sopenharmony_ci !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) { 492662306a36Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 492762306a36Sopenharmony_ci "iget: bad extended attribute block %llu", 492862306a36Sopenharmony_ci ei->i_file_acl); 492962306a36Sopenharmony_ci ret = -EFSCORRUPTED; 493062306a36Sopenharmony_ci goto bad_inode; 493162306a36Sopenharmony_ci } else if (!ext4_has_inline_data(inode)) { 
493262306a36Sopenharmony_ci /* validate the block references in the inode */ 493362306a36Sopenharmony_ci if (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) && 493462306a36Sopenharmony_ci (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 493562306a36Sopenharmony_ci (S_ISLNK(inode->i_mode) && 493662306a36Sopenharmony_ci !ext4_inode_is_fast_symlink(inode)))) { 493762306a36Sopenharmony_ci if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) 493862306a36Sopenharmony_ci ret = ext4_ext_check_inode(inode); 493962306a36Sopenharmony_ci else 494062306a36Sopenharmony_ci ret = ext4_ind_check_inode(inode); 494162306a36Sopenharmony_ci } 494262306a36Sopenharmony_ci } 494362306a36Sopenharmony_ci if (ret) 494462306a36Sopenharmony_ci goto bad_inode; 494562306a36Sopenharmony_ci 494662306a36Sopenharmony_ci if (S_ISREG(inode->i_mode)) { 494762306a36Sopenharmony_ci inode->i_op = &ext4_file_inode_operations; 494862306a36Sopenharmony_ci inode->i_fop = &ext4_file_operations; 494962306a36Sopenharmony_ci ext4_set_aops(inode); 495062306a36Sopenharmony_ci } else if (S_ISDIR(inode->i_mode)) { 495162306a36Sopenharmony_ci inode->i_op = &ext4_dir_inode_operations; 495262306a36Sopenharmony_ci inode->i_fop = &ext4_dir_operations; 495362306a36Sopenharmony_ci } else if (S_ISLNK(inode->i_mode)) { 495462306a36Sopenharmony_ci /* VFS does not allow setting these so must be corruption */ 495562306a36Sopenharmony_ci if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { 495662306a36Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 495762306a36Sopenharmony_ci "iget: immutable or append flags " 495862306a36Sopenharmony_ci "not allowed on symlinks"); 495962306a36Sopenharmony_ci ret = -EFSCORRUPTED; 496062306a36Sopenharmony_ci goto bad_inode; 496162306a36Sopenharmony_ci } 496262306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) { 496362306a36Sopenharmony_ci inode->i_op = &ext4_encrypted_symlink_inode_operations; 496462306a36Sopenharmony_ci } else if (ext4_inode_is_fast_symlink(inode)) { 
496562306a36Sopenharmony_ci inode->i_link = (char *)ei->i_data; 496662306a36Sopenharmony_ci inode->i_op = &ext4_fast_symlink_inode_operations; 496762306a36Sopenharmony_ci nd_terminate_link(ei->i_data, inode->i_size, 496862306a36Sopenharmony_ci sizeof(ei->i_data) - 1); 496962306a36Sopenharmony_ci } else { 497062306a36Sopenharmony_ci inode->i_op = &ext4_symlink_inode_operations; 497162306a36Sopenharmony_ci } 497262306a36Sopenharmony_ci } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || 497362306a36Sopenharmony_ci S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 497462306a36Sopenharmony_ci inode->i_op = &ext4_special_inode_operations; 497562306a36Sopenharmony_ci if (raw_inode->i_block[0]) 497662306a36Sopenharmony_ci init_special_inode(inode, inode->i_mode, 497762306a36Sopenharmony_ci old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); 497862306a36Sopenharmony_ci else 497962306a36Sopenharmony_ci init_special_inode(inode, inode->i_mode, 498062306a36Sopenharmony_ci new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 498162306a36Sopenharmony_ci } else if (ino == EXT4_BOOT_LOADER_INO) { 498262306a36Sopenharmony_ci make_bad_inode(inode); 498362306a36Sopenharmony_ci } else { 498462306a36Sopenharmony_ci ret = -EFSCORRUPTED; 498562306a36Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 498662306a36Sopenharmony_ci "iget: bogus i_mode (%o)", inode->i_mode); 498762306a36Sopenharmony_ci goto bad_inode; 498862306a36Sopenharmony_ci } 498962306a36Sopenharmony_ci if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) { 499062306a36Sopenharmony_ci ext4_error_inode(inode, function, line, 0, 499162306a36Sopenharmony_ci "casefold flag without casefold feature"); 499262306a36Sopenharmony_ci ret = -EFSCORRUPTED; 499362306a36Sopenharmony_ci goto bad_inode; 499462306a36Sopenharmony_ci } 499562306a36Sopenharmony_ci if ((err_str = check_igot_inode(inode, flags)) != NULL) { 499662306a36Sopenharmony_ci ext4_error_inode(inode, function, line, 0, err_str); 
499762306a36Sopenharmony_ci ret = -EFSCORRUPTED; 499862306a36Sopenharmony_ci goto bad_inode; 499962306a36Sopenharmony_ci } 500062306a36Sopenharmony_ci 500162306a36Sopenharmony_ci brelse(iloc.bh); 500262306a36Sopenharmony_ci unlock_new_inode(inode); 500362306a36Sopenharmony_ci return inode; 500462306a36Sopenharmony_ci 500562306a36Sopenharmony_cibad_inode: 500662306a36Sopenharmony_ci brelse(iloc.bh); 500762306a36Sopenharmony_ci iget_failed(inode); 500862306a36Sopenharmony_ci return ERR_PTR(ret); 500962306a36Sopenharmony_ci} 501062306a36Sopenharmony_ci 501162306a36Sopenharmony_cistatic void __ext4_update_other_inode_time(struct super_block *sb, 501262306a36Sopenharmony_ci unsigned long orig_ino, 501362306a36Sopenharmony_ci unsigned long ino, 501462306a36Sopenharmony_ci struct ext4_inode *raw_inode) 501562306a36Sopenharmony_ci{ 501662306a36Sopenharmony_ci struct inode *inode; 501762306a36Sopenharmony_ci 501862306a36Sopenharmony_ci inode = find_inode_by_ino_rcu(sb, ino); 501962306a36Sopenharmony_ci if (!inode) 502062306a36Sopenharmony_ci return; 502162306a36Sopenharmony_ci 502262306a36Sopenharmony_ci if (!inode_is_dirtytime_only(inode)) 502362306a36Sopenharmony_ci return; 502462306a36Sopenharmony_ci 502562306a36Sopenharmony_ci spin_lock(&inode->i_lock); 502662306a36Sopenharmony_ci if (inode_is_dirtytime_only(inode)) { 502762306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 502862306a36Sopenharmony_ci 502962306a36Sopenharmony_ci inode->i_state &= ~I_DIRTY_TIME; 503062306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 503162306a36Sopenharmony_ci 503262306a36Sopenharmony_ci spin_lock(&ei->i_raw_lock); 503362306a36Sopenharmony_ci EXT4_INODE_SET_CTIME(inode, raw_inode); 503462306a36Sopenharmony_ci EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode); 503562306a36Sopenharmony_ci EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); 503662306a36Sopenharmony_ci ext4_inode_csum_set(inode, raw_inode, ei); 503762306a36Sopenharmony_ci spin_unlock(&ei->i_raw_lock); 
503862306a36Sopenharmony_ci trace_ext4_other_inode_update_time(inode, orig_ino); 503962306a36Sopenharmony_ci return; 504062306a36Sopenharmony_ci } 504162306a36Sopenharmony_ci spin_unlock(&inode->i_lock); 504262306a36Sopenharmony_ci} 504362306a36Sopenharmony_ci 504462306a36Sopenharmony_ci/* 504562306a36Sopenharmony_ci * Opportunistically update the other time fields for other inodes in 504662306a36Sopenharmony_ci * the same inode table block. 504762306a36Sopenharmony_ci */ 504862306a36Sopenharmony_cistatic void ext4_update_other_inodes_time(struct super_block *sb, 504962306a36Sopenharmony_ci unsigned long orig_ino, char *buf) 505062306a36Sopenharmony_ci{ 505162306a36Sopenharmony_ci unsigned long ino; 505262306a36Sopenharmony_ci int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; 505362306a36Sopenharmony_ci int inode_size = EXT4_INODE_SIZE(sb); 505462306a36Sopenharmony_ci 505562306a36Sopenharmony_ci /* 505662306a36Sopenharmony_ci * Calculate the first inode in the inode table block. Inode 505762306a36Sopenharmony_ci * numbers are one-based. That is, the first inode in a block 505862306a36Sopenharmony_ci * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1). 505962306a36Sopenharmony_ci */ 506062306a36Sopenharmony_ci ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; 506162306a36Sopenharmony_ci rcu_read_lock(); 506262306a36Sopenharmony_ci for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { 506362306a36Sopenharmony_ci if (ino == orig_ino) 506462306a36Sopenharmony_ci continue; 506562306a36Sopenharmony_ci __ext4_update_other_inode_time(sb, orig_ino, ino, 506662306a36Sopenharmony_ci (struct ext4_inode *)buf); 506762306a36Sopenharmony_ci } 506862306a36Sopenharmony_ci rcu_read_unlock(); 506962306a36Sopenharmony_ci} 507062306a36Sopenharmony_ci 507162306a36Sopenharmony_ci/* 507262306a36Sopenharmony_ci * Post the struct inode info into an on-disk inode location in the 507362306a36Sopenharmony_ci * buffer-cache. 
This gobbles the caller's reference to the 507462306a36Sopenharmony_ci * buffer_head in the inode location struct. 507562306a36Sopenharmony_ci * 507662306a36Sopenharmony_ci * The caller must have write access to iloc->bh. 507762306a36Sopenharmony_ci */ 507862306a36Sopenharmony_cistatic int ext4_do_update_inode(handle_t *handle, 507962306a36Sopenharmony_ci struct inode *inode, 508062306a36Sopenharmony_ci struct ext4_iloc *iloc) 508162306a36Sopenharmony_ci{ 508262306a36Sopenharmony_ci struct ext4_inode *raw_inode = ext4_raw_inode(iloc); 508362306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 508462306a36Sopenharmony_ci struct buffer_head *bh = iloc->bh; 508562306a36Sopenharmony_ci struct super_block *sb = inode->i_sb; 508662306a36Sopenharmony_ci int err; 508762306a36Sopenharmony_ci int need_datasync = 0, set_large_file = 0; 508862306a36Sopenharmony_ci 508962306a36Sopenharmony_ci spin_lock(&ei->i_raw_lock); 509062306a36Sopenharmony_ci 509162306a36Sopenharmony_ci /* 509262306a36Sopenharmony_ci * For fields not tracked in the in-memory inode, initialise them 509362306a36Sopenharmony_ci * to zero for new inodes. 
509462306a36Sopenharmony_ci */ 509562306a36Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) 509662306a36Sopenharmony_ci memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 509762306a36Sopenharmony_ci 509862306a36Sopenharmony_ci if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode)) 509962306a36Sopenharmony_ci need_datasync = 1; 510062306a36Sopenharmony_ci if (ei->i_disksize > 0x7fffffffULL) { 510162306a36Sopenharmony_ci if (!ext4_has_feature_large_file(sb) || 510262306a36Sopenharmony_ci EXT4_SB(sb)->s_es->s_rev_level == cpu_to_le32(EXT4_GOOD_OLD_REV)) 510362306a36Sopenharmony_ci set_large_file = 1; 510462306a36Sopenharmony_ci } 510562306a36Sopenharmony_ci 510662306a36Sopenharmony_ci err = ext4_fill_raw_inode(inode, raw_inode); 510762306a36Sopenharmony_ci spin_unlock(&ei->i_raw_lock); 510862306a36Sopenharmony_ci if (err) { 510962306a36Sopenharmony_ci EXT4_ERROR_INODE(inode, "corrupted inode contents"); 511062306a36Sopenharmony_ci goto out_brelse; 511162306a36Sopenharmony_ci } 511262306a36Sopenharmony_ci 511362306a36Sopenharmony_ci if (inode->i_sb->s_flags & SB_LAZYTIME) 511462306a36Sopenharmony_ci ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, 511562306a36Sopenharmony_ci bh->b_data); 511662306a36Sopenharmony_ci 511762306a36Sopenharmony_ci BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 511862306a36Sopenharmony_ci err = ext4_handle_dirty_metadata(handle, NULL, bh); 511962306a36Sopenharmony_ci if (err) 512062306a36Sopenharmony_ci goto out_error; 512162306a36Sopenharmony_ci ext4_clear_inode_state(inode, EXT4_STATE_NEW); 512262306a36Sopenharmony_ci if (set_large_file) { 512362306a36Sopenharmony_ci BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); 512462306a36Sopenharmony_ci err = ext4_journal_get_write_access(handle, sb, 512562306a36Sopenharmony_ci EXT4_SB(sb)->s_sbh, 512662306a36Sopenharmony_ci EXT4_JTR_NONE); 512762306a36Sopenharmony_ci if (err) 512862306a36Sopenharmony_ci goto out_error; 
512962306a36Sopenharmony_ci lock_buffer(EXT4_SB(sb)->s_sbh); 513062306a36Sopenharmony_ci ext4_set_feature_large_file(sb); 513162306a36Sopenharmony_ci ext4_superblock_csum_set(sb); 513262306a36Sopenharmony_ci unlock_buffer(EXT4_SB(sb)->s_sbh); 513362306a36Sopenharmony_ci ext4_handle_sync(handle); 513462306a36Sopenharmony_ci err = ext4_handle_dirty_metadata(handle, NULL, 513562306a36Sopenharmony_ci EXT4_SB(sb)->s_sbh); 513662306a36Sopenharmony_ci } 513762306a36Sopenharmony_ci ext4_update_inode_fsync_trans(handle, inode, need_datasync); 513862306a36Sopenharmony_ciout_error: 513962306a36Sopenharmony_ci ext4_std_error(inode->i_sb, err); 514062306a36Sopenharmony_ciout_brelse: 514162306a36Sopenharmony_ci brelse(bh); 514262306a36Sopenharmony_ci return err; 514362306a36Sopenharmony_ci} 514462306a36Sopenharmony_ci 514562306a36Sopenharmony_ci/* 514662306a36Sopenharmony_ci * ext4_write_inode() 514762306a36Sopenharmony_ci * 514862306a36Sopenharmony_ci * We are called from a few places: 514962306a36Sopenharmony_ci * 515062306a36Sopenharmony_ci * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files. 515162306a36Sopenharmony_ci * Here, there will be no transaction running. We wait for any running 515262306a36Sopenharmony_ci * transaction to commit. 515362306a36Sopenharmony_ci * 515462306a36Sopenharmony_ci * - Within flush work (sys_sync(), kupdate and such). 515562306a36Sopenharmony_ci * We wait on commit, if told to. 515662306a36Sopenharmony_ci * 515762306a36Sopenharmony_ci * - Within iput_final() -> write_inode_now() 515862306a36Sopenharmony_ci * We wait on commit, if told to. 515962306a36Sopenharmony_ci * 516062306a36Sopenharmony_ci * In all cases it is actually safe for us to return without doing anything, 516162306a36Sopenharmony_ci * because the inode has been copied into a raw inode buffer in 516262306a36Sopenharmony_ci * ext4_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL 516362306a36Sopenharmony_ci * writeback. 
516462306a36Sopenharmony_ci * 516562306a36Sopenharmony_ci * Note that we are absolutely dependent upon all inode dirtiers doing the 516662306a36Sopenharmony_ci * right thing: they *must* call mark_inode_dirty() after dirtying info in 516762306a36Sopenharmony_ci * which we are interested. 516862306a36Sopenharmony_ci * 516962306a36Sopenharmony_ci * It would be a bug for them to not do this. The code: 517062306a36Sopenharmony_ci * 517162306a36Sopenharmony_ci * mark_inode_dirty(inode) 517262306a36Sopenharmony_ci * stuff(); 517362306a36Sopenharmony_ci * inode->i_size = expr; 517462306a36Sopenharmony_ci * 517562306a36Sopenharmony_ci * is in error because write_inode() could occur while `stuff()' is running, 517662306a36Sopenharmony_ci * and the new i_size will be lost. Plus the inode will no longer be on the 517762306a36Sopenharmony_ci * superblock's dirty inode list. 517862306a36Sopenharmony_ci */ 517962306a36Sopenharmony_ciint ext4_write_inode(struct inode *inode, struct writeback_control *wbc) 518062306a36Sopenharmony_ci{ 518162306a36Sopenharmony_ci int err; 518262306a36Sopenharmony_ci 518362306a36Sopenharmony_ci if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) 518462306a36Sopenharmony_ci return 0; 518562306a36Sopenharmony_ci 518662306a36Sopenharmony_ci if (unlikely(ext4_forced_shutdown(inode->i_sb))) 518762306a36Sopenharmony_ci return -EIO; 518862306a36Sopenharmony_ci 518962306a36Sopenharmony_ci if (EXT4_SB(inode->i_sb)->s_journal) { 519062306a36Sopenharmony_ci if (ext4_journal_current_handle()) { 519162306a36Sopenharmony_ci ext4_debug("called recursively, non-PF_MEMALLOC!\n"); 519262306a36Sopenharmony_ci dump_stack(); 519362306a36Sopenharmony_ci return -EIO; 519462306a36Sopenharmony_ci } 519562306a36Sopenharmony_ci 519662306a36Sopenharmony_ci /* 519762306a36Sopenharmony_ci * No need to force transaction in WB_SYNC_NONE mode. Also 519862306a36Sopenharmony_ci * ext4_sync_fs() will force the commit after everything is 519962306a36Sopenharmony_ci * written. 
520062306a36Sopenharmony_ci */ 520162306a36Sopenharmony_ci if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) 520262306a36Sopenharmony_ci return 0; 520362306a36Sopenharmony_ci 520462306a36Sopenharmony_ci err = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal, 520562306a36Sopenharmony_ci EXT4_I(inode)->i_sync_tid); 520662306a36Sopenharmony_ci } else { 520762306a36Sopenharmony_ci struct ext4_iloc iloc; 520862306a36Sopenharmony_ci 520962306a36Sopenharmony_ci err = __ext4_get_inode_loc_noinmem(inode, &iloc); 521062306a36Sopenharmony_ci if (err) 521162306a36Sopenharmony_ci return err; 521262306a36Sopenharmony_ci /* 521362306a36Sopenharmony_ci * sync(2) will flush the whole buffer cache. No need to do 521462306a36Sopenharmony_ci * it here separately for each inode. 521562306a36Sopenharmony_ci */ 521662306a36Sopenharmony_ci if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) 521762306a36Sopenharmony_ci sync_dirty_buffer(iloc.bh); 521862306a36Sopenharmony_ci if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { 521962306a36Sopenharmony_ci ext4_error_inode_block(inode, iloc.bh->b_blocknr, EIO, 522062306a36Sopenharmony_ci "IO error syncing inode"); 522162306a36Sopenharmony_ci err = -EIO; 522262306a36Sopenharmony_ci } 522362306a36Sopenharmony_ci brelse(iloc.bh); 522462306a36Sopenharmony_ci } 522562306a36Sopenharmony_ci return err; 522662306a36Sopenharmony_ci} 522762306a36Sopenharmony_ci 522862306a36Sopenharmony_ci/* 522962306a36Sopenharmony_ci * In data=journal mode ext4_journalled_invalidate_folio() may fail to invalidate 523062306a36Sopenharmony_ci * buffers that are attached to a folio straddling i_size and are undergoing 523162306a36Sopenharmony_ci * commit. In that case we have to wait for commit to finish and try again. 
523262306a36Sopenharmony_ci */ 523362306a36Sopenharmony_cistatic void ext4_wait_for_tail_page_commit(struct inode *inode) 523462306a36Sopenharmony_ci{ 523562306a36Sopenharmony_ci unsigned offset; 523662306a36Sopenharmony_ci journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 523762306a36Sopenharmony_ci tid_t commit_tid = 0; 523862306a36Sopenharmony_ci int ret; 523962306a36Sopenharmony_ci 524062306a36Sopenharmony_ci offset = inode->i_size & (PAGE_SIZE - 1); 524162306a36Sopenharmony_ci /* 524262306a36Sopenharmony_ci * If the folio is fully truncated, we don't need to wait for any commit 524362306a36Sopenharmony_ci * (and we even should not as __ext4_journalled_invalidate_folio() may 524462306a36Sopenharmony_ci * strip all buffers from the folio but keep the folio dirty which can then 524562306a36Sopenharmony_ci * confuse e.g. concurrent ext4_writepages() seeing dirty folio without 524662306a36Sopenharmony_ci * buffers). Also we don't need to wait for any commit if all buffers in 524762306a36Sopenharmony_ci * the folio remain valid. This is most beneficial for the common case of 524862306a36Sopenharmony_ci * blocksize == PAGESIZE. 
524962306a36Sopenharmony_ci */ 525062306a36Sopenharmony_ci if (!offset || offset > (PAGE_SIZE - i_blocksize(inode))) 525162306a36Sopenharmony_ci return; 525262306a36Sopenharmony_ci while (1) { 525362306a36Sopenharmony_ci struct folio *folio = filemap_lock_folio(inode->i_mapping, 525462306a36Sopenharmony_ci inode->i_size >> PAGE_SHIFT); 525562306a36Sopenharmony_ci if (IS_ERR(folio)) 525662306a36Sopenharmony_ci return; 525762306a36Sopenharmony_ci ret = __ext4_journalled_invalidate_folio(folio, offset, 525862306a36Sopenharmony_ci folio_size(folio) - offset); 525962306a36Sopenharmony_ci folio_unlock(folio); 526062306a36Sopenharmony_ci folio_put(folio); 526162306a36Sopenharmony_ci if (ret != -EBUSY) 526262306a36Sopenharmony_ci return; 526362306a36Sopenharmony_ci commit_tid = 0; 526462306a36Sopenharmony_ci read_lock(&journal->j_state_lock); 526562306a36Sopenharmony_ci if (journal->j_committing_transaction) 526662306a36Sopenharmony_ci commit_tid = journal->j_committing_transaction->t_tid; 526762306a36Sopenharmony_ci read_unlock(&journal->j_state_lock); 526862306a36Sopenharmony_ci if (commit_tid) 526962306a36Sopenharmony_ci jbd2_log_wait_commit(journal, commit_tid); 527062306a36Sopenharmony_ci } 527162306a36Sopenharmony_ci} 527262306a36Sopenharmony_ci 527362306a36Sopenharmony_ci/* 527462306a36Sopenharmony_ci * ext4_setattr() 527562306a36Sopenharmony_ci * 527662306a36Sopenharmony_ci * Called from notify_change. 527762306a36Sopenharmony_ci * 527862306a36Sopenharmony_ci * We want to trap VFS attempts to truncate the file as soon as 527962306a36Sopenharmony_ci * possible. 
In particular, we want to make sure that when the VFS 528062306a36Sopenharmony_ci * shrinks i_size, we put the inode on the orphan list and modify 528162306a36Sopenharmony_ci * i_disksize immediately, so that during the subsequent flushing of 528262306a36Sopenharmony_ci * dirty pages and freeing of disk blocks, we can guarantee that any 528362306a36Sopenharmony_ci * commit will leave the blocks being flushed in an unused state on 528462306a36Sopenharmony_ci * disk. (On recovery, the inode will get truncated and the blocks will 528562306a36Sopenharmony_ci * be freed, so we have a strong guarantee that no future commit will 528662306a36Sopenharmony_ci * leave these blocks visible to the user.) 528762306a36Sopenharmony_ci * 528862306a36Sopenharmony_ci * Another thing we have to assure is that if we are in ordered mode 528962306a36Sopenharmony_ci * and inode is still attached to the committing transaction, we must 529062306a36Sopenharmony_ci * start writeout of all the dirty pages which are being truncated. 529162306a36Sopenharmony_ci * This way we are sure that all the data written in the previous 529262306a36Sopenharmony_ci * transaction are already on disk (truncate waits for pages under 529362306a36Sopenharmony_ci * writeback). 529462306a36Sopenharmony_ci * 529562306a36Sopenharmony_ci * Called with inode->i_rwsem down. 
529662306a36Sopenharmony_ci */ 529762306a36Sopenharmony_ciint ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 529862306a36Sopenharmony_ci struct iattr *attr) 529962306a36Sopenharmony_ci{ 530062306a36Sopenharmony_ci struct inode *inode = d_inode(dentry); 530162306a36Sopenharmony_ci int error, rc = 0; 530262306a36Sopenharmony_ci int orphan = 0; 530362306a36Sopenharmony_ci const unsigned int ia_valid = attr->ia_valid; 530462306a36Sopenharmony_ci bool inc_ivers = true; 530562306a36Sopenharmony_ci 530662306a36Sopenharmony_ci if (unlikely(ext4_forced_shutdown(inode->i_sb))) 530762306a36Sopenharmony_ci return -EIO; 530862306a36Sopenharmony_ci 530962306a36Sopenharmony_ci if (unlikely(IS_IMMUTABLE(inode))) 531062306a36Sopenharmony_ci return -EPERM; 531162306a36Sopenharmony_ci 531262306a36Sopenharmony_ci if (unlikely(IS_APPEND(inode) && 531362306a36Sopenharmony_ci (ia_valid & (ATTR_MODE | ATTR_UID | 531462306a36Sopenharmony_ci ATTR_GID | ATTR_TIMES_SET)))) 531562306a36Sopenharmony_ci return -EPERM; 531662306a36Sopenharmony_ci 531762306a36Sopenharmony_ci error = setattr_prepare(idmap, dentry, attr); 531862306a36Sopenharmony_ci if (error) 531962306a36Sopenharmony_ci return error; 532062306a36Sopenharmony_ci 532162306a36Sopenharmony_ci error = fscrypt_prepare_setattr(dentry, attr); 532262306a36Sopenharmony_ci if (error) 532362306a36Sopenharmony_ci return error; 532462306a36Sopenharmony_ci 532562306a36Sopenharmony_ci error = fsverity_prepare_setattr(dentry, attr); 532662306a36Sopenharmony_ci if (error) 532762306a36Sopenharmony_ci return error; 532862306a36Sopenharmony_ci 532962306a36Sopenharmony_ci if (is_quota_modification(idmap, inode, attr)) { 533062306a36Sopenharmony_ci error = dquot_initialize(inode); 533162306a36Sopenharmony_ci if (error) 533262306a36Sopenharmony_ci return error; 533362306a36Sopenharmony_ci } 533462306a36Sopenharmony_ci 533562306a36Sopenharmony_ci if (i_uid_needs_update(idmap, attr, inode) || 533662306a36Sopenharmony_ci 
i_gid_needs_update(idmap, attr, inode)) { 533762306a36Sopenharmony_ci handle_t *handle; 533862306a36Sopenharmony_ci 533962306a36Sopenharmony_ci /* (user+group)*(old+new) structure, inode write (sb, 534062306a36Sopenharmony_ci * inode block, ? - but truncate inode update has it) */ 534162306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 534262306a36Sopenharmony_ci (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) + 534362306a36Sopenharmony_ci EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3); 534462306a36Sopenharmony_ci if (IS_ERR(handle)) { 534562306a36Sopenharmony_ci error = PTR_ERR(handle); 534662306a36Sopenharmony_ci goto err_out; 534762306a36Sopenharmony_ci } 534862306a36Sopenharmony_ci 534962306a36Sopenharmony_ci /* dquot_transfer() calls back ext4_get_inode_usage() which 535062306a36Sopenharmony_ci * counts xattr inode references. 535162306a36Sopenharmony_ci */ 535262306a36Sopenharmony_ci down_read(&EXT4_I(inode)->xattr_sem); 535362306a36Sopenharmony_ci error = dquot_transfer(idmap, inode, attr); 535462306a36Sopenharmony_ci up_read(&EXT4_I(inode)->xattr_sem); 535562306a36Sopenharmony_ci 535662306a36Sopenharmony_ci if (error) { 535762306a36Sopenharmony_ci ext4_journal_stop(handle); 535862306a36Sopenharmony_ci return error; 535962306a36Sopenharmony_ci } 536062306a36Sopenharmony_ci /* Update corresponding info in inode so that everything is in 536162306a36Sopenharmony_ci * one transaction */ 536262306a36Sopenharmony_ci i_uid_update(idmap, attr, inode); 536362306a36Sopenharmony_ci i_gid_update(idmap, attr, inode); 536462306a36Sopenharmony_ci error = ext4_mark_inode_dirty(handle, inode); 536562306a36Sopenharmony_ci ext4_journal_stop(handle); 536662306a36Sopenharmony_ci if (unlikely(error)) { 536762306a36Sopenharmony_ci return error; 536862306a36Sopenharmony_ci } 536962306a36Sopenharmony_ci } 537062306a36Sopenharmony_ci 537162306a36Sopenharmony_ci if (attr->ia_valid & ATTR_SIZE) { 537262306a36Sopenharmony_ci handle_t *handle; 537362306a36Sopenharmony_ci 
loff_t oldsize = inode->i_size; 537462306a36Sopenharmony_ci loff_t old_disksize; 537562306a36Sopenharmony_ci int shrink = (attr->ia_size < inode->i_size); 537662306a36Sopenharmony_ci 537762306a36Sopenharmony_ci if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 537862306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 537962306a36Sopenharmony_ci 538062306a36Sopenharmony_ci if (attr->ia_size > sbi->s_bitmap_maxbytes) { 538162306a36Sopenharmony_ci return -EFBIG; 538262306a36Sopenharmony_ci } 538362306a36Sopenharmony_ci } 538462306a36Sopenharmony_ci if (!S_ISREG(inode->i_mode)) { 538562306a36Sopenharmony_ci return -EINVAL; 538662306a36Sopenharmony_ci } 538762306a36Sopenharmony_ci 538862306a36Sopenharmony_ci if (attr->ia_size == inode->i_size) 538962306a36Sopenharmony_ci inc_ivers = false; 539062306a36Sopenharmony_ci 539162306a36Sopenharmony_ci if (shrink) { 539262306a36Sopenharmony_ci if (ext4_should_order_data(inode)) { 539362306a36Sopenharmony_ci error = ext4_begin_ordered_truncate(inode, 539462306a36Sopenharmony_ci attr->ia_size); 539562306a36Sopenharmony_ci if (error) 539662306a36Sopenharmony_ci goto err_out; 539762306a36Sopenharmony_ci } 539862306a36Sopenharmony_ci /* 539962306a36Sopenharmony_ci * Blocks are going to be removed from the inode. Wait 540062306a36Sopenharmony_ci * for dio in flight. 
540162306a36Sopenharmony_ci */ 540262306a36Sopenharmony_ci inode_dio_wait(inode); 540362306a36Sopenharmony_ci } 540462306a36Sopenharmony_ci 540562306a36Sopenharmony_ci filemap_invalidate_lock(inode->i_mapping); 540662306a36Sopenharmony_ci 540762306a36Sopenharmony_ci rc = ext4_break_layouts(inode); 540862306a36Sopenharmony_ci if (rc) { 540962306a36Sopenharmony_ci filemap_invalidate_unlock(inode->i_mapping); 541062306a36Sopenharmony_ci goto err_out; 541162306a36Sopenharmony_ci } 541262306a36Sopenharmony_ci 541362306a36Sopenharmony_ci if (attr->ia_size != inode->i_size) { 541462306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); 541562306a36Sopenharmony_ci if (IS_ERR(handle)) { 541662306a36Sopenharmony_ci error = PTR_ERR(handle); 541762306a36Sopenharmony_ci goto out_mmap_sem; 541862306a36Sopenharmony_ci } 541962306a36Sopenharmony_ci if (ext4_handle_valid(handle) && shrink) { 542062306a36Sopenharmony_ci error = ext4_orphan_add(handle, inode); 542162306a36Sopenharmony_ci orphan = 1; 542262306a36Sopenharmony_ci } 542362306a36Sopenharmony_ci /* 542462306a36Sopenharmony_ci * Update c/mtime on truncate up, ext4_truncate() will 542562306a36Sopenharmony_ci * update c/mtime in shrink case below 542662306a36Sopenharmony_ci */ 542762306a36Sopenharmony_ci if (!shrink) 542862306a36Sopenharmony_ci inode->i_mtime = inode_set_ctime_current(inode); 542962306a36Sopenharmony_ci 543062306a36Sopenharmony_ci if (shrink) 543162306a36Sopenharmony_ci ext4_fc_track_range(handle, inode, 543262306a36Sopenharmony_ci (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >> 543362306a36Sopenharmony_ci inode->i_sb->s_blocksize_bits, 543462306a36Sopenharmony_ci EXT_MAX_BLOCKS - 1); 543562306a36Sopenharmony_ci else 543662306a36Sopenharmony_ci ext4_fc_track_range( 543762306a36Sopenharmony_ci handle, inode, 543862306a36Sopenharmony_ci (oldsize > 0 ? oldsize - 1 : oldsize) >> 543962306a36Sopenharmony_ci inode->i_sb->s_blocksize_bits, 544062306a36Sopenharmony_ci (attr->ia_size > 0 ? 
attr->ia_size - 1 : 0) >> 544162306a36Sopenharmony_ci inode->i_sb->s_blocksize_bits); 544262306a36Sopenharmony_ci 544362306a36Sopenharmony_ci down_write(&EXT4_I(inode)->i_data_sem); 544462306a36Sopenharmony_ci old_disksize = EXT4_I(inode)->i_disksize; 544562306a36Sopenharmony_ci EXT4_I(inode)->i_disksize = attr->ia_size; 544662306a36Sopenharmony_ci rc = ext4_mark_inode_dirty(handle, inode); 544762306a36Sopenharmony_ci if (!error) 544862306a36Sopenharmony_ci error = rc; 544962306a36Sopenharmony_ci /* 545062306a36Sopenharmony_ci * We have to update i_size under i_data_sem together 545162306a36Sopenharmony_ci * with i_disksize to avoid races with writeback code 545262306a36Sopenharmony_ci * running ext4_wb_update_i_disksize(). 545362306a36Sopenharmony_ci */ 545462306a36Sopenharmony_ci if (!error) 545562306a36Sopenharmony_ci i_size_write(inode, attr->ia_size); 545662306a36Sopenharmony_ci else 545762306a36Sopenharmony_ci EXT4_I(inode)->i_disksize = old_disksize; 545862306a36Sopenharmony_ci up_write(&EXT4_I(inode)->i_data_sem); 545962306a36Sopenharmony_ci ext4_journal_stop(handle); 546062306a36Sopenharmony_ci if (error) 546162306a36Sopenharmony_ci goto out_mmap_sem; 546262306a36Sopenharmony_ci if (!shrink) { 546362306a36Sopenharmony_ci pagecache_isize_extended(inode, oldsize, 546462306a36Sopenharmony_ci inode->i_size); 546562306a36Sopenharmony_ci } else if (ext4_should_journal_data(inode)) { 546662306a36Sopenharmony_ci ext4_wait_for_tail_page_commit(inode); 546762306a36Sopenharmony_ci } 546862306a36Sopenharmony_ci } 546962306a36Sopenharmony_ci 547062306a36Sopenharmony_ci /* 547162306a36Sopenharmony_ci * Truncate pagecache after we've waited for commit 547262306a36Sopenharmony_ci * in data=journal mode to make pages freeable. 
547362306a36Sopenharmony_ci */ 547462306a36Sopenharmony_ci truncate_pagecache(inode, inode->i_size); 547562306a36Sopenharmony_ci /* 547662306a36Sopenharmony_ci * Call ext4_truncate() even if i_size didn't change to 547762306a36Sopenharmony_ci * truncate possible preallocated blocks. 547862306a36Sopenharmony_ci */ 547962306a36Sopenharmony_ci if (attr->ia_size <= oldsize) { 548062306a36Sopenharmony_ci rc = ext4_truncate(inode); 548162306a36Sopenharmony_ci if (rc) 548262306a36Sopenharmony_ci error = rc; 548362306a36Sopenharmony_ci } 548462306a36Sopenharmony_ciout_mmap_sem: 548562306a36Sopenharmony_ci filemap_invalidate_unlock(inode->i_mapping); 548662306a36Sopenharmony_ci } 548762306a36Sopenharmony_ci 548862306a36Sopenharmony_ci if (!error) { 548962306a36Sopenharmony_ci if (inc_ivers) 549062306a36Sopenharmony_ci inode_inc_iversion(inode); 549162306a36Sopenharmony_ci setattr_copy(idmap, inode, attr); 549262306a36Sopenharmony_ci mark_inode_dirty(inode); 549362306a36Sopenharmony_ci } 549462306a36Sopenharmony_ci 549562306a36Sopenharmony_ci /* 549662306a36Sopenharmony_ci * If the call to ext4_truncate failed to get a transaction handle at 549762306a36Sopenharmony_ci * all, we need to clean up the in-core orphan list manually. 
549862306a36Sopenharmony_ci */ 549962306a36Sopenharmony_ci if (orphan && inode->i_nlink) 550062306a36Sopenharmony_ci ext4_orphan_del(NULL, inode); 550162306a36Sopenharmony_ci 550262306a36Sopenharmony_ci if (!error && (ia_valid & ATTR_MODE)) 550362306a36Sopenharmony_ci rc = posix_acl_chmod(idmap, dentry, inode->i_mode); 550462306a36Sopenharmony_ci 550562306a36Sopenharmony_cierr_out: 550662306a36Sopenharmony_ci if (error) 550762306a36Sopenharmony_ci ext4_std_error(inode->i_sb, error); 550862306a36Sopenharmony_ci if (!error) 550962306a36Sopenharmony_ci error = rc; 551062306a36Sopenharmony_ci return error; 551162306a36Sopenharmony_ci} 551262306a36Sopenharmony_ci 551362306a36Sopenharmony_ciu32 ext4_dio_alignment(struct inode *inode) 551462306a36Sopenharmony_ci{ 551562306a36Sopenharmony_ci if (fsverity_active(inode)) 551662306a36Sopenharmony_ci return 0; 551762306a36Sopenharmony_ci if (ext4_should_journal_data(inode)) 551862306a36Sopenharmony_ci return 0; 551962306a36Sopenharmony_ci if (ext4_has_inline_data(inode)) 552062306a36Sopenharmony_ci return 0; 552162306a36Sopenharmony_ci if (IS_ENCRYPTED(inode)) { 552262306a36Sopenharmony_ci if (!fscrypt_dio_supported(inode)) 552362306a36Sopenharmony_ci return 0; 552462306a36Sopenharmony_ci return i_blocksize(inode); 552562306a36Sopenharmony_ci } 552662306a36Sopenharmony_ci return 1; /* use the iomap defaults */ 552762306a36Sopenharmony_ci} 552862306a36Sopenharmony_ci 552962306a36Sopenharmony_ciint ext4_getattr(struct mnt_idmap *idmap, const struct path *path, 553062306a36Sopenharmony_ci struct kstat *stat, u32 request_mask, unsigned int query_flags) 553162306a36Sopenharmony_ci{ 553262306a36Sopenharmony_ci struct inode *inode = d_inode(path->dentry); 553362306a36Sopenharmony_ci struct ext4_inode *raw_inode; 553462306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 553562306a36Sopenharmony_ci unsigned int flags; 553662306a36Sopenharmony_ci 553762306a36Sopenharmony_ci if ((request_mask & STATX_BTIME) && 
553862306a36Sopenharmony_ci EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) { 553962306a36Sopenharmony_ci stat->result_mask |= STATX_BTIME; 554062306a36Sopenharmony_ci stat->btime.tv_sec = ei->i_crtime.tv_sec; 554162306a36Sopenharmony_ci stat->btime.tv_nsec = ei->i_crtime.tv_nsec; 554262306a36Sopenharmony_ci } 554362306a36Sopenharmony_ci 554462306a36Sopenharmony_ci /* 554562306a36Sopenharmony_ci * Return the DIO alignment restrictions if requested. We only return 554662306a36Sopenharmony_ci * this information when requested, since on encrypted files it might 554762306a36Sopenharmony_ci * take a fair bit of work to get if the file wasn't opened recently. 554862306a36Sopenharmony_ci */ 554962306a36Sopenharmony_ci if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) { 555062306a36Sopenharmony_ci u32 dio_align = ext4_dio_alignment(inode); 555162306a36Sopenharmony_ci 555262306a36Sopenharmony_ci stat->result_mask |= STATX_DIOALIGN; 555362306a36Sopenharmony_ci if (dio_align == 1) { 555462306a36Sopenharmony_ci struct block_device *bdev = inode->i_sb->s_bdev; 555562306a36Sopenharmony_ci 555662306a36Sopenharmony_ci /* iomap defaults */ 555762306a36Sopenharmony_ci stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; 555862306a36Sopenharmony_ci stat->dio_offset_align = bdev_logical_block_size(bdev); 555962306a36Sopenharmony_ci } else { 556062306a36Sopenharmony_ci stat->dio_mem_align = dio_align; 556162306a36Sopenharmony_ci stat->dio_offset_align = dio_align; 556262306a36Sopenharmony_ci } 556362306a36Sopenharmony_ci } 556462306a36Sopenharmony_ci 556562306a36Sopenharmony_ci flags = ei->i_flags & EXT4_FL_USER_VISIBLE; 556662306a36Sopenharmony_ci if (flags & EXT4_APPEND_FL) 556762306a36Sopenharmony_ci stat->attributes |= STATX_ATTR_APPEND; 556862306a36Sopenharmony_ci if (flags & EXT4_COMPR_FL) 556962306a36Sopenharmony_ci stat->attributes |= STATX_ATTR_COMPRESSED; 557062306a36Sopenharmony_ci if (flags & EXT4_ENCRYPT_FL) 557162306a36Sopenharmony_ci stat->attributes |= 
STATX_ATTR_ENCRYPTED; 557262306a36Sopenharmony_ci if (flags & EXT4_IMMUTABLE_FL) 557362306a36Sopenharmony_ci stat->attributes |= STATX_ATTR_IMMUTABLE; 557462306a36Sopenharmony_ci if (flags & EXT4_NODUMP_FL) 557562306a36Sopenharmony_ci stat->attributes |= STATX_ATTR_NODUMP; 557662306a36Sopenharmony_ci if (flags & EXT4_VERITY_FL) 557762306a36Sopenharmony_ci stat->attributes |= STATX_ATTR_VERITY; 557862306a36Sopenharmony_ci 557962306a36Sopenharmony_ci stat->attributes_mask |= (STATX_ATTR_APPEND | 558062306a36Sopenharmony_ci STATX_ATTR_COMPRESSED | 558162306a36Sopenharmony_ci STATX_ATTR_ENCRYPTED | 558262306a36Sopenharmony_ci STATX_ATTR_IMMUTABLE | 558362306a36Sopenharmony_ci STATX_ATTR_NODUMP | 558462306a36Sopenharmony_ci STATX_ATTR_VERITY); 558562306a36Sopenharmony_ci 558662306a36Sopenharmony_ci generic_fillattr(idmap, request_mask, inode, stat); 558762306a36Sopenharmony_ci return 0; 558862306a36Sopenharmony_ci} 558962306a36Sopenharmony_ci 559062306a36Sopenharmony_ciint ext4_file_getattr(struct mnt_idmap *idmap, 559162306a36Sopenharmony_ci const struct path *path, struct kstat *stat, 559262306a36Sopenharmony_ci u32 request_mask, unsigned int query_flags) 559362306a36Sopenharmony_ci{ 559462306a36Sopenharmony_ci struct inode *inode = d_inode(path->dentry); 559562306a36Sopenharmony_ci u64 delalloc_blocks; 559662306a36Sopenharmony_ci 559762306a36Sopenharmony_ci ext4_getattr(idmap, path, stat, request_mask, query_flags); 559862306a36Sopenharmony_ci 559962306a36Sopenharmony_ci /* 560062306a36Sopenharmony_ci * If there is inline data in the inode, the inode will normally not 560162306a36Sopenharmony_ci * have data blocks allocated (it may have an external xattr block). 560262306a36Sopenharmony_ci * Report at least one sector for such files, so tools like tar, rsync, 560362306a36Sopenharmony_ci * others don't incorrectly think the file is completely sparse. 
560462306a36Sopenharmony_ci */ 560562306a36Sopenharmony_ci if (unlikely(ext4_has_inline_data(inode))) 560662306a36Sopenharmony_ci stat->blocks += (stat->size + 511) >> 9; 560762306a36Sopenharmony_ci 560862306a36Sopenharmony_ci /* 560962306a36Sopenharmony_ci * We can't update i_blocks if the block allocation is delayed 561062306a36Sopenharmony_ci * otherwise in the case of system crash before the real block 561162306a36Sopenharmony_ci * allocation is done, we will have i_blocks inconsistent with 561262306a36Sopenharmony_ci * on-disk file blocks. 561362306a36Sopenharmony_ci * We always keep i_blocks updated together with real 561462306a36Sopenharmony_ci * allocation. But to not confuse with user, stat 561562306a36Sopenharmony_ci * will return the blocks that include the delayed allocation 561662306a36Sopenharmony_ci * blocks for this file. 561762306a36Sopenharmony_ci */ 561862306a36Sopenharmony_ci delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), 561962306a36Sopenharmony_ci EXT4_I(inode)->i_reserved_data_blocks); 562062306a36Sopenharmony_ci stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9); 562162306a36Sopenharmony_ci return 0; 562262306a36Sopenharmony_ci} 562362306a36Sopenharmony_ci 562462306a36Sopenharmony_cistatic int ext4_index_trans_blocks(struct inode *inode, int lblocks, 562562306a36Sopenharmony_ci int pextents) 562662306a36Sopenharmony_ci{ 562762306a36Sopenharmony_ci if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 562862306a36Sopenharmony_ci return ext4_ind_trans_blocks(inode, lblocks); 562962306a36Sopenharmony_ci return ext4_ext_index_trans_blocks(inode, pextents); 563062306a36Sopenharmony_ci} 563162306a36Sopenharmony_ci 563262306a36Sopenharmony_ci/* 563362306a36Sopenharmony_ci * Account for index blocks, block groups bitmaps and block group 563462306a36Sopenharmony_ci * descriptor blocks if modify datablocks and index blocks 563562306a36Sopenharmony_ci * worse case, the indexs blocks spread over different block groups 
563662306a36Sopenharmony_ci * 563762306a36Sopenharmony_ci * If datablocks are discontiguous, they are possible to spread over 563862306a36Sopenharmony_ci * different block groups too. If they are contiguous, with flexbg, 563962306a36Sopenharmony_ci * they could still across block group boundary. 564062306a36Sopenharmony_ci * 564162306a36Sopenharmony_ci * Also account for superblock, inode, quota and xattr blocks 564262306a36Sopenharmony_ci */ 564362306a36Sopenharmony_cistatic int ext4_meta_trans_blocks(struct inode *inode, int lblocks, 564462306a36Sopenharmony_ci int pextents) 564562306a36Sopenharmony_ci{ 564662306a36Sopenharmony_ci ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); 564762306a36Sopenharmony_ci int gdpblocks; 564862306a36Sopenharmony_ci int idxblocks; 564962306a36Sopenharmony_ci int ret; 565062306a36Sopenharmony_ci 565162306a36Sopenharmony_ci /* 565262306a36Sopenharmony_ci * How many index blocks need to touch to map @lblocks logical blocks 565362306a36Sopenharmony_ci * to @pextents physical extents? 
565462306a36Sopenharmony_ci */ 565562306a36Sopenharmony_ci idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents); 565662306a36Sopenharmony_ci 565762306a36Sopenharmony_ci ret = idxblocks; 565862306a36Sopenharmony_ci 565962306a36Sopenharmony_ci /* 566062306a36Sopenharmony_ci * Now let's see how many group bitmaps and group descriptors need 566162306a36Sopenharmony_ci * to account 566262306a36Sopenharmony_ci */ 566362306a36Sopenharmony_ci groups = idxblocks + pextents; 566462306a36Sopenharmony_ci gdpblocks = groups; 566562306a36Sopenharmony_ci if (groups > ngroups) 566662306a36Sopenharmony_ci groups = ngroups; 566762306a36Sopenharmony_ci if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) 566862306a36Sopenharmony_ci gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; 566962306a36Sopenharmony_ci 567062306a36Sopenharmony_ci /* bitmaps and block group descriptor blocks */ 567162306a36Sopenharmony_ci ret += groups + gdpblocks; 567262306a36Sopenharmony_ci 567362306a36Sopenharmony_ci /* Blocks for super block, inode, quota and xattr blocks */ 567462306a36Sopenharmony_ci ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); 567562306a36Sopenharmony_ci 567662306a36Sopenharmony_ci return ret; 567762306a36Sopenharmony_ci} 567862306a36Sopenharmony_ci 567962306a36Sopenharmony_ci/* 568062306a36Sopenharmony_ci * Calculate the total number of credits to reserve to fit 568162306a36Sopenharmony_ci * the modification of a single pages into a single transaction, 568262306a36Sopenharmony_ci * which may include multiple chunks of block allocations. 568362306a36Sopenharmony_ci * 568462306a36Sopenharmony_ci * This could be called via ext4_write_begin() 568562306a36Sopenharmony_ci * 568662306a36Sopenharmony_ci * We need to consider the worse case, when 568762306a36Sopenharmony_ci * one new block per extent. 
568862306a36Sopenharmony_ci */ 568962306a36Sopenharmony_ciint ext4_writepage_trans_blocks(struct inode *inode) 569062306a36Sopenharmony_ci{ 569162306a36Sopenharmony_ci int bpp = ext4_journal_blocks_per_page(inode); 569262306a36Sopenharmony_ci int ret; 569362306a36Sopenharmony_ci 569462306a36Sopenharmony_ci ret = ext4_meta_trans_blocks(inode, bpp, bpp); 569562306a36Sopenharmony_ci 569662306a36Sopenharmony_ci /* Account for data blocks for journalled mode */ 569762306a36Sopenharmony_ci if (ext4_should_journal_data(inode)) 569862306a36Sopenharmony_ci ret += bpp; 569962306a36Sopenharmony_ci return ret; 570062306a36Sopenharmony_ci} 570162306a36Sopenharmony_ci 570262306a36Sopenharmony_ci/* 570362306a36Sopenharmony_ci * Calculate the journal credits for a chunk of data modification. 570462306a36Sopenharmony_ci * 570562306a36Sopenharmony_ci * This is called from DIO, fallocate or whoever calling 570662306a36Sopenharmony_ci * ext4_map_blocks() to map/allocate a chunk of contiguous disk blocks. 570762306a36Sopenharmony_ci * 570862306a36Sopenharmony_ci * journal buffers for data blocks are not included here, as DIO 570962306a36Sopenharmony_ci * and fallocate do no need to journal data buffers. 571062306a36Sopenharmony_ci */ 571162306a36Sopenharmony_ciint ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) 571262306a36Sopenharmony_ci{ 571362306a36Sopenharmony_ci return ext4_meta_trans_blocks(inode, nrblocks, 1); 571462306a36Sopenharmony_ci} 571562306a36Sopenharmony_ci 571662306a36Sopenharmony_ci/* 571762306a36Sopenharmony_ci * The caller must have previously called ext4_reserve_inode_write(). 571862306a36Sopenharmony_ci * Give this, we know that the caller already has write access to iloc->bh. 
571962306a36Sopenharmony_ci */ 572062306a36Sopenharmony_ciint ext4_mark_iloc_dirty(handle_t *handle, 572162306a36Sopenharmony_ci struct inode *inode, struct ext4_iloc *iloc) 572262306a36Sopenharmony_ci{ 572362306a36Sopenharmony_ci int err = 0; 572462306a36Sopenharmony_ci 572562306a36Sopenharmony_ci if (unlikely(ext4_forced_shutdown(inode->i_sb))) { 572662306a36Sopenharmony_ci put_bh(iloc->bh); 572762306a36Sopenharmony_ci return -EIO; 572862306a36Sopenharmony_ci } 572962306a36Sopenharmony_ci ext4_fc_track_inode(handle, inode); 573062306a36Sopenharmony_ci 573162306a36Sopenharmony_ci /* the do_update_inode consumes one bh->b_count */ 573262306a36Sopenharmony_ci get_bh(iloc->bh); 573362306a36Sopenharmony_ci 573462306a36Sopenharmony_ci /* ext4_do_update_inode() does jbd2_journal_dirty_metadata */ 573562306a36Sopenharmony_ci err = ext4_do_update_inode(handle, inode, iloc); 573662306a36Sopenharmony_ci put_bh(iloc->bh); 573762306a36Sopenharmony_ci return err; 573862306a36Sopenharmony_ci} 573962306a36Sopenharmony_ci 574062306a36Sopenharmony_ci/* 574162306a36Sopenharmony_ci * On success, We end up with an outstanding reference count against 574262306a36Sopenharmony_ci * iloc->bh. This _must_ be cleaned up later. 
574362306a36Sopenharmony_ci */ 574462306a36Sopenharmony_ci 574562306a36Sopenharmony_ciint 574662306a36Sopenharmony_ciext4_reserve_inode_write(handle_t *handle, struct inode *inode, 574762306a36Sopenharmony_ci struct ext4_iloc *iloc) 574862306a36Sopenharmony_ci{ 574962306a36Sopenharmony_ci int err; 575062306a36Sopenharmony_ci 575162306a36Sopenharmony_ci if (unlikely(ext4_forced_shutdown(inode->i_sb))) 575262306a36Sopenharmony_ci return -EIO; 575362306a36Sopenharmony_ci 575462306a36Sopenharmony_ci err = ext4_get_inode_loc(inode, iloc); 575562306a36Sopenharmony_ci if (!err) { 575662306a36Sopenharmony_ci BUFFER_TRACE(iloc->bh, "get_write_access"); 575762306a36Sopenharmony_ci err = ext4_journal_get_write_access(handle, inode->i_sb, 575862306a36Sopenharmony_ci iloc->bh, EXT4_JTR_NONE); 575962306a36Sopenharmony_ci if (err) { 576062306a36Sopenharmony_ci brelse(iloc->bh); 576162306a36Sopenharmony_ci iloc->bh = NULL; 576262306a36Sopenharmony_ci } 576362306a36Sopenharmony_ci } 576462306a36Sopenharmony_ci ext4_std_error(inode->i_sb, err); 576562306a36Sopenharmony_ci return err; 576662306a36Sopenharmony_ci} 576762306a36Sopenharmony_ci 576862306a36Sopenharmony_cistatic int __ext4_expand_extra_isize(struct inode *inode, 576962306a36Sopenharmony_ci unsigned int new_extra_isize, 577062306a36Sopenharmony_ci struct ext4_iloc *iloc, 577162306a36Sopenharmony_ci handle_t *handle, int *no_expand) 577262306a36Sopenharmony_ci{ 577362306a36Sopenharmony_ci struct ext4_inode *raw_inode; 577462306a36Sopenharmony_ci struct ext4_xattr_ibody_header *header; 577562306a36Sopenharmony_ci unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb); 577662306a36Sopenharmony_ci struct ext4_inode_info *ei = EXT4_I(inode); 577762306a36Sopenharmony_ci int error; 577862306a36Sopenharmony_ci 577962306a36Sopenharmony_ci /* this was checked at iget time, but double check for good measure */ 578062306a36Sopenharmony_ci if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) || 578162306a36Sopenharmony_ci 
(ei->i_extra_isize & 3)) { 578262306a36Sopenharmony_ci EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)", 578362306a36Sopenharmony_ci ei->i_extra_isize, 578462306a36Sopenharmony_ci EXT4_INODE_SIZE(inode->i_sb)); 578562306a36Sopenharmony_ci return -EFSCORRUPTED; 578662306a36Sopenharmony_ci } 578762306a36Sopenharmony_ci if ((new_extra_isize < ei->i_extra_isize) || 578862306a36Sopenharmony_ci (new_extra_isize < 4) || 578962306a36Sopenharmony_ci (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE)) 579062306a36Sopenharmony_ci return -EINVAL; /* Should never happen */ 579162306a36Sopenharmony_ci 579262306a36Sopenharmony_ci raw_inode = ext4_raw_inode(iloc); 579362306a36Sopenharmony_ci 579462306a36Sopenharmony_ci header = IHDR(inode, raw_inode); 579562306a36Sopenharmony_ci 579662306a36Sopenharmony_ci /* No extended attributes present */ 579762306a36Sopenharmony_ci if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || 579862306a36Sopenharmony_ci header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { 579962306a36Sopenharmony_ci memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + 580062306a36Sopenharmony_ci EXT4_I(inode)->i_extra_isize, 0, 580162306a36Sopenharmony_ci new_extra_isize - EXT4_I(inode)->i_extra_isize); 580262306a36Sopenharmony_ci EXT4_I(inode)->i_extra_isize = new_extra_isize; 580362306a36Sopenharmony_ci return 0; 580462306a36Sopenharmony_ci } 580562306a36Sopenharmony_ci 580662306a36Sopenharmony_ci /* 580762306a36Sopenharmony_ci * We may need to allocate external xattr block so we need quotas 580862306a36Sopenharmony_ci * initialized. Here we can be called with various locks held so we 580962306a36Sopenharmony_ci * cannot affort to initialize quotas ourselves. So just bail. 
581062306a36Sopenharmony_ci */ 581162306a36Sopenharmony_ci if (dquot_initialize_needed(inode)) 581262306a36Sopenharmony_ci return -EAGAIN; 581362306a36Sopenharmony_ci 581462306a36Sopenharmony_ci /* try to expand with EAs present */ 581562306a36Sopenharmony_ci error = ext4_expand_extra_isize_ea(inode, new_extra_isize, 581662306a36Sopenharmony_ci raw_inode, handle); 581762306a36Sopenharmony_ci if (error) { 581862306a36Sopenharmony_ci /* 581962306a36Sopenharmony_ci * Inode size expansion failed; don't try again 582062306a36Sopenharmony_ci */ 582162306a36Sopenharmony_ci *no_expand = 1; 582262306a36Sopenharmony_ci } 582362306a36Sopenharmony_ci 582462306a36Sopenharmony_ci return error; 582562306a36Sopenharmony_ci} 582662306a36Sopenharmony_ci 582762306a36Sopenharmony_ci/* 582862306a36Sopenharmony_ci * Expand an inode by new_extra_isize bytes. 582962306a36Sopenharmony_ci * Returns 0 on success or negative error number on failure. 583062306a36Sopenharmony_ci */ 583162306a36Sopenharmony_cistatic int ext4_try_to_expand_extra_isize(struct inode *inode, 583262306a36Sopenharmony_ci unsigned int new_extra_isize, 583362306a36Sopenharmony_ci struct ext4_iloc iloc, 583462306a36Sopenharmony_ci handle_t *handle) 583562306a36Sopenharmony_ci{ 583662306a36Sopenharmony_ci int no_expand; 583762306a36Sopenharmony_ci int error; 583862306a36Sopenharmony_ci 583962306a36Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) 584062306a36Sopenharmony_ci return -EOVERFLOW; 584162306a36Sopenharmony_ci 584262306a36Sopenharmony_ci /* 584362306a36Sopenharmony_ci * In nojournal mode, we can immediately attempt to expand 584462306a36Sopenharmony_ci * the inode. When journaled, we first need to obtain extra 584562306a36Sopenharmony_ci * buffer credits since we may write into the EA block 584662306a36Sopenharmony_ci * with this same handle. If journal_extend fails, then it will 584762306a36Sopenharmony_ci * only result in a minor loss of functionality for that inode. 
584862306a36Sopenharmony_ci * If this is felt to be critical, then e2fsck should be run to 584962306a36Sopenharmony_ci * force a large enough s_min_extra_isize. 585062306a36Sopenharmony_ci */ 585162306a36Sopenharmony_ci if (ext4_journal_extend(handle, 585262306a36Sopenharmony_ci EXT4_DATA_TRANS_BLOCKS(inode->i_sb), 0) != 0) 585362306a36Sopenharmony_ci return -ENOSPC; 585462306a36Sopenharmony_ci 585562306a36Sopenharmony_ci if (ext4_write_trylock_xattr(inode, &no_expand) == 0) 585662306a36Sopenharmony_ci return -EBUSY; 585762306a36Sopenharmony_ci 585862306a36Sopenharmony_ci error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc, 585962306a36Sopenharmony_ci handle, &no_expand); 586062306a36Sopenharmony_ci ext4_write_unlock_xattr(inode, &no_expand); 586162306a36Sopenharmony_ci 586262306a36Sopenharmony_ci return error; 586362306a36Sopenharmony_ci} 586462306a36Sopenharmony_ci 586562306a36Sopenharmony_ciint ext4_expand_extra_isize(struct inode *inode, 586662306a36Sopenharmony_ci unsigned int new_extra_isize, 586762306a36Sopenharmony_ci struct ext4_iloc *iloc) 586862306a36Sopenharmony_ci{ 586962306a36Sopenharmony_ci handle_t *handle; 587062306a36Sopenharmony_ci int no_expand; 587162306a36Sopenharmony_ci int error, rc; 587262306a36Sopenharmony_ci 587362306a36Sopenharmony_ci if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { 587462306a36Sopenharmony_ci brelse(iloc->bh); 587562306a36Sopenharmony_ci return -EOVERFLOW; 587662306a36Sopenharmony_ci } 587762306a36Sopenharmony_ci 587862306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 587962306a36Sopenharmony_ci EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); 588062306a36Sopenharmony_ci if (IS_ERR(handle)) { 588162306a36Sopenharmony_ci error = PTR_ERR(handle); 588262306a36Sopenharmony_ci brelse(iloc->bh); 588362306a36Sopenharmony_ci return error; 588462306a36Sopenharmony_ci } 588562306a36Sopenharmony_ci 588662306a36Sopenharmony_ci ext4_write_lock_xattr(inode, &no_expand); 588762306a36Sopenharmony_ci 
588862306a36Sopenharmony_ci BUFFER_TRACE(iloc->bh, "get_write_access"); 588962306a36Sopenharmony_ci error = ext4_journal_get_write_access(handle, inode->i_sb, iloc->bh, 589062306a36Sopenharmony_ci EXT4_JTR_NONE); 589162306a36Sopenharmony_ci if (error) { 589262306a36Sopenharmony_ci brelse(iloc->bh); 589362306a36Sopenharmony_ci goto out_unlock; 589462306a36Sopenharmony_ci } 589562306a36Sopenharmony_ci 589662306a36Sopenharmony_ci error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc, 589762306a36Sopenharmony_ci handle, &no_expand); 589862306a36Sopenharmony_ci 589962306a36Sopenharmony_ci rc = ext4_mark_iloc_dirty(handle, inode, iloc); 590062306a36Sopenharmony_ci if (!error) 590162306a36Sopenharmony_ci error = rc; 590262306a36Sopenharmony_ci 590362306a36Sopenharmony_ciout_unlock: 590462306a36Sopenharmony_ci ext4_write_unlock_xattr(inode, &no_expand); 590562306a36Sopenharmony_ci ext4_journal_stop(handle); 590662306a36Sopenharmony_ci return error; 590762306a36Sopenharmony_ci} 590862306a36Sopenharmony_ci 590962306a36Sopenharmony_ci/* 591062306a36Sopenharmony_ci * What we do here is to mark the in-core inode as clean with respect to inode 591162306a36Sopenharmony_ci * dirtiness (it may still be data-dirty). 591262306a36Sopenharmony_ci * This means that the in-core inode may be reaped by prune_icache 591362306a36Sopenharmony_ci * without having to perform any I/O. This is a very good thing, 591462306a36Sopenharmony_ci * because *any* task may call prune_icache - even ones which 591562306a36Sopenharmony_ci * have a transaction open against a different journal. 591662306a36Sopenharmony_ci * 591762306a36Sopenharmony_ci * Is this cheating? Not really. Sure, we haven't written the 591862306a36Sopenharmony_ci * inode out, but prune_icache isn't a user-visible syncing function. 591962306a36Sopenharmony_ci * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) 592062306a36Sopenharmony_ci * we start and wait on commits. 
592162306a36Sopenharmony_ci */ 592262306a36Sopenharmony_ciint __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode, 592362306a36Sopenharmony_ci const char *func, unsigned int line) 592462306a36Sopenharmony_ci{ 592562306a36Sopenharmony_ci struct ext4_iloc iloc; 592662306a36Sopenharmony_ci struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 592762306a36Sopenharmony_ci int err; 592862306a36Sopenharmony_ci 592962306a36Sopenharmony_ci might_sleep(); 593062306a36Sopenharmony_ci trace_ext4_mark_inode_dirty(inode, _RET_IP_); 593162306a36Sopenharmony_ci err = ext4_reserve_inode_write(handle, inode, &iloc); 593262306a36Sopenharmony_ci if (err) 593362306a36Sopenharmony_ci goto out; 593462306a36Sopenharmony_ci 593562306a36Sopenharmony_ci if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize) 593662306a36Sopenharmony_ci ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize, 593762306a36Sopenharmony_ci iloc, handle); 593862306a36Sopenharmony_ci 593962306a36Sopenharmony_ci err = ext4_mark_iloc_dirty(handle, inode, &iloc); 594062306a36Sopenharmony_ciout: 594162306a36Sopenharmony_ci if (unlikely(err)) 594262306a36Sopenharmony_ci ext4_error_inode_err(inode, func, line, 0, err, 594362306a36Sopenharmony_ci "mark_inode_dirty error"); 594462306a36Sopenharmony_ci return err; 594562306a36Sopenharmony_ci} 594662306a36Sopenharmony_ci 594762306a36Sopenharmony_ci/* 594862306a36Sopenharmony_ci * ext4_dirty_inode() is called from __mark_inode_dirty() 594962306a36Sopenharmony_ci * 595062306a36Sopenharmony_ci * We're really interested in the case where a file is being extended. 595162306a36Sopenharmony_ci * i_size has been changed by generic_commit_write() and we thus need 595262306a36Sopenharmony_ci * to include the updated inode in the current transaction. 595362306a36Sopenharmony_ci * 595462306a36Sopenharmony_ci * Also, dquot_alloc_block() will always dirty the inode when blocks 595562306a36Sopenharmony_ci * are allocated to the file. 
595662306a36Sopenharmony_ci * 595762306a36Sopenharmony_ci * If the inode is marked synchronous, we don't honour that here - doing 595862306a36Sopenharmony_ci * so would cause a commit on atime updates, which we don't bother doing. 595962306a36Sopenharmony_ci * We handle synchronous inodes at the highest possible level. 596062306a36Sopenharmony_ci */ 596162306a36Sopenharmony_civoid ext4_dirty_inode(struct inode *inode, int flags) 596262306a36Sopenharmony_ci{ 596362306a36Sopenharmony_ci handle_t *handle; 596462306a36Sopenharmony_ci 596562306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 596662306a36Sopenharmony_ci if (IS_ERR(handle)) 596762306a36Sopenharmony_ci return; 596862306a36Sopenharmony_ci ext4_mark_inode_dirty(handle, inode); 596962306a36Sopenharmony_ci ext4_journal_stop(handle); 597062306a36Sopenharmony_ci} 597162306a36Sopenharmony_ci 597262306a36Sopenharmony_ciint ext4_change_inode_journal_flag(struct inode *inode, int val) 597362306a36Sopenharmony_ci{ 597462306a36Sopenharmony_ci journal_t *journal; 597562306a36Sopenharmony_ci handle_t *handle; 597662306a36Sopenharmony_ci int err; 597762306a36Sopenharmony_ci int alloc_ctx; 597862306a36Sopenharmony_ci 597962306a36Sopenharmony_ci /* 598062306a36Sopenharmony_ci * We have to be very careful here: changing a data block's 598162306a36Sopenharmony_ci * journaling status dynamically is dangerous. If we write a 598262306a36Sopenharmony_ci * data block to the journal, change the status and then delete 598362306a36Sopenharmony_ci * that block, we risk forgetting to revoke the old log record 598462306a36Sopenharmony_ci * from the journal and so a subsequent replay can corrupt data. 598562306a36Sopenharmony_ci * So, first we make sure that the journal is empty and that 598662306a36Sopenharmony_ci * nobody is changing anything. 
598762306a36Sopenharmony_ci */ 598862306a36Sopenharmony_ci 598962306a36Sopenharmony_ci journal = EXT4_JOURNAL(inode); 599062306a36Sopenharmony_ci if (!journal) 599162306a36Sopenharmony_ci return 0; 599262306a36Sopenharmony_ci if (is_journal_aborted(journal)) 599362306a36Sopenharmony_ci return -EROFS; 599462306a36Sopenharmony_ci 599562306a36Sopenharmony_ci /* Wait for all existing dio workers */ 599662306a36Sopenharmony_ci inode_dio_wait(inode); 599762306a36Sopenharmony_ci 599862306a36Sopenharmony_ci /* 599962306a36Sopenharmony_ci * Before flushing the journal and switching inode's aops, we have 600062306a36Sopenharmony_ci * to flush all dirty data the inode has. There can be outstanding 600162306a36Sopenharmony_ci * delayed allocations, there can be unwritten extents created by 600262306a36Sopenharmony_ci * fallocate or buffered writes in dioread_nolock mode covered by 600362306a36Sopenharmony_ci * dirty data which can be converted only after flushing the dirty 600462306a36Sopenharmony_ci * data (and journalled aops don't know how to handle these cases). 600562306a36Sopenharmony_ci */ 600662306a36Sopenharmony_ci if (val) { 600762306a36Sopenharmony_ci filemap_invalidate_lock(inode->i_mapping); 600862306a36Sopenharmony_ci err = filemap_write_and_wait(inode->i_mapping); 600962306a36Sopenharmony_ci if (err < 0) { 601062306a36Sopenharmony_ci filemap_invalidate_unlock(inode->i_mapping); 601162306a36Sopenharmony_ci return err; 601262306a36Sopenharmony_ci } 601362306a36Sopenharmony_ci } 601462306a36Sopenharmony_ci 601562306a36Sopenharmony_ci alloc_ctx = ext4_writepages_down_write(inode->i_sb); 601662306a36Sopenharmony_ci jbd2_journal_lock_updates(journal); 601762306a36Sopenharmony_ci 601862306a36Sopenharmony_ci /* 601962306a36Sopenharmony_ci * OK, there are no updates running now, and all cached data is 602062306a36Sopenharmony_ci * synced to disk. 
We are now in a completely consistent state 602162306a36Sopenharmony_ci * which doesn't have anything in the journal, and we know that 602262306a36Sopenharmony_ci * no filesystem updates are running, so it is safe to modify 602362306a36Sopenharmony_ci * the inode's in-core data-journaling state flag now. 602462306a36Sopenharmony_ci */ 602562306a36Sopenharmony_ci 602662306a36Sopenharmony_ci if (val) 602762306a36Sopenharmony_ci ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 602862306a36Sopenharmony_ci else { 602962306a36Sopenharmony_ci err = jbd2_journal_flush(journal, 0); 603062306a36Sopenharmony_ci if (err < 0) { 603162306a36Sopenharmony_ci jbd2_journal_unlock_updates(journal); 603262306a36Sopenharmony_ci ext4_writepages_up_write(inode->i_sb, alloc_ctx); 603362306a36Sopenharmony_ci return err; 603462306a36Sopenharmony_ci } 603562306a36Sopenharmony_ci ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 603662306a36Sopenharmony_ci } 603762306a36Sopenharmony_ci ext4_set_aops(inode); 603862306a36Sopenharmony_ci 603962306a36Sopenharmony_ci jbd2_journal_unlock_updates(journal); 604062306a36Sopenharmony_ci ext4_writepages_up_write(inode->i_sb, alloc_ctx); 604162306a36Sopenharmony_ci 604262306a36Sopenharmony_ci if (val) 604362306a36Sopenharmony_ci filemap_invalidate_unlock(inode->i_mapping); 604462306a36Sopenharmony_ci 604562306a36Sopenharmony_ci /* Finally we can mark the inode as dirty. 
*/ 604662306a36Sopenharmony_ci 604762306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); 604862306a36Sopenharmony_ci if (IS_ERR(handle)) 604962306a36Sopenharmony_ci return PTR_ERR(handle); 605062306a36Sopenharmony_ci 605162306a36Sopenharmony_ci ext4_fc_mark_ineligible(inode->i_sb, 605262306a36Sopenharmony_ci EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle); 605362306a36Sopenharmony_ci err = ext4_mark_inode_dirty(handle, inode); 605462306a36Sopenharmony_ci ext4_handle_sync(handle); 605562306a36Sopenharmony_ci ext4_journal_stop(handle); 605662306a36Sopenharmony_ci ext4_std_error(inode->i_sb, err); 605762306a36Sopenharmony_ci 605862306a36Sopenharmony_ci return err; 605962306a36Sopenharmony_ci} 606062306a36Sopenharmony_ci 606162306a36Sopenharmony_cistatic int ext4_bh_unmapped(handle_t *handle, struct inode *inode, 606262306a36Sopenharmony_ci struct buffer_head *bh) 606362306a36Sopenharmony_ci{ 606462306a36Sopenharmony_ci return !buffer_mapped(bh); 606562306a36Sopenharmony_ci} 606662306a36Sopenharmony_ci 606762306a36Sopenharmony_civm_fault_t ext4_page_mkwrite(struct vm_fault *vmf) 606862306a36Sopenharmony_ci{ 606962306a36Sopenharmony_ci struct vm_area_struct *vma = vmf->vma; 607062306a36Sopenharmony_ci struct folio *folio = page_folio(vmf->page); 607162306a36Sopenharmony_ci loff_t size; 607262306a36Sopenharmony_ci unsigned long len; 607362306a36Sopenharmony_ci int err; 607462306a36Sopenharmony_ci vm_fault_t ret; 607562306a36Sopenharmony_ci struct file *file = vma->vm_file; 607662306a36Sopenharmony_ci struct inode *inode = file_inode(file); 607762306a36Sopenharmony_ci struct address_space *mapping = inode->i_mapping; 607862306a36Sopenharmony_ci handle_t *handle; 607962306a36Sopenharmony_ci get_block_t *get_block; 608062306a36Sopenharmony_ci int retries = 0; 608162306a36Sopenharmony_ci 608262306a36Sopenharmony_ci if (unlikely(IS_IMMUTABLE(inode))) 608362306a36Sopenharmony_ci return VM_FAULT_SIGBUS; 608462306a36Sopenharmony_ci 608562306a36Sopenharmony_ci 
sb_start_pagefault(inode->i_sb); 608662306a36Sopenharmony_ci file_update_time(vma->vm_file); 608762306a36Sopenharmony_ci 608862306a36Sopenharmony_ci filemap_invalidate_lock_shared(mapping); 608962306a36Sopenharmony_ci 609062306a36Sopenharmony_ci err = ext4_convert_inline_data(inode); 609162306a36Sopenharmony_ci if (err) 609262306a36Sopenharmony_ci goto out_ret; 609362306a36Sopenharmony_ci 609462306a36Sopenharmony_ci /* 609562306a36Sopenharmony_ci * On data journalling we skip straight to the transaction handle: 609662306a36Sopenharmony_ci * there's no delalloc; page truncated will be checked later; the 609762306a36Sopenharmony_ci * early return w/ all buffers mapped (calculates size/len) can't 609862306a36Sopenharmony_ci * be used; and there's no dioread_nolock, so only ext4_get_block. 609962306a36Sopenharmony_ci */ 610062306a36Sopenharmony_ci if (ext4_should_journal_data(inode)) 610162306a36Sopenharmony_ci goto retry_alloc; 610262306a36Sopenharmony_ci 610362306a36Sopenharmony_ci /* Delalloc case is easy... */ 610462306a36Sopenharmony_ci if (test_opt(inode->i_sb, DELALLOC) && 610562306a36Sopenharmony_ci !ext4_nonda_switch(inode->i_sb)) { 610662306a36Sopenharmony_ci do { 610762306a36Sopenharmony_ci err = block_page_mkwrite(vma, vmf, 610862306a36Sopenharmony_ci ext4_da_get_block_prep); 610962306a36Sopenharmony_ci } while (err == -ENOSPC && 611062306a36Sopenharmony_ci ext4_should_retry_alloc(inode->i_sb, &retries)); 611162306a36Sopenharmony_ci goto out_ret; 611262306a36Sopenharmony_ci } 611362306a36Sopenharmony_ci 611462306a36Sopenharmony_ci folio_lock(folio); 611562306a36Sopenharmony_ci size = i_size_read(inode); 611662306a36Sopenharmony_ci /* Page got truncated from under us? 
*/ 611762306a36Sopenharmony_ci if (folio->mapping != mapping || folio_pos(folio) > size) { 611862306a36Sopenharmony_ci folio_unlock(folio); 611962306a36Sopenharmony_ci ret = VM_FAULT_NOPAGE; 612062306a36Sopenharmony_ci goto out; 612162306a36Sopenharmony_ci } 612262306a36Sopenharmony_ci 612362306a36Sopenharmony_ci len = folio_size(folio); 612462306a36Sopenharmony_ci if (folio_pos(folio) + len > size) 612562306a36Sopenharmony_ci len = size - folio_pos(folio); 612662306a36Sopenharmony_ci /* 612762306a36Sopenharmony_ci * Return if we have all the buffers mapped. This avoids the need to do 612862306a36Sopenharmony_ci * journal_start/journal_stop which can block and take a long time 612962306a36Sopenharmony_ci * 613062306a36Sopenharmony_ci * This cannot be done for data journalling, as we have to add the 613162306a36Sopenharmony_ci * inode to the transaction's list to writeprotect pages on commit. 613262306a36Sopenharmony_ci */ 613362306a36Sopenharmony_ci if (folio_buffers(folio)) { 613462306a36Sopenharmony_ci if (!ext4_walk_page_buffers(NULL, inode, folio_buffers(folio), 613562306a36Sopenharmony_ci 0, len, NULL, 613662306a36Sopenharmony_ci ext4_bh_unmapped)) { 613762306a36Sopenharmony_ci /* Wait so that we don't change page under IO */ 613862306a36Sopenharmony_ci folio_wait_stable(folio); 613962306a36Sopenharmony_ci ret = VM_FAULT_LOCKED; 614062306a36Sopenharmony_ci goto out; 614162306a36Sopenharmony_ci } 614262306a36Sopenharmony_ci } 614362306a36Sopenharmony_ci folio_unlock(folio); 614462306a36Sopenharmony_ci /* OK, we need to fill the hole... 
*/ 614562306a36Sopenharmony_ci if (ext4_should_dioread_nolock(inode)) 614662306a36Sopenharmony_ci get_block = ext4_get_block_unwritten; 614762306a36Sopenharmony_ci else 614862306a36Sopenharmony_ci get_block = ext4_get_block; 614962306a36Sopenharmony_ciretry_alloc: 615062306a36Sopenharmony_ci handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 615162306a36Sopenharmony_ci ext4_writepage_trans_blocks(inode)); 615262306a36Sopenharmony_ci if (IS_ERR(handle)) { 615362306a36Sopenharmony_ci ret = VM_FAULT_SIGBUS; 615462306a36Sopenharmony_ci goto out; 615562306a36Sopenharmony_ci } 615662306a36Sopenharmony_ci /* 615762306a36Sopenharmony_ci * Data journalling can't use block_page_mkwrite() because it 615862306a36Sopenharmony_ci * will set_buffer_dirty() before do_journal_get_write_access() 615962306a36Sopenharmony_ci * thus might hit warning messages for dirty metadata buffers. 616062306a36Sopenharmony_ci */ 616162306a36Sopenharmony_ci if (!ext4_should_journal_data(inode)) { 616262306a36Sopenharmony_ci err = block_page_mkwrite(vma, vmf, get_block); 616362306a36Sopenharmony_ci } else { 616462306a36Sopenharmony_ci folio_lock(folio); 616562306a36Sopenharmony_ci size = i_size_read(inode); 616662306a36Sopenharmony_ci /* Page got truncated from under us? 
*/ 616762306a36Sopenharmony_ci if (folio->mapping != mapping || folio_pos(folio) > size) { 616862306a36Sopenharmony_ci ret = VM_FAULT_NOPAGE; 616962306a36Sopenharmony_ci goto out_error; 617062306a36Sopenharmony_ci } 617162306a36Sopenharmony_ci 617262306a36Sopenharmony_ci len = folio_size(folio); 617362306a36Sopenharmony_ci if (folio_pos(folio) + len > size) 617462306a36Sopenharmony_ci len = size - folio_pos(folio); 617562306a36Sopenharmony_ci 617662306a36Sopenharmony_ci err = __block_write_begin(&folio->page, 0, len, ext4_get_block); 617762306a36Sopenharmony_ci if (!err) { 617862306a36Sopenharmony_ci ret = VM_FAULT_SIGBUS; 617962306a36Sopenharmony_ci if (ext4_journal_folio_buffers(handle, folio, len)) 618062306a36Sopenharmony_ci goto out_error; 618162306a36Sopenharmony_ci } else { 618262306a36Sopenharmony_ci folio_unlock(folio); 618362306a36Sopenharmony_ci } 618462306a36Sopenharmony_ci } 618562306a36Sopenharmony_ci ext4_journal_stop(handle); 618662306a36Sopenharmony_ci if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 618762306a36Sopenharmony_ci goto retry_alloc; 618862306a36Sopenharmony_ciout_ret: 618962306a36Sopenharmony_ci ret = vmf_fs_error(err); 619062306a36Sopenharmony_ciout: 619162306a36Sopenharmony_ci filemap_invalidate_unlock_shared(mapping); 619262306a36Sopenharmony_ci sb_end_pagefault(inode->i_sb); 619362306a36Sopenharmony_ci return ret; 619462306a36Sopenharmony_ciout_error: 619562306a36Sopenharmony_ci folio_unlock(folio); 619662306a36Sopenharmony_ci ext4_journal_stop(handle); 619762306a36Sopenharmony_ci goto out; 619862306a36Sopenharmony_ci} 6199