// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/inode.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 *
 *  Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000
 */
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#include <linux/fs.h>
2362306a36Sopenharmony_ci#include <linux/mount.h>
2462306a36Sopenharmony_ci#include <linux/time.h>
2562306a36Sopenharmony_ci#include <linux/highuid.h>
2662306a36Sopenharmony_ci#include <linux/pagemap.h>
2762306a36Sopenharmony_ci#include <linux/dax.h>
2862306a36Sopenharmony_ci#include <linux/quotaops.h>
2962306a36Sopenharmony_ci#include <linux/string.h>
3062306a36Sopenharmony_ci#include <linux/buffer_head.h>
3162306a36Sopenharmony_ci#include <linux/writeback.h>
3262306a36Sopenharmony_ci#include <linux/pagevec.h>
3362306a36Sopenharmony_ci#include <linux/mpage.h>
3462306a36Sopenharmony_ci#include <linux/namei.h>
3562306a36Sopenharmony_ci#include <linux/uio.h>
3662306a36Sopenharmony_ci#include <linux/bio.h>
3762306a36Sopenharmony_ci#include <linux/workqueue.h>
3862306a36Sopenharmony_ci#include <linux/kernel.h>
3962306a36Sopenharmony_ci#include <linux/printk.h>
4062306a36Sopenharmony_ci#include <linux/slab.h>
4162306a36Sopenharmony_ci#include <linux/bitops.h>
4262306a36Sopenharmony_ci#include <linux/iomap.h>
4362306a36Sopenharmony_ci#include <linux/iversion.h>
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci#include "ext4_jbd2.h"
4662306a36Sopenharmony_ci#include "xattr.h"
4762306a36Sopenharmony_ci#include "acl.h"
4862306a36Sopenharmony_ci#include "truncate.h"
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci#include <trace/events/ext4.h>
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_cistatic __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
5362306a36Sopenharmony_ci			      struct ext4_inode_info *ei)
5462306a36Sopenharmony_ci{
5562306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5662306a36Sopenharmony_ci	__u32 csum;
5762306a36Sopenharmony_ci	__u16 dummy_csum = 0;
5862306a36Sopenharmony_ci	int offset = offsetof(struct ext4_inode, i_checksum_lo);
5962306a36Sopenharmony_ci	unsigned int csum_size = sizeof(dummy_csum);
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, offset);
6262306a36Sopenharmony_ci	csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, csum_size);
6362306a36Sopenharmony_ci	offset += csum_size;
6462306a36Sopenharmony_ci	csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
6562306a36Sopenharmony_ci			   EXT4_GOOD_OLD_INODE_SIZE - offset);
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
6862306a36Sopenharmony_ci		offset = offsetof(struct ext4_inode, i_checksum_hi);
6962306a36Sopenharmony_ci		csum = ext4_chksum(sbi, csum, (__u8 *)raw +
7062306a36Sopenharmony_ci				   EXT4_GOOD_OLD_INODE_SIZE,
7162306a36Sopenharmony_ci				   offset - EXT4_GOOD_OLD_INODE_SIZE);
7262306a36Sopenharmony_ci		if (EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
7362306a36Sopenharmony_ci			csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
7462306a36Sopenharmony_ci					   csum_size);
7562306a36Sopenharmony_ci			offset += csum_size;
7662306a36Sopenharmony_ci		}
7762306a36Sopenharmony_ci		csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
7862306a36Sopenharmony_ci				   EXT4_INODE_SIZE(inode->i_sb) - offset);
7962306a36Sopenharmony_ci	}
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci	return csum;
8262306a36Sopenharmony_ci}
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_cistatic int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
8562306a36Sopenharmony_ci				  struct ext4_inode_info *ei)
8662306a36Sopenharmony_ci{
8762306a36Sopenharmony_ci	__u32 provided, calculated;
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
9062306a36Sopenharmony_ci	    cpu_to_le32(EXT4_OS_LINUX) ||
9162306a36Sopenharmony_ci	    !ext4_has_metadata_csum(inode->i_sb))
9262306a36Sopenharmony_ci		return 1;
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	provided = le16_to_cpu(raw->i_checksum_lo);
9562306a36Sopenharmony_ci	calculated = ext4_inode_csum(inode, raw, ei);
9662306a36Sopenharmony_ci	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
9762306a36Sopenharmony_ci	    EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
9862306a36Sopenharmony_ci		provided |= ((__u32)le16_to_cpu(raw->i_checksum_hi)) << 16;
9962306a36Sopenharmony_ci	else
10062306a36Sopenharmony_ci		calculated &= 0xFFFF;
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	return provided == calculated;
10362306a36Sopenharmony_ci}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_civoid ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
10662306a36Sopenharmony_ci			 struct ext4_inode_info *ei)
10762306a36Sopenharmony_ci{
10862306a36Sopenharmony_ci	__u32 csum;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
11162306a36Sopenharmony_ci	    cpu_to_le32(EXT4_OS_LINUX) ||
11262306a36Sopenharmony_ci	    !ext4_has_metadata_csum(inode->i_sb))
11362306a36Sopenharmony_ci		return;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	csum = ext4_inode_csum(inode, raw, ei);
11662306a36Sopenharmony_ci	raw->i_checksum_lo = cpu_to_le16(csum & 0xFFFF);
11762306a36Sopenharmony_ci	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
11862306a36Sopenharmony_ci	    EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
11962306a36Sopenharmony_ci		raw->i_checksum_hi = cpu_to_le16(csum >> 16);
12062306a36Sopenharmony_ci}
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_cistatic inline int ext4_begin_ordered_truncate(struct inode *inode,
12362306a36Sopenharmony_ci					      loff_t new_size)
12462306a36Sopenharmony_ci{
12562306a36Sopenharmony_ci	trace_ext4_begin_ordered_truncate(inode, new_size);
12662306a36Sopenharmony_ci	/*
12762306a36Sopenharmony_ci	 * If jinode is zero, then we never opened the file for
12862306a36Sopenharmony_ci	 * writing, so there's no need to call
12962306a36Sopenharmony_ci	 * jbd2_journal_begin_ordered_truncate() since there's no
13062306a36Sopenharmony_ci	 * outstanding writes we need to flush.
13162306a36Sopenharmony_ci	 */
13262306a36Sopenharmony_ci	if (!EXT4_I(inode)->jinode)
13362306a36Sopenharmony_ci		return 0;
13462306a36Sopenharmony_ci	return jbd2_journal_begin_ordered_truncate(EXT4_JOURNAL(inode),
13562306a36Sopenharmony_ci						   EXT4_I(inode)->jinode,
13662306a36Sopenharmony_ci						   new_size);
13762306a36Sopenharmony_ci}
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_cistatic int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
14062306a36Sopenharmony_ci				  int pextents);
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci/*
14362306a36Sopenharmony_ci * Test whether an inode is a fast symlink.
14462306a36Sopenharmony_ci * A fast symlink has its symlink data stored in ext4_inode_info->i_data.
14562306a36Sopenharmony_ci */
14662306a36Sopenharmony_ciint ext4_inode_is_fast_symlink(struct inode *inode)
14762306a36Sopenharmony_ci{
14862306a36Sopenharmony_ci	if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
14962306a36Sopenharmony_ci		int ea_blocks = EXT4_I(inode)->i_file_acl ?
15062306a36Sopenharmony_ci				EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci		if (ext4_has_inline_data(inode))
15362306a36Sopenharmony_ci			return 0;
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci		return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
15662306a36Sopenharmony_ci	}
15762306a36Sopenharmony_ci	return S_ISLNK(inode->i_mode) && inode->i_size &&
15862306a36Sopenharmony_ci	       (inode->i_size < EXT4_N_BLOCKS * 4);
15962306a36Sopenharmony_ci}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci/*
16262306a36Sopenharmony_ci * Called at the last iput() if i_nlink is zero.
16362306a36Sopenharmony_ci */
16462306a36Sopenharmony_civoid ext4_evict_inode(struct inode *inode)
16562306a36Sopenharmony_ci{
16662306a36Sopenharmony_ci	handle_t *handle;
16762306a36Sopenharmony_ci	int err;
16862306a36Sopenharmony_ci	/*
16962306a36Sopenharmony_ci	 * Credits for final inode cleanup and freeing:
17062306a36Sopenharmony_ci	 * sb + inode (ext4_orphan_del()), block bitmap, group descriptor
17162306a36Sopenharmony_ci	 * (xattr block freeing), bitmap, group descriptor (inode freeing)
17262306a36Sopenharmony_ci	 */
17362306a36Sopenharmony_ci	int extra_credits = 6;
17462306a36Sopenharmony_ci	struct ext4_xattr_inode_array *ea_inode_array = NULL;
17562306a36Sopenharmony_ci	bool freeze_protected = false;
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci	trace_ext4_evict_inode(inode);
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
18062306a36Sopenharmony_ci		ext4_evict_ea_inode(inode);
18162306a36Sopenharmony_ci	if (inode->i_nlink) {
18262306a36Sopenharmony_ci		truncate_inode_pages_final(&inode->i_data);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci		goto no_delete;
18562306a36Sopenharmony_ci	}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	if (is_bad_inode(inode))
18862306a36Sopenharmony_ci		goto no_delete;
18962306a36Sopenharmony_ci	dquot_initialize(inode);
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	if (ext4_should_order_data(inode))
19262306a36Sopenharmony_ci		ext4_begin_ordered_truncate(inode, 0);
19362306a36Sopenharmony_ci	truncate_inode_pages_final(&inode->i_data);
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	/*
19662306a36Sopenharmony_ci	 * For inodes with journalled data, transaction commit could have
19762306a36Sopenharmony_ci	 * dirtied the inode. And for inodes with dioread_nolock, unwritten
19862306a36Sopenharmony_ci	 * extents converting worker could merge extents and also have dirtied
19962306a36Sopenharmony_ci	 * the inode. Flush worker is ignoring it because of I_FREEING flag but
20062306a36Sopenharmony_ci	 * we still need to remove the inode from the writeback lists.
20162306a36Sopenharmony_ci	 */
20262306a36Sopenharmony_ci	if (!list_empty_careful(&inode->i_io_list))
20362306a36Sopenharmony_ci		inode_io_list_del(inode);
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	/*
20662306a36Sopenharmony_ci	 * Protect us against freezing - iput() caller didn't have to have any
20762306a36Sopenharmony_ci	 * protection against it. When we are in a running transaction though,
20862306a36Sopenharmony_ci	 * we are already protected against freezing and we cannot grab further
20962306a36Sopenharmony_ci	 * protection due to lock ordering constraints.
21062306a36Sopenharmony_ci	 */
21162306a36Sopenharmony_ci	if (!ext4_journal_current_handle()) {
21262306a36Sopenharmony_ci		sb_start_intwrite(inode->i_sb);
21362306a36Sopenharmony_ci		freeze_protected = true;
21462306a36Sopenharmony_ci	}
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	if (!IS_NOQUOTA(inode))
21762306a36Sopenharmony_ci		extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	/*
22062306a36Sopenharmony_ci	 * Block bitmap, group descriptor, and inode are accounted in both
22162306a36Sopenharmony_ci	 * ext4_blocks_for_truncate() and extra_credits. So subtract 3.
22262306a36Sopenharmony_ci	 */
22362306a36Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
22462306a36Sopenharmony_ci			 ext4_blocks_for_truncate(inode) + extra_credits - 3);
22562306a36Sopenharmony_ci	if (IS_ERR(handle)) {
22662306a36Sopenharmony_ci		ext4_std_error(inode->i_sb, PTR_ERR(handle));
22762306a36Sopenharmony_ci		/*
22862306a36Sopenharmony_ci		 * If we're going to skip the normal cleanup, we still need to
22962306a36Sopenharmony_ci		 * make sure that the in-core orphan linked list is properly
23062306a36Sopenharmony_ci		 * cleaned up.
23162306a36Sopenharmony_ci		 */
23262306a36Sopenharmony_ci		ext4_orphan_del(NULL, inode);
23362306a36Sopenharmony_ci		if (freeze_protected)
23462306a36Sopenharmony_ci			sb_end_intwrite(inode->i_sb);
23562306a36Sopenharmony_ci		goto no_delete;
23662306a36Sopenharmony_ci	}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	if (IS_SYNC(inode))
23962306a36Sopenharmony_ci		ext4_handle_sync(handle);
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	/*
24262306a36Sopenharmony_ci	 * Set inode->i_size to 0 before calling ext4_truncate(). We need
24362306a36Sopenharmony_ci	 * special handling of symlinks here because i_size is used to
24462306a36Sopenharmony_ci	 * determine whether ext4_inode_info->i_data contains symlink data or
24562306a36Sopenharmony_ci	 * block mappings. Setting i_size to 0 will remove its fast symlink
24662306a36Sopenharmony_ci	 * status. Erase i_data so that it becomes a valid empty block map.
24762306a36Sopenharmony_ci	 */
24862306a36Sopenharmony_ci	if (ext4_inode_is_fast_symlink(inode))
24962306a36Sopenharmony_ci		memset(EXT4_I(inode)->i_data, 0, sizeof(EXT4_I(inode)->i_data));
25062306a36Sopenharmony_ci	inode->i_size = 0;
25162306a36Sopenharmony_ci	err = ext4_mark_inode_dirty(handle, inode);
25262306a36Sopenharmony_ci	if (err) {
25362306a36Sopenharmony_ci		ext4_warning(inode->i_sb,
25462306a36Sopenharmony_ci			     "couldn't mark inode dirty (err %d)", err);
25562306a36Sopenharmony_ci		goto stop_handle;
25662306a36Sopenharmony_ci	}
25762306a36Sopenharmony_ci	if (inode->i_blocks) {
25862306a36Sopenharmony_ci		err = ext4_truncate(inode);
25962306a36Sopenharmony_ci		if (err) {
26062306a36Sopenharmony_ci			ext4_error_err(inode->i_sb, -err,
26162306a36Sopenharmony_ci				       "couldn't truncate inode %lu (err %d)",
26262306a36Sopenharmony_ci				       inode->i_ino, err);
26362306a36Sopenharmony_ci			goto stop_handle;
26462306a36Sopenharmony_ci		}
26562306a36Sopenharmony_ci	}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	/* Remove xattr references. */
26862306a36Sopenharmony_ci	err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
26962306a36Sopenharmony_ci				      extra_credits);
27062306a36Sopenharmony_ci	if (err) {
27162306a36Sopenharmony_ci		ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
27262306a36Sopenharmony_cistop_handle:
27362306a36Sopenharmony_ci		ext4_journal_stop(handle);
27462306a36Sopenharmony_ci		ext4_orphan_del(NULL, inode);
27562306a36Sopenharmony_ci		if (freeze_protected)
27662306a36Sopenharmony_ci			sb_end_intwrite(inode->i_sb);
27762306a36Sopenharmony_ci		ext4_xattr_inode_array_free(ea_inode_array);
27862306a36Sopenharmony_ci		goto no_delete;
27962306a36Sopenharmony_ci	}
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	/*
28262306a36Sopenharmony_ci	 * Kill off the orphan record which ext4_truncate created.
28362306a36Sopenharmony_ci	 * AKPM: I think this can be inside the above `if'.
28462306a36Sopenharmony_ci	 * Note that ext4_orphan_del() has to be able to cope with the
28562306a36Sopenharmony_ci	 * deletion of a non-existent orphan - this is because we don't
28662306a36Sopenharmony_ci	 * know if ext4_truncate() actually created an orphan record.
28762306a36Sopenharmony_ci	 * (Well, we could do this if we need to, but heck - it works)
28862306a36Sopenharmony_ci	 */
28962306a36Sopenharmony_ci	ext4_orphan_del(handle, inode);
29062306a36Sopenharmony_ci	EXT4_I(inode)->i_dtime	= (__u32)ktime_get_real_seconds();
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	/*
29362306a36Sopenharmony_ci	 * One subtle ordering requirement: if anything has gone wrong
29462306a36Sopenharmony_ci	 * (transaction abort, IO errors, whatever), then we can still
29562306a36Sopenharmony_ci	 * do these next steps (the fs will already have been marked as
29662306a36Sopenharmony_ci	 * having errors), but we can't free the inode if the mark_dirty
29762306a36Sopenharmony_ci	 * fails.
29862306a36Sopenharmony_ci	 */
29962306a36Sopenharmony_ci	if (ext4_mark_inode_dirty(handle, inode))
30062306a36Sopenharmony_ci		/* If that failed, just do the required in-core inode clear. */
30162306a36Sopenharmony_ci		ext4_clear_inode(inode);
30262306a36Sopenharmony_ci	else
30362306a36Sopenharmony_ci		ext4_free_inode(handle, inode);
30462306a36Sopenharmony_ci	ext4_journal_stop(handle);
30562306a36Sopenharmony_ci	if (freeze_protected)
30662306a36Sopenharmony_ci		sb_end_intwrite(inode->i_sb);
30762306a36Sopenharmony_ci	ext4_xattr_inode_array_free(ea_inode_array);
30862306a36Sopenharmony_ci	return;
30962306a36Sopenharmony_cino_delete:
31062306a36Sopenharmony_ci	/*
31162306a36Sopenharmony_ci	 * Check out some where else accidentally dirty the evicting inode,
31262306a36Sopenharmony_ci	 * which may probably cause inode use-after-free issues later.
31362306a36Sopenharmony_ci	 */
31462306a36Sopenharmony_ci	WARN_ON_ONCE(!list_empty_careful(&inode->i_io_list));
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	if (!list_empty(&EXT4_I(inode)->i_fc_list))
31762306a36Sopenharmony_ci		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
31862306a36Sopenharmony_ci	ext4_clear_inode(inode);	/* We must guarantee clearing of inode... */
31962306a36Sopenharmony_ci}
32062306a36Sopenharmony_ci
#ifdef CONFIG_QUOTA
/* Return a pointer to the i_reserved_quota field of the ext4 inode info. */
qsize_t *ext4_get_reserved_space(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	return &ei->i_reserved_quota;
}
#endif
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci/*
32962306a36Sopenharmony_ci * Called with i_data_sem down, which is important since we can call
33062306a36Sopenharmony_ci * ext4_discard_preallocations() from here.
33162306a36Sopenharmony_ci */
33262306a36Sopenharmony_civoid ext4_da_update_reserve_space(struct inode *inode,
33362306a36Sopenharmony_ci					int used, int quota_claim)
33462306a36Sopenharmony_ci{
33562306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
33662306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	spin_lock(&ei->i_block_reservation_lock);
33962306a36Sopenharmony_ci	trace_ext4_da_update_reserve_space(inode, used, quota_claim);
34062306a36Sopenharmony_ci	if (unlikely(used > ei->i_reserved_data_blocks)) {
34162306a36Sopenharmony_ci		ext4_warning(inode->i_sb, "%s: ino %lu, used %d "
34262306a36Sopenharmony_ci			 "with only %d reserved data blocks",
34362306a36Sopenharmony_ci			 __func__, inode->i_ino, used,
34462306a36Sopenharmony_ci			 ei->i_reserved_data_blocks);
34562306a36Sopenharmony_ci		WARN_ON(1);
34662306a36Sopenharmony_ci		used = ei->i_reserved_data_blocks;
34762306a36Sopenharmony_ci	}
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_ci	/* Update per-inode reservations */
35062306a36Sopenharmony_ci	ei->i_reserved_data_blocks -= used;
35162306a36Sopenharmony_ci	percpu_counter_sub(&sbi->s_dirtyclusters_counter, used);
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	spin_unlock(&ei->i_block_reservation_lock);
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci	/* Update quota subsystem for data blocks */
35662306a36Sopenharmony_ci	if (quota_claim)
35762306a36Sopenharmony_ci		dquot_claim_block(inode, EXT4_C2B(sbi, used));
35862306a36Sopenharmony_ci	else {
35962306a36Sopenharmony_ci		/*
36062306a36Sopenharmony_ci		 * We did fallocate with an offset that is already delayed
36162306a36Sopenharmony_ci		 * allocated. So on delayed allocated writeback we should
36262306a36Sopenharmony_ci		 * not re-claim the quota for fallocated blocks.
36362306a36Sopenharmony_ci		 */
36462306a36Sopenharmony_ci		dquot_release_reservation_block(inode, EXT4_C2B(sbi, used));
36562306a36Sopenharmony_ci	}
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	/*
36862306a36Sopenharmony_ci	 * If we have done all the pending block allocations and if
36962306a36Sopenharmony_ci	 * there aren't any writers on the inode, we can discard the
37062306a36Sopenharmony_ci	 * inode's preallocations.
37162306a36Sopenharmony_ci	 */
37262306a36Sopenharmony_ci	if ((ei->i_reserved_data_blocks == 0) &&
37362306a36Sopenharmony_ci	    !inode_is_open_for_write(inode))
37462306a36Sopenharmony_ci		ext4_discard_preallocations(inode, 0);
37562306a36Sopenharmony_ci}
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_cistatic int __check_block_validity(struct inode *inode, const char *func,
37862306a36Sopenharmony_ci				unsigned int line,
37962306a36Sopenharmony_ci				struct ext4_map_blocks *map)
38062306a36Sopenharmony_ci{
38162306a36Sopenharmony_ci	if (ext4_has_feature_journal(inode->i_sb) &&
38262306a36Sopenharmony_ci	    (inode->i_ino ==
38362306a36Sopenharmony_ci	     le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum)))
38462306a36Sopenharmony_ci		return 0;
38562306a36Sopenharmony_ci	if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) {
38662306a36Sopenharmony_ci		ext4_error_inode(inode, func, line, map->m_pblk,
38762306a36Sopenharmony_ci				 "lblock %lu mapped to illegal pblock %llu "
38862306a36Sopenharmony_ci				 "(length %d)", (unsigned long) map->m_lblk,
38962306a36Sopenharmony_ci				 map->m_pblk, map->m_len);
39062306a36Sopenharmony_ci		return -EFSCORRUPTED;
39162306a36Sopenharmony_ci	}
39262306a36Sopenharmony_ci	return 0;
39362306a36Sopenharmony_ci}
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ciint ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, ext4_fsblk_t pblk,
39662306a36Sopenharmony_ci		       ext4_lblk_t len)
39762306a36Sopenharmony_ci{
39862306a36Sopenharmony_ci	int ret;
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode))
40162306a36Sopenharmony_ci		return fscrypt_zeroout_range(inode, lblk, pblk, len);
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	ret = sb_issue_zeroout(inode->i_sb, pblk, len, GFP_NOFS);
40462306a36Sopenharmony_ci	if (ret > 0)
40562306a36Sopenharmony_ci		ret = 0;
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	return ret;
40862306a36Sopenharmony_ci}
40962306a36Sopenharmony_ci
/* Call __check_block_validity() with the caller's function name and line. */
#define check_block_validity(ino, blkmap)	\
	__check_block_validity((ino), __func__, __LINE__, (blkmap))
41262306a36Sopenharmony_ci
#ifdef ES_AGGRESSIVE_TEST
/*
 * Debug aid: redo the block lookup for @map through the extent or
 * indirect mapping code and compare the result with @es_map, the mapping
 * obtained from the extent status tree.  A mismatch is only logged.
 */
static void ext4_map_blocks_es_recheck(handle_t *handle,
				       struct inode *inode,
				       struct ext4_map_blocks *es_map,
				       struct ext4_map_blocks *map,
				       int flags)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	int retval;

	map->m_flags = 0;
	/*
	 * The two results can legitimately differ because of a race
	 * (e.g. xfstests #223 with dioread_nolock enabled): the extent
	 * status tree lookup is done without i_data_sem, so an unwritten
	 * extent may have been converted in the meantime.
	 */
	down_read(&ei->i_data_sem);
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
		retval = ext4_ind_map_blocks(handle, inode, map, 0);
	else
		retval = ext4_ext_map_blocks(handle, inode, map, 0);
	up_read(&ei->i_data_sem);

	/*
	 * m_len is deliberately not compared: extents are collapsed in
	 * the status tree, so the lengths might not be equal.
	 */
	if (es_map->m_lblk == map->m_lblk &&
	    es_map->m_flags == map->m_flags &&
	    es_map->m_pblk == map->m_pblk)
		return;

	printk("ES cache assertion failed for inode: %lu "
	       "es_cached ex [%d/%d/%llu/%x] != "
	       "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
	       inode->i_ino, es_map->m_lblk, es_map->m_len,
	       es_map->m_pblk, es_map->m_flags, map->m_lblk,
	       map->m_len, map->m_pblk, map->m_flags,
	       retval, flags);
}
#endif /* ES_AGGRESSIVE_TEST */
45562306a36Sopenharmony_ci
/*
 * The ext4_map_blocks() function tries to look up the requested blocks,
 * and returns if the blocks are already mapped.
 *
 * Otherwise it takes the write lock of the i_data_sem, allocates blocks,
 * stores the allocated blocks in the result buffer head and marks it
 * mapped.
 *
 * If the file is extent based, it calls ext4_ext_map_blocks(); otherwise
 * it calls ext4_ind_map_blocks() to handle indirect mapping based files.
 *
 * On success, it returns the number of blocks being mapped or allocated.  If
 * create == 0 and the blocks are pre-allocated and unwritten, the resulting
 * @map is marked as unwritten. If create == 1, it will mark @map as mapped.
 *
 * It returns 0 if a plain lookup failed (blocks have not been allocated); in
 * that case, @map is returned as unmapped but we still fill map->m_len to
 * indicate the length of a hole starting at map->m_lblk.
 *
 * It returns the error in case of allocation failure.
 */
47862306a36Sopenharmony_ciint ext4_map_blocks(handle_t *handle, struct inode *inode,
47962306a36Sopenharmony_ci		    struct ext4_map_blocks *map, int flags)
48062306a36Sopenharmony_ci{
48162306a36Sopenharmony_ci	struct extent_status es;
48262306a36Sopenharmony_ci	int retval;
48362306a36Sopenharmony_ci	int ret = 0;
48462306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST
48562306a36Sopenharmony_ci	struct ext4_map_blocks orig_map;
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_ci	memcpy(&orig_map, map, sizeof(*map));
48862306a36Sopenharmony_ci#endif
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci	map->m_flags = 0;
49162306a36Sopenharmony_ci	ext_debug(inode, "flag 0x%x, max_blocks %u, logical block %lu\n",
49262306a36Sopenharmony_ci		  flags, map->m_len, (unsigned long) map->m_lblk);
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_ci	/*
49562306a36Sopenharmony_ci	 * ext4_map_blocks returns an int, and m_len is an unsigned int
49662306a36Sopenharmony_ci	 */
49762306a36Sopenharmony_ci	if (unlikely(map->m_len > INT_MAX))
49862306a36Sopenharmony_ci		map->m_len = INT_MAX;
49962306a36Sopenharmony_ci
50062306a36Sopenharmony_ci	/* We can handle the block number less than EXT_MAX_BLOCKS */
50162306a36Sopenharmony_ci	if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
50262306a36Sopenharmony_ci		return -EFSCORRUPTED;
50362306a36Sopenharmony_ci
50462306a36Sopenharmony_ci	/* Lookup extent status tree firstly */
50562306a36Sopenharmony_ci	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) &&
50662306a36Sopenharmony_ci	    ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
50762306a36Sopenharmony_ci		if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
50862306a36Sopenharmony_ci			map->m_pblk = ext4_es_pblock(&es) +
50962306a36Sopenharmony_ci					map->m_lblk - es.es_lblk;
51062306a36Sopenharmony_ci			map->m_flags |= ext4_es_is_written(&es) ?
51162306a36Sopenharmony_ci					EXT4_MAP_MAPPED : EXT4_MAP_UNWRITTEN;
51262306a36Sopenharmony_ci			retval = es.es_len - (map->m_lblk - es.es_lblk);
51362306a36Sopenharmony_ci			if (retval > map->m_len)
51462306a36Sopenharmony_ci				retval = map->m_len;
51562306a36Sopenharmony_ci			map->m_len = retval;
51662306a36Sopenharmony_ci		} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
51762306a36Sopenharmony_ci			map->m_pblk = 0;
51862306a36Sopenharmony_ci			retval = es.es_len - (map->m_lblk - es.es_lblk);
51962306a36Sopenharmony_ci			if (retval > map->m_len)
52062306a36Sopenharmony_ci				retval = map->m_len;
52162306a36Sopenharmony_ci			map->m_len = retval;
52262306a36Sopenharmony_ci			retval = 0;
52362306a36Sopenharmony_ci		} else {
52462306a36Sopenharmony_ci			BUG();
52562306a36Sopenharmony_ci		}
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci		if (flags & EXT4_GET_BLOCKS_CACHED_NOWAIT)
52862306a36Sopenharmony_ci			return retval;
52962306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST
53062306a36Sopenharmony_ci		ext4_map_blocks_es_recheck(handle, inode, map,
53162306a36Sopenharmony_ci					   &orig_map, flags);
53262306a36Sopenharmony_ci#endif
53362306a36Sopenharmony_ci		goto found;
53462306a36Sopenharmony_ci	}
53562306a36Sopenharmony_ci	/*
53662306a36Sopenharmony_ci	 * In the query cache no-wait mode, nothing we can do more if we
53762306a36Sopenharmony_ci	 * cannot find extent in the cache.
53862306a36Sopenharmony_ci	 */
53962306a36Sopenharmony_ci	if (flags & EXT4_GET_BLOCKS_CACHED_NOWAIT)
54062306a36Sopenharmony_ci		return 0;
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	/*
54362306a36Sopenharmony_ci	 * Try to see if we can get the block without requesting a new
54462306a36Sopenharmony_ci	 * file system block.
54562306a36Sopenharmony_ci	 */
54662306a36Sopenharmony_ci	down_read(&EXT4_I(inode)->i_data_sem);
54762306a36Sopenharmony_ci	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
54862306a36Sopenharmony_ci		retval = ext4_ext_map_blocks(handle, inode, map, 0);
54962306a36Sopenharmony_ci	} else {
55062306a36Sopenharmony_ci		retval = ext4_ind_map_blocks(handle, inode, map, 0);
55162306a36Sopenharmony_ci	}
55262306a36Sopenharmony_ci	if (retval > 0) {
55362306a36Sopenharmony_ci		unsigned int status;
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_ci		if (unlikely(retval != map->m_len)) {
55662306a36Sopenharmony_ci			ext4_warning(inode->i_sb,
55762306a36Sopenharmony_ci				     "ES len assertion failed for inode "
55862306a36Sopenharmony_ci				     "%lu: retval %d != map->m_len %d",
55962306a36Sopenharmony_ci				     inode->i_ino, retval, map->m_len);
56062306a36Sopenharmony_ci			WARN_ON(1);
56162306a36Sopenharmony_ci		}
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
56462306a36Sopenharmony_ci				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
56562306a36Sopenharmony_ci		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
56662306a36Sopenharmony_ci		    !(status & EXTENT_STATUS_WRITTEN) &&
56762306a36Sopenharmony_ci		    ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
56862306a36Sopenharmony_ci				       map->m_lblk + map->m_len - 1))
56962306a36Sopenharmony_ci			status |= EXTENT_STATUS_DELAYED;
57062306a36Sopenharmony_ci		ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
57162306a36Sopenharmony_ci				      map->m_pblk, status);
57262306a36Sopenharmony_ci	}
57362306a36Sopenharmony_ci	up_read((&EXT4_I(inode)->i_data_sem));
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_cifound:
57662306a36Sopenharmony_ci	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
57762306a36Sopenharmony_ci		ret = check_block_validity(inode, map);
57862306a36Sopenharmony_ci		if (ret != 0)
57962306a36Sopenharmony_ci			return ret;
58062306a36Sopenharmony_ci	}
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci	/* If it is only a block(s) look up */
58362306a36Sopenharmony_ci	if ((flags & EXT4_GET_BLOCKS_CREATE) == 0)
58462306a36Sopenharmony_ci		return retval;
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci	/*
58762306a36Sopenharmony_ci	 * Returns if the blocks have already allocated
58862306a36Sopenharmony_ci	 *
58962306a36Sopenharmony_ci	 * Note that if blocks have been preallocated
59062306a36Sopenharmony_ci	 * ext4_ext_get_block() returns the create = 0
59162306a36Sopenharmony_ci	 * with buffer head unmapped.
59262306a36Sopenharmony_ci	 */
59362306a36Sopenharmony_ci	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED)
59462306a36Sopenharmony_ci		/*
59562306a36Sopenharmony_ci		 * If we need to convert extent to unwritten
59662306a36Sopenharmony_ci		 * we continue and do the actual work in
59762306a36Sopenharmony_ci		 * ext4_ext_map_blocks()
59862306a36Sopenharmony_ci		 */
59962306a36Sopenharmony_ci		if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN))
60062306a36Sopenharmony_ci			return retval;
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci	/*
60362306a36Sopenharmony_ci	 * Here we clear m_flags because after allocating an new extent,
60462306a36Sopenharmony_ci	 * it will be set again.
60562306a36Sopenharmony_ci	 */
60662306a36Sopenharmony_ci	map->m_flags &= ~EXT4_MAP_FLAGS;
60762306a36Sopenharmony_ci
60862306a36Sopenharmony_ci	/*
60962306a36Sopenharmony_ci	 * New blocks allocate and/or writing to unwritten extent
61062306a36Sopenharmony_ci	 * will possibly result in updating i_data, so we take
61162306a36Sopenharmony_ci	 * the write lock of i_data_sem, and call get_block()
61262306a36Sopenharmony_ci	 * with create == 1 flag.
61362306a36Sopenharmony_ci	 */
61462306a36Sopenharmony_ci	down_write(&EXT4_I(inode)->i_data_sem);
61562306a36Sopenharmony_ci
61662306a36Sopenharmony_ci	/*
61762306a36Sopenharmony_ci	 * We need to check for EXT4 here because migrate
61862306a36Sopenharmony_ci	 * could have changed the inode type in between
61962306a36Sopenharmony_ci	 */
62062306a36Sopenharmony_ci	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
62162306a36Sopenharmony_ci		retval = ext4_ext_map_blocks(handle, inode, map, flags);
62262306a36Sopenharmony_ci	} else {
62362306a36Sopenharmony_ci		retval = ext4_ind_map_blocks(handle, inode, map, flags);
62462306a36Sopenharmony_ci
62562306a36Sopenharmony_ci		if (retval > 0 && map->m_flags & EXT4_MAP_NEW) {
62662306a36Sopenharmony_ci			/*
62762306a36Sopenharmony_ci			 * We allocated new blocks which will result in
62862306a36Sopenharmony_ci			 * i_data's format changing.  Force the migrate
62962306a36Sopenharmony_ci			 * to fail by clearing migrate flags
63062306a36Sopenharmony_ci			 */
63162306a36Sopenharmony_ci			ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
63262306a36Sopenharmony_ci		}
63362306a36Sopenharmony_ci	}
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci	if (retval > 0) {
63662306a36Sopenharmony_ci		unsigned int status;
63762306a36Sopenharmony_ci
63862306a36Sopenharmony_ci		if (unlikely(retval != map->m_len)) {
63962306a36Sopenharmony_ci			ext4_warning(inode->i_sb,
64062306a36Sopenharmony_ci				     "ES len assertion failed for inode "
64162306a36Sopenharmony_ci				     "%lu: retval %d != map->m_len %d",
64262306a36Sopenharmony_ci				     inode->i_ino, retval, map->m_len);
64362306a36Sopenharmony_ci			WARN_ON(1);
64462306a36Sopenharmony_ci		}
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci		/*
64762306a36Sopenharmony_ci		 * We have to zeroout blocks before inserting them into extent
64862306a36Sopenharmony_ci		 * status tree. Otherwise someone could look them up there and
64962306a36Sopenharmony_ci		 * use them before they are really zeroed. We also have to
65062306a36Sopenharmony_ci		 * unmap metadata before zeroing as otherwise writeback can
65162306a36Sopenharmony_ci		 * overwrite zeros with stale data from block device.
65262306a36Sopenharmony_ci		 */
65362306a36Sopenharmony_ci		if (flags & EXT4_GET_BLOCKS_ZERO &&
65462306a36Sopenharmony_ci		    map->m_flags & EXT4_MAP_MAPPED &&
65562306a36Sopenharmony_ci		    map->m_flags & EXT4_MAP_NEW) {
65662306a36Sopenharmony_ci			ret = ext4_issue_zeroout(inode, map->m_lblk,
65762306a36Sopenharmony_ci						 map->m_pblk, map->m_len);
65862306a36Sopenharmony_ci			if (ret) {
65962306a36Sopenharmony_ci				retval = ret;
66062306a36Sopenharmony_ci				goto out_sem;
66162306a36Sopenharmony_ci			}
66262306a36Sopenharmony_ci		}
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_ci		/*
66562306a36Sopenharmony_ci		 * If the extent has been zeroed out, we don't need to update
66662306a36Sopenharmony_ci		 * extent status tree.
66762306a36Sopenharmony_ci		 */
66862306a36Sopenharmony_ci		if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
66962306a36Sopenharmony_ci		    ext4_es_lookup_extent(inode, map->m_lblk, NULL, &es)) {
67062306a36Sopenharmony_ci			if (ext4_es_is_written(&es))
67162306a36Sopenharmony_ci				goto out_sem;
67262306a36Sopenharmony_ci		}
67362306a36Sopenharmony_ci		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
67462306a36Sopenharmony_ci				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
67562306a36Sopenharmony_ci		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
67662306a36Sopenharmony_ci		    !(status & EXTENT_STATUS_WRITTEN) &&
67762306a36Sopenharmony_ci		    ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
67862306a36Sopenharmony_ci				       map->m_lblk + map->m_len - 1))
67962306a36Sopenharmony_ci			status |= EXTENT_STATUS_DELAYED;
68062306a36Sopenharmony_ci		ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
68162306a36Sopenharmony_ci				      map->m_pblk, status);
68262306a36Sopenharmony_ci	}
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ciout_sem:
68562306a36Sopenharmony_ci	up_write((&EXT4_I(inode)->i_data_sem));
68662306a36Sopenharmony_ci	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
68762306a36Sopenharmony_ci		ret = check_block_validity(inode, map);
68862306a36Sopenharmony_ci		if (ret != 0)
68962306a36Sopenharmony_ci			return ret;
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci		/*
69262306a36Sopenharmony_ci		 * Inodes with freshly allocated blocks where contents will be
69362306a36Sopenharmony_ci		 * visible after transaction commit must be on transaction's
69462306a36Sopenharmony_ci		 * ordered data list.
69562306a36Sopenharmony_ci		 */
69662306a36Sopenharmony_ci		if (map->m_flags & EXT4_MAP_NEW &&
69762306a36Sopenharmony_ci		    !(map->m_flags & EXT4_MAP_UNWRITTEN) &&
69862306a36Sopenharmony_ci		    !(flags & EXT4_GET_BLOCKS_ZERO) &&
69962306a36Sopenharmony_ci		    !ext4_is_quota_file(inode) &&
70062306a36Sopenharmony_ci		    ext4_should_order_data(inode)) {
70162306a36Sopenharmony_ci			loff_t start_byte =
70262306a36Sopenharmony_ci				(loff_t)map->m_lblk << inode->i_blkbits;
70362306a36Sopenharmony_ci			loff_t length = (loff_t)map->m_len << inode->i_blkbits;
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci			if (flags & EXT4_GET_BLOCKS_IO_SUBMIT)
70662306a36Sopenharmony_ci				ret = ext4_jbd2_inode_add_wait(handle, inode,
70762306a36Sopenharmony_ci						start_byte, length);
70862306a36Sopenharmony_ci			else
70962306a36Sopenharmony_ci				ret = ext4_jbd2_inode_add_write(handle, inode,
71062306a36Sopenharmony_ci						start_byte, length);
71162306a36Sopenharmony_ci			if (ret)
71262306a36Sopenharmony_ci				return ret;
71362306a36Sopenharmony_ci		}
71462306a36Sopenharmony_ci	}
71562306a36Sopenharmony_ci	if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN ||
71662306a36Sopenharmony_ci				map->m_flags & EXT4_MAP_MAPPED))
71762306a36Sopenharmony_ci		ext4_fc_track_range(handle, inode, map->m_lblk,
71862306a36Sopenharmony_ci					map->m_lblk + map->m_len - 1);
71962306a36Sopenharmony_ci	if (retval < 0)
72062306a36Sopenharmony_ci		ext_debug(inode, "failed with err %d\n", retval);
72162306a36Sopenharmony_ci	return retval;
72262306a36Sopenharmony_ci}
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci/*
72562306a36Sopenharmony_ci * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages
72662306a36Sopenharmony_ci * we have to be careful as someone else may be manipulating b_state as well.
72762306a36Sopenharmony_ci */
72862306a36Sopenharmony_cistatic void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
72962306a36Sopenharmony_ci{
73062306a36Sopenharmony_ci	unsigned long old_state;
73162306a36Sopenharmony_ci	unsigned long new_state;
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	flags &= EXT4_MAP_FLAGS;
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci	/* Dummy buffer_head? Set non-atomically. */
73662306a36Sopenharmony_ci	if (!bh->b_page) {
73762306a36Sopenharmony_ci		bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags;
73862306a36Sopenharmony_ci		return;
73962306a36Sopenharmony_ci	}
74062306a36Sopenharmony_ci	/*
74162306a36Sopenharmony_ci	 * Someone else may be modifying b_state. Be careful! This is ugly but
74262306a36Sopenharmony_ci	 * once we get rid of using bh as a container for mapping information
74362306a36Sopenharmony_ci	 * to pass to / from get_block functions, this can go away.
74462306a36Sopenharmony_ci	 */
74562306a36Sopenharmony_ci	old_state = READ_ONCE(bh->b_state);
74662306a36Sopenharmony_ci	do {
74762306a36Sopenharmony_ci		new_state = (old_state & ~EXT4_MAP_FLAGS) | flags;
74862306a36Sopenharmony_ci	} while (unlikely(!try_cmpxchg(&bh->b_state, &old_state, new_state)));
74962306a36Sopenharmony_ci}
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_cistatic int _ext4_get_block(struct inode *inode, sector_t iblock,
75262306a36Sopenharmony_ci			   struct buffer_head *bh, int flags)
75362306a36Sopenharmony_ci{
75462306a36Sopenharmony_ci	struct ext4_map_blocks map;
75562306a36Sopenharmony_ci	int ret = 0;
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci	if (ext4_has_inline_data(inode))
75862306a36Sopenharmony_ci		return -ERANGE;
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci	map.m_lblk = iblock;
76162306a36Sopenharmony_ci	map.m_len = bh->b_size >> inode->i_blkbits;
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci	ret = ext4_map_blocks(ext4_journal_current_handle(), inode, &map,
76462306a36Sopenharmony_ci			      flags);
76562306a36Sopenharmony_ci	if (ret > 0) {
76662306a36Sopenharmony_ci		map_bh(bh, inode->i_sb, map.m_pblk);
76762306a36Sopenharmony_ci		ext4_update_bh_state(bh, map.m_flags);
76862306a36Sopenharmony_ci		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
76962306a36Sopenharmony_ci		ret = 0;
77062306a36Sopenharmony_ci	} else if (ret == 0) {
77162306a36Sopenharmony_ci		/* hole case, need to fill in bh->b_size */
77262306a36Sopenharmony_ci		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
77362306a36Sopenharmony_ci	}
77462306a36Sopenharmony_ci	return ret;
77562306a36Sopenharmony_ci}
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ciint ext4_get_block(struct inode *inode, sector_t iblock,
77862306a36Sopenharmony_ci		   struct buffer_head *bh, int create)
77962306a36Sopenharmony_ci{
78062306a36Sopenharmony_ci	return _ext4_get_block(inode, iblock, bh,
78162306a36Sopenharmony_ci			       create ? EXT4_GET_BLOCKS_CREATE : 0);
78262306a36Sopenharmony_ci}
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci/*
78562306a36Sopenharmony_ci * Get block function used when preparing for buffered write if we require
78662306a36Sopenharmony_ci * creating an unwritten extent if blocks haven't been allocated.  The extent
78762306a36Sopenharmony_ci * will be converted to written after the IO is complete.
78862306a36Sopenharmony_ci */
78962306a36Sopenharmony_ciint ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
79062306a36Sopenharmony_ci			     struct buffer_head *bh_result, int create)
79162306a36Sopenharmony_ci{
79262306a36Sopenharmony_ci	int ret = 0;
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci	ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n",
79562306a36Sopenharmony_ci		   inode->i_ino, create);
79662306a36Sopenharmony_ci	ret = _ext4_get_block(inode, iblock, bh_result,
79762306a36Sopenharmony_ci			       EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT);
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_ci	/*
80062306a36Sopenharmony_ci	 * If the buffer is marked unwritten, mark it as new to make sure it is
80162306a36Sopenharmony_ci	 * zeroed out correctly in case of partial writes. Otherwise, there is
80262306a36Sopenharmony_ci	 * a chance of stale data getting exposed.
80362306a36Sopenharmony_ci	 */
80462306a36Sopenharmony_ci	if (ret == 0 && buffer_unwritten(bh_result))
80562306a36Sopenharmony_ci		set_buffer_new(bh_result);
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci	return ret;
80862306a36Sopenharmony_ci}
80962306a36Sopenharmony_ci
/* Upper bound on the number of blocks mapped in one go for direct IO. */
#define DIO_MAX_BLOCKS	4096
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci/*
81462306a36Sopenharmony_ci * `handle' can be NULL if create is zero
81562306a36Sopenharmony_ci */
81662306a36Sopenharmony_cistruct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
81762306a36Sopenharmony_ci				ext4_lblk_t block, int map_flags)
81862306a36Sopenharmony_ci{
81962306a36Sopenharmony_ci	struct ext4_map_blocks map;
82062306a36Sopenharmony_ci	struct buffer_head *bh;
82162306a36Sopenharmony_ci	int create = map_flags & EXT4_GET_BLOCKS_CREATE;
82262306a36Sopenharmony_ci	bool nowait = map_flags & EXT4_GET_BLOCKS_CACHED_NOWAIT;
82362306a36Sopenharmony_ci	int err;
82462306a36Sopenharmony_ci
82562306a36Sopenharmony_ci	ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
82662306a36Sopenharmony_ci		    || handle != NULL || create == 0);
82762306a36Sopenharmony_ci	ASSERT(create == 0 || !nowait);
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	map.m_lblk = block;
83062306a36Sopenharmony_ci	map.m_len = 1;
83162306a36Sopenharmony_ci	err = ext4_map_blocks(handle, inode, &map, map_flags);
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ci	if (err == 0)
83462306a36Sopenharmony_ci		return create ? ERR_PTR(-ENOSPC) : NULL;
83562306a36Sopenharmony_ci	if (err < 0)
83662306a36Sopenharmony_ci		return ERR_PTR(err);
83762306a36Sopenharmony_ci
83862306a36Sopenharmony_ci	if (nowait)
83962306a36Sopenharmony_ci		return sb_find_get_block(inode->i_sb, map.m_pblk);
84062306a36Sopenharmony_ci
84162306a36Sopenharmony_ci	bh = sb_getblk(inode->i_sb, map.m_pblk);
84262306a36Sopenharmony_ci	if (unlikely(!bh))
84362306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
84462306a36Sopenharmony_ci	if (map.m_flags & EXT4_MAP_NEW) {
84562306a36Sopenharmony_ci		ASSERT(create != 0);
84662306a36Sopenharmony_ci		ASSERT((EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
84762306a36Sopenharmony_ci			    || (handle != NULL));
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci		/*
85062306a36Sopenharmony_ci		 * Now that we do not always journal data, we should
85162306a36Sopenharmony_ci		 * keep in mind whether this should always journal the
85262306a36Sopenharmony_ci		 * new buffer as metadata.  For now, regular file
85362306a36Sopenharmony_ci		 * writes use ext4_get_block instead, so it's not a
85462306a36Sopenharmony_ci		 * problem.
85562306a36Sopenharmony_ci		 */
85662306a36Sopenharmony_ci		lock_buffer(bh);
85762306a36Sopenharmony_ci		BUFFER_TRACE(bh, "call get_create_access");
85862306a36Sopenharmony_ci		err = ext4_journal_get_create_access(handle, inode->i_sb, bh,
85962306a36Sopenharmony_ci						     EXT4_JTR_NONE);
86062306a36Sopenharmony_ci		if (unlikely(err)) {
86162306a36Sopenharmony_ci			unlock_buffer(bh);
86262306a36Sopenharmony_ci			goto errout;
86362306a36Sopenharmony_ci		}
86462306a36Sopenharmony_ci		if (!buffer_uptodate(bh)) {
86562306a36Sopenharmony_ci			memset(bh->b_data, 0, inode->i_sb->s_blocksize);
86662306a36Sopenharmony_ci			set_buffer_uptodate(bh);
86762306a36Sopenharmony_ci		}
86862306a36Sopenharmony_ci		unlock_buffer(bh);
86962306a36Sopenharmony_ci		BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
87062306a36Sopenharmony_ci		err = ext4_handle_dirty_metadata(handle, inode, bh);
87162306a36Sopenharmony_ci		if (unlikely(err))
87262306a36Sopenharmony_ci			goto errout;
87362306a36Sopenharmony_ci	} else
87462306a36Sopenharmony_ci		BUFFER_TRACE(bh, "not a new buffer");
87562306a36Sopenharmony_ci	return bh;
87662306a36Sopenharmony_cierrout:
87762306a36Sopenharmony_ci	brelse(bh);
87862306a36Sopenharmony_ci	return ERR_PTR(err);
87962306a36Sopenharmony_ci}
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_cistruct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
88262306a36Sopenharmony_ci			       ext4_lblk_t block, int map_flags)
88362306a36Sopenharmony_ci{
88462306a36Sopenharmony_ci	struct buffer_head *bh;
88562306a36Sopenharmony_ci	int ret;
88662306a36Sopenharmony_ci
88762306a36Sopenharmony_ci	bh = ext4_getblk(handle, inode, block, map_flags);
88862306a36Sopenharmony_ci	if (IS_ERR(bh))
88962306a36Sopenharmony_ci		return bh;
89062306a36Sopenharmony_ci	if (!bh || ext4_buffer_uptodate(bh))
89162306a36Sopenharmony_ci		return bh;
89262306a36Sopenharmony_ci
89362306a36Sopenharmony_ci	ret = ext4_read_bh_lock(bh, REQ_META | REQ_PRIO, true);
89462306a36Sopenharmony_ci	if (ret) {
89562306a36Sopenharmony_ci		put_bh(bh);
89662306a36Sopenharmony_ci		return ERR_PTR(ret);
89762306a36Sopenharmony_ci	}
89862306a36Sopenharmony_ci	return bh;
89962306a36Sopenharmony_ci}
90062306a36Sopenharmony_ci
90162306a36Sopenharmony_ci/* Read a contiguous batch of blocks. */
90262306a36Sopenharmony_ciint ext4_bread_batch(struct inode *inode, ext4_lblk_t block, int bh_count,
90362306a36Sopenharmony_ci		     bool wait, struct buffer_head **bhs)
90462306a36Sopenharmony_ci{
90562306a36Sopenharmony_ci	int i, err;
90662306a36Sopenharmony_ci
90762306a36Sopenharmony_ci	for (i = 0; i < bh_count; i++) {
90862306a36Sopenharmony_ci		bhs[i] = ext4_getblk(NULL, inode, block + i, 0 /* map_flags */);
90962306a36Sopenharmony_ci		if (IS_ERR(bhs[i])) {
91062306a36Sopenharmony_ci			err = PTR_ERR(bhs[i]);
91162306a36Sopenharmony_ci			bh_count = i;
91262306a36Sopenharmony_ci			goto out_brelse;
91362306a36Sopenharmony_ci		}
91462306a36Sopenharmony_ci	}
91562306a36Sopenharmony_ci
91662306a36Sopenharmony_ci	for (i = 0; i < bh_count; i++)
91762306a36Sopenharmony_ci		/* Note that NULL bhs[i] is valid because of holes. */
91862306a36Sopenharmony_ci		if (bhs[i] && !ext4_buffer_uptodate(bhs[i]))
91962306a36Sopenharmony_ci			ext4_read_bh_lock(bhs[i], REQ_META | REQ_PRIO, false);
92062306a36Sopenharmony_ci
92162306a36Sopenharmony_ci	if (!wait)
92262306a36Sopenharmony_ci		return 0;
92362306a36Sopenharmony_ci
92462306a36Sopenharmony_ci	for (i = 0; i < bh_count; i++)
92562306a36Sopenharmony_ci		if (bhs[i])
92662306a36Sopenharmony_ci			wait_on_buffer(bhs[i]);
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	for (i = 0; i < bh_count; i++) {
92962306a36Sopenharmony_ci		if (bhs[i] && !buffer_uptodate(bhs[i])) {
93062306a36Sopenharmony_ci			err = -EIO;
93162306a36Sopenharmony_ci			goto out_brelse;
93262306a36Sopenharmony_ci		}
93362306a36Sopenharmony_ci	}
93462306a36Sopenharmony_ci	return 0;
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ciout_brelse:
93762306a36Sopenharmony_ci	for (i = 0; i < bh_count; i++) {
93862306a36Sopenharmony_ci		brelse(bhs[i]);
93962306a36Sopenharmony_ci		bhs[i] = NULL;
94062306a36Sopenharmony_ci	}
94162306a36Sopenharmony_ci	return err;
94262306a36Sopenharmony_ci}
94362306a36Sopenharmony_ci
94462306a36Sopenharmony_ciint ext4_walk_page_buffers(handle_t *handle, struct inode *inode,
94562306a36Sopenharmony_ci			   struct buffer_head *head,
94662306a36Sopenharmony_ci			   unsigned from,
94762306a36Sopenharmony_ci			   unsigned to,
94862306a36Sopenharmony_ci			   int *partial,
94962306a36Sopenharmony_ci			   int (*fn)(handle_t *handle, struct inode *inode,
95062306a36Sopenharmony_ci				     struct buffer_head *bh))
95162306a36Sopenharmony_ci{
95262306a36Sopenharmony_ci	struct buffer_head *bh;
95362306a36Sopenharmony_ci	unsigned block_start, block_end;
95462306a36Sopenharmony_ci	unsigned blocksize = head->b_size;
95562306a36Sopenharmony_ci	int err, ret = 0;
95662306a36Sopenharmony_ci	struct buffer_head *next;
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci	for (bh = head, block_start = 0;
95962306a36Sopenharmony_ci	     ret == 0 && (bh != head || !block_start);
96062306a36Sopenharmony_ci	     block_start = block_end, bh = next) {
96162306a36Sopenharmony_ci		next = bh->b_this_page;
96262306a36Sopenharmony_ci		block_end = block_start + blocksize;
96362306a36Sopenharmony_ci		if (block_end <= from || block_start >= to) {
96462306a36Sopenharmony_ci			if (partial && !buffer_uptodate(bh))
96562306a36Sopenharmony_ci				*partial = 1;
96662306a36Sopenharmony_ci			continue;
96762306a36Sopenharmony_ci		}
96862306a36Sopenharmony_ci		err = (*fn)(handle, inode, bh);
96962306a36Sopenharmony_ci		if (!ret)
97062306a36Sopenharmony_ci			ret = err;
97162306a36Sopenharmony_ci	}
97262306a36Sopenharmony_ci	return ret;
97362306a36Sopenharmony_ci}
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci/*
97662306a36Sopenharmony_ci * Helper for handling dirtying of journalled data. We also mark the folio as
97762306a36Sopenharmony_ci * dirty so that writeback code knows about this page (and inode) contains
97862306a36Sopenharmony_ci * dirty data. ext4_writepages() then commits appropriate transaction to
97962306a36Sopenharmony_ci * make data stable.
98062306a36Sopenharmony_ci */
98162306a36Sopenharmony_cistatic int ext4_dirty_journalled_data(handle_t *handle, struct buffer_head *bh)
98262306a36Sopenharmony_ci{
98362306a36Sopenharmony_ci	folio_mark_dirty(bh->b_folio);
98462306a36Sopenharmony_ci	return ext4_handle_dirty_metadata(handle, NULL, bh);
98562306a36Sopenharmony_ci}
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_ciint do_journal_get_write_access(handle_t *handle, struct inode *inode,
98862306a36Sopenharmony_ci				struct buffer_head *bh)
98962306a36Sopenharmony_ci{
99062306a36Sopenharmony_ci	int dirty = buffer_dirty(bh);
99162306a36Sopenharmony_ci	int ret;
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_ci	if (!buffer_mapped(bh) || buffer_freed(bh))
99462306a36Sopenharmony_ci		return 0;
99562306a36Sopenharmony_ci	/*
99662306a36Sopenharmony_ci	 * __block_write_begin() could have dirtied some buffers. Clean
99762306a36Sopenharmony_ci	 * the dirty bit as jbd2_journal_get_write_access() could complain
99862306a36Sopenharmony_ci	 * otherwise about fs integrity issues. Setting of the dirty bit
99962306a36Sopenharmony_ci	 * by __block_write_begin() isn't a real problem here as we clear
100062306a36Sopenharmony_ci	 * the bit before releasing a page lock and thus writeback cannot
100162306a36Sopenharmony_ci	 * ever write the buffer.
100262306a36Sopenharmony_ci	 */
100362306a36Sopenharmony_ci	if (dirty)
100462306a36Sopenharmony_ci		clear_buffer_dirty(bh);
100562306a36Sopenharmony_ci	BUFFER_TRACE(bh, "get write access");
100662306a36Sopenharmony_ci	ret = ext4_journal_get_write_access(handle, inode->i_sb, bh,
100762306a36Sopenharmony_ci					    EXT4_JTR_NONE);
100862306a36Sopenharmony_ci	if (!ret && dirty)
100962306a36Sopenharmony_ci		ret = ext4_dirty_journalled_data(handle, bh);
101062306a36Sopenharmony_ci	return ret;
101162306a36Sopenharmony_ci}
101262306a36Sopenharmony_ci
#ifdef CONFIG_FS_ENCRYPTION
/*
 * Prepare the buffers of a locked @folio for a write of @len bytes at file
 * position @pos, using @get_block to map buffers that are not mapped yet.
 * Only built when CONFIG_FS_ENCRYPTION is enabled.
 *
 * For every buffer touched by the write range:
 *  - an unmapped buffer is mapped via @get_block (called with create == 1);
 *    a buffer that comes back new is either marked uptodate and dirty (folio
 *    already uptodate) or has the parts outside the write range zeroed;
 *  - a partially overwritten buffer that is neither uptodate, delayed nor
 *    unwritten is read in.
 * Buffers outside the range are only marked uptodate if the folio is.
 *
 * After the reads complete, the blocks that were read are decrypted in place
 * when the inode uses fs-layer crypto.
 *
 * Returns 0 on success, the error from @get_block, -EIO if a read did not
 * leave its buffer uptodate, or the decryption error.  On a mapping or read
 * error folio_zero_new_buffers() is called for the write range.
 */
static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
				  get_block_t *get_block)
{
	unsigned from = pos & (PAGE_SIZE - 1);
	unsigned to = from + len;
	struct inode *inode = folio->mapping->host;
	unsigned blocksize = inode->i_sb->s_blocksize;
	unsigned blk_start, blk_end;
	struct buffer_head *bh, *head, *wait[2];
	sector_t block;
	int nr_wait = 0;
	int err = 0;
	int idx;

	BUG_ON(!folio_test_locked(folio));
	BUG_ON(from > PAGE_SIZE);
	BUG_ON(to > PAGE_SIZE);
	BUG_ON(from > to);

	head = folio_buffers(folio);
	if (!head) {
		create_empty_buffers(&folio->page, blocksize, 0);
		head = folio_buffers(folio);
	}
	block = (sector_t)folio->index << (PAGE_SHIFT - ilog2(blocksize));

	for (bh = head, blk_start = 0; bh != head || !blk_start;
	     block++, blk_start = blk_end, bh = bh->b_this_page) {
		int partial;

		blk_end = blk_start + blocksize;

		/* Buffer entirely outside the range being written. */
		if (blk_end <= from || blk_start >= to) {
			if (folio_test_uptodate(folio))
				set_buffer_uptodate(bh);
			continue;
		}

		/* Does the write cover only part of this buffer? */
		partial = blk_end > to || blk_start < from;

		if (buffer_new(bh))
			clear_buffer_new(bh);

		if (!buffer_mapped(bh)) {
			WARN_ON(bh->b_size != blocksize);
			err = get_block(inode, block, bh, 1);
			if (err)
				break;
			if (buffer_new(bh)) {
				if (folio_test_uptodate(folio)) {
					clear_buffer_new(bh);
					set_buffer_uptodate(bh);
					mark_buffer_dirty(bh);
				} else if (partial) {
					folio_zero_segments(folio, to,
							    blk_end,
							    blk_start, from);
				}
				continue;
			}
		}

		if (folio_test_uptodate(folio)) {
			set_buffer_uptodate(bh);
			continue;
		}

		/* Nothing to read unless old contents must be preserved. */
		if (buffer_uptodate(bh) || buffer_delay(bh) ||
		    buffer_unwritten(bh) || !partial)
			continue;

		ext4_read_bh_lock(bh, 0, false);
		wait[nr_wait++] = bh;
	}

	/*
	 * If we issued read requests, let them complete.
	 */
	for (idx = 0; idx < nr_wait; idx++) {
		wait_on_buffer(wait[idx]);
		if (!buffer_uptodate(wait[idx]))
			err = -EIO;
	}

	if (unlikely(err)) {
		folio_zero_new_buffers(folio, from, to);
		return err;
	}

	if (!fscrypt_inode_uses_fs_layer_crypto(inode))
		return err;

	/* Decrypt, in place, every block that was just read. */
	for (idx = 0; idx < nr_wait; idx++) {
		int dec_err;

		dec_err = fscrypt_decrypt_pagecache_blocks(folio, blocksize,
						bh_offset(wait[idx]));
		if (dec_err) {
			clear_buffer_uptodate(wait[idx]);
			err = dec_err;
		}
	}

	return err;
}
#endif
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_ci/*
111162306a36Sopenharmony_ci * To preserve ordering, it is essential that the hole instantiation and
111262306a36Sopenharmony_ci * the data write be encapsulated in a single transaction.  We cannot
111362306a36Sopenharmony_ci * close off a transaction and start a new one between the ext4_get_block()
111462306a36Sopenharmony_ci * and the ext4_write_end().  So doing the jbd2_journal_start at the start of
111562306a36Sopenharmony_ci * ext4_write_begin() is the right place.
111662306a36Sopenharmony_ci */
111762306a36Sopenharmony_cistatic int ext4_write_begin(struct file *file, struct address_space *mapping,
111862306a36Sopenharmony_ci			    loff_t pos, unsigned len,
111962306a36Sopenharmony_ci			    struct page **pagep, void **fsdata)
112062306a36Sopenharmony_ci{
112162306a36Sopenharmony_ci	struct inode *inode = mapping->host;
112262306a36Sopenharmony_ci	int ret, needed_blocks;
112362306a36Sopenharmony_ci	handle_t *handle;
112462306a36Sopenharmony_ci	int retries = 0;
112562306a36Sopenharmony_ci	struct folio *folio;
112662306a36Sopenharmony_ci	pgoff_t index;
112762306a36Sopenharmony_ci	unsigned from, to;
112862306a36Sopenharmony_ci
112962306a36Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
113062306a36Sopenharmony_ci		return -EIO;
113162306a36Sopenharmony_ci
113262306a36Sopenharmony_ci	trace_ext4_write_begin(inode, pos, len);
113362306a36Sopenharmony_ci	/*
113462306a36Sopenharmony_ci	 * Reserve one block more for addition to orphan list in case
113562306a36Sopenharmony_ci	 * we allocate blocks but write fails for some reason
113662306a36Sopenharmony_ci	 */
113762306a36Sopenharmony_ci	needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
113862306a36Sopenharmony_ci	index = pos >> PAGE_SHIFT;
113962306a36Sopenharmony_ci	from = pos & (PAGE_SIZE - 1);
114062306a36Sopenharmony_ci	to = from + len;
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
114362306a36Sopenharmony_ci		ret = ext4_try_to_write_inline_data(mapping, inode, pos, len,
114462306a36Sopenharmony_ci						    pagep);
114562306a36Sopenharmony_ci		if (ret < 0)
114662306a36Sopenharmony_ci			return ret;
114762306a36Sopenharmony_ci		if (ret == 1)
114862306a36Sopenharmony_ci			return 0;
114962306a36Sopenharmony_ci	}
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_ci	/*
115262306a36Sopenharmony_ci	 * __filemap_get_folio() can take a long time if the
115362306a36Sopenharmony_ci	 * system is thrashing due to memory pressure, or if the folio
115462306a36Sopenharmony_ci	 * is being written back.  So grab it first before we start
115562306a36Sopenharmony_ci	 * the transaction handle.  This also allows us to allocate
115662306a36Sopenharmony_ci	 * the folio (if needed) without using GFP_NOFS.
115762306a36Sopenharmony_ci	 */
115862306a36Sopenharmony_ciretry_grab:
115962306a36Sopenharmony_ci	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
116062306a36Sopenharmony_ci					mapping_gfp_mask(mapping));
116162306a36Sopenharmony_ci	if (IS_ERR(folio))
116262306a36Sopenharmony_ci		return PTR_ERR(folio);
116362306a36Sopenharmony_ci	/*
116462306a36Sopenharmony_ci	 * The same as page allocation, we prealloc buffer heads before
116562306a36Sopenharmony_ci	 * starting the handle.
116662306a36Sopenharmony_ci	 */
116762306a36Sopenharmony_ci	if (!folio_buffers(folio))
116862306a36Sopenharmony_ci		create_empty_buffers(&folio->page, inode->i_sb->s_blocksize, 0);
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	folio_unlock(folio);
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ciretry_journal:
117362306a36Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
117462306a36Sopenharmony_ci	if (IS_ERR(handle)) {
117562306a36Sopenharmony_ci		folio_put(folio);
117662306a36Sopenharmony_ci		return PTR_ERR(handle);
117762306a36Sopenharmony_ci	}
117862306a36Sopenharmony_ci
117962306a36Sopenharmony_ci	folio_lock(folio);
118062306a36Sopenharmony_ci	if (folio->mapping != mapping) {
118162306a36Sopenharmony_ci		/* The folio got truncated from under us */
118262306a36Sopenharmony_ci		folio_unlock(folio);
118362306a36Sopenharmony_ci		folio_put(folio);
118462306a36Sopenharmony_ci		ext4_journal_stop(handle);
118562306a36Sopenharmony_ci		goto retry_grab;
118662306a36Sopenharmony_ci	}
118762306a36Sopenharmony_ci	/* In case writeback began while the folio was unlocked */
118862306a36Sopenharmony_ci	folio_wait_stable(folio);
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION
119162306a36Sopenharmony_ci	if (ext4_should_dioread_nolock(inode))
119262306a36Sopenharmony_ci		ret = ext4_block_write_begin(folio, pos, len,
119362306a36Sopenharmony_ci					     ext4_get_block_unwritten);
119462306a36Sopenharmony_ci	else
119562306a36Sopenharmony_ci		ret = ext4_block_write_begin(folio, pos, len, ext4_get_block);
119662306a36Sopenharmony_ci#else
119762306a36Sopenharmony_ci	if (ext4_should_dioread_nolock(inode))
119862306a36Sopenharmony_ci		ret = __block_write_begin(&folio->page, pos, len,
119962306a36Sopenharmony_ci					  ext4_get_block_unwritten);
120062306a36Sopenharmony_ci	else
120162306a36Sopenharmony_ci		ret = __block_write_begin(&folio->page, pos, len, ext4_get_block);
120262306a36Sopenharmony_ci#endif
120362306a36Sopenharmony_ci	if (!ret && ext4_should_journal_data(inode)) {
120462306a36Sopenharmony_ci		ret = ext4_walk_page_buffers(handle, inode,
120562306a36Sopenharmony_ci					     folio_buffers(folio), from, to,
120662306a36Sopenharmony_ci					     NULL, do_journal_get_write_access);
120762306a36Sopenharmony_ci	}
120862306a36Sopenharmony_ci
120962306a36Sopenharmony_ci	if (ret) {
121062306a36Sopenharmony_ci		bool extended = (pos + len > inode->i_size) &&
121162306a36Sopenharmony_ci				!ext4_verity_in_progress(inode);
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_ci		folio_unlock(folio);
121462306a36Sopenharmony_ci		/*
121562306a36Sopenharmony_ci		 * __block_write_begin may have instantiated a few blocks
121662306a36Sopenharmony_ci		 * outside i_size.  Trim these off again. Don't need
121762306a36Sopenharmony_ci		 * i_size_read because we hold i_rwsem.
121862306a36Sopenharmony_ci		 *
121962306a36Sopenharmony_ci		 * Add inode to orphan list in case we crash before
122062306a36Sopenharmony_ci		 * truncate finishes
122162306a36Sopenharmony_ci		 */
122262306a36Sopenharmony_ci		if (extended && ext4_can_truncate(inode))
122362306a36Sopenharmony_ci			ext4_orphan_add(handle, inode);
122462306a36Sopenharmony_ci
122562306a36Sopenharmony_ci		ext4_journal_stop(handle);
122662306a36Sopenharmony_ci		if (extended) {
122762306a36Sopenharmony_ci			ext4_truncate_failed_write(inode);
122862306a36Sopenharmony_ci			/*
122962306a36Sopenharmony_ci			 * If truncate failed early the inode might
123062306a36Sopenharmony_ci			 * still be on the orphan list; we need to
123162306a36Sopenharmony_ci			 * make sure the inode is removed from the
123262306a36Sopenharmony_ci			 * orphan list in that case.
123362306a36Sopenharmony_ci			 */
123462306a36Sopenharmony_ci			if (inode->i_nlink)
123562306a36Sopenharmony_ci				ext4_orphan_del(NULL, inode);
123662306a36Sopenharmony_ci		}
123762306a36Sopenharmony_ci
123862306a36Sopenharmony_ci		if (ret == -ENOSPC &&
123962306a36Sopenharmony_ci		    ext4_should_retry_alloc(inode->i_sb, &retries))
124062306a36Sopenharmony_ci			goto retry_journal;
124162306a36Sopenharmony_ci		folio_put(folio);
124262306a36Sopenharmony_ci		return ret;
124362306a36Sopenharmony_ci	}
124462306a36Sopenharmony_ci	*pagep = &folio->page;
124562306a36Sopenharmony_ci	return ret;
124662306a36Sopenharmony_ci}
124762306a36Sopenharmony_ci
124862306a36Sopenharmony_ci/* For write_end() in data=journal mode */
124962306a36Sopenharmony_cistatic int write_end_fn(handle_t *handle, struct inode *inode,
125062306a36Sopenharmony_ci			struct buffer_head *bh)
125162306a36Sopenharmony_ci{
125262306a36Sopenharmony_ci	int ret;
125362306a36Sopenharmony_ci	if (!buffer_mapped(bh) || buffer_freed(bh))
125462306a36Sopenharmony_ci		return 0;
125562306a36Sopenharmony_ci	set_buffer_uptodate(bh);
125662306a36Sopenharmony_ci	ret = ext4_dirty_journalled_data(handle, bh);
125762306a36Sopenharmony_ci	clear_buffer_meta(bh);
125862306a36Sopenharmony_ci	clear_buffer_prio(bh);
125962306a36Sopenharmony_ci	return ret;
126062306a36Sopenharmony_ci}
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_ci/*
126362306a36Sopenharmony_ci * We need to pick up the new inode size which generic_commit_write gave us
126462306a36Sopenharmony_ci * `file' can be NULL - eg, when called from page_symlink().
126562306a36Sopenharmony_ci *
126662306a36Sopenharmony_ci * ext4 never places buffers on inode->i_mapping->private_list.  metadata
126762306a36Sopenharmony_ci * buffers are managed internally.
126862306a36Sopenharmony_ci */
126962306a36Sopenharmony_cistatic int ext4_write_end(struct file *file,
127062306a36Sopenharmony_ci			  struct address_space *mapping,
127162306a36Sopenharmony_ci			  loff_t pos, unsigned len, unsigned copied,
127262306a36Sopenharmony_ci			  struct page *page, void *fsdata)
127362306a36Sopenharmony_ci{
127462306a36Sopenharmony_ci	struct folio *folio = page_folio(page);
127562306a36Sopenharmony_ci	handle_t *handle = ext4_journal_current_handle();
127662306a36Sopenharmony_ci	struct inode *inode = mapping->host;
127762306a36Sopenharmony_ci	loff_t old_size = inode->i_size;
127862306a36Sopenharmony_ci	int ret = 0, ret2;
127962306a36Sopenharmony_ci	int i_size_changed = 0;
128062306a36Sopenharmony_ci	bool verity = ext4_verity_in_progress(inode);
128162306a36Sopenharmony_ci
128262306a36Sopenharmony_ci	trace_ext4_write_end(inode, pos, len, copied);
128362306a36Sopenharmony_ci
128462306a36Sopenharmony_ci	if (ext4_has_inline_data(inode) &&
128562306a36Sopenharmony_ci	    ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
128662306a36Sopenharmony_ci		return ext4_write_inline_data_end(inode, pos, len, copied,
128762306a36Sopenharmony_ci						  folio);
128862306a36Sopenharmony_ci
128962306a36Sopenharmony_ci	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
129062306a36Sopenharmony_ci	/*
129162306a36Sopenharmony_ci	 * it's important to update i_size while still holding folio lock:
129262306a36Sopenharmony_ci	 * page writeout could otherwise come in and zero beyond i_size.
129362306a36Sopenharmony_ci	 *
129462306a36Sopenharmony_ci	 * If FS_IOC_ENABLE_VERITY is running on this inode, then Merkle tree
129562306a36Sopenharmony_ci	 * blocks are being written past EOF, so skip the i_size update.
129662306a36Sopenharmony_ci	 */
129762306a36Sopenharmony_ci	if (!verity)
129862306a36Sopenharmony_ci		i_size_changed = ext4_update_inode_size(inode, pos + copied);
129962306a36Sopenharmony_ci	folio_unlock(folio);
130062306a36Sopenharmony_ci	folio_put(folio);
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci	if (old_size < pos && !verity)
130362306a36Sopenharmony_ci		pagecache_isize_extended(inode, old_size, pos);
130462306a36Sopenharmony_ci	/*
130562306a36Sopenharmony_ci	 * Don't mark the inode dirty under folio lock. First, it unnecessarily
130662306a36Sopenharmony_ci	 * makes the holding time of folio lock longer. Second, it forces lock
130762306a36Sopenharmony_ci	 * ordering of folio lock and transaction start for journaling
130862306a36Sopenharmony_ci	 * filesystems.
130962306a36Sopenharmony_ci	 */
131062306a36Sopenharmony_ci	if (i_size_changed)
131162306a36Sopenharmony_ci		ret = ext4_mark_inode_dirty(handle, inode);
131262306a36Sopenharmony_ci
131362306a36Sopenharmony_ci	if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
131462306a36Sopenharmony_ci		/* if we have allocated more blocks and copied
131562306a36Sopenharmony_ci		 * less. We will have blocks allocated outside
131662306a36Sopenharmony_ci		 * inode->i_size. So truncate them
131762306a36Sopenharmony_ci		 */
131862306a36Sopenharmony_ci		ext4_orphan_add(handle, inode);
131962306a36Sopenharmony_ci
132062306a36Sopenharmony_ci	ret2 = ext4_journal_stop(handle);
132162306a36Sopenharmony_ci	if (!ret)
132262306a36Sopenharmony_ci		ret = ret2;
132362306a36Sopenharmony_ci
132462306a36Sopenharmony_ci	if (pos + len > inode->i_size && !verity) {
132562306a36Sopenharmony_ci		ext4_truncate_failed_write(inode);
132662306a36Sopenharmony_ci		/*
132762306a36Sopenharmony_ci		 * If truncate failed early the inode might still be
132862306a36Sopenharmony_ci		 * on the orphan list; we need to make sure the inode
132962306a36Sopenharmony_ci		 * is removed from the orphan list in that case.
133062306a36Sopenharmony_ci		 */
133162306a36Sopenharmony_ci		if (inode->i_nlink)
133262306a36Sopenharmony_ci			ext4_orphan_del(NULL, inode);
133362306a36Sopenharmony_ci	}
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci	return ret ? ret : copied;
133662306a36Sopenharmony_ci}
133762306a36Sopenharmony_ci
133862306a36Sopenharmony_ci/*
133962306a36Sopenharmony_ci * This is a private version of folio_zero_new_buffers() which doesn't
134062306a36Sopenharmony_ci * set the buffer to be dirty, since in data=journalled mode we need
134162306a36Sopenharmony_ci * to call ext4_dirty_journalled_data() instead.
134262306a36Sopenharmony_ci */
134362306a36Sopenharmony_cistatic void ext4_journalled_zero_new_buffers(handle_t *handle,
134462306a36Sopenharmony_ci					    struct inode *inode,
134562306a36Sopenharmony_ci					    struct folio *folio,
134662306a36Sopenharmony_ci					    unsigned from, unsigned to)
134762306a36Sopenharmony_ci{
134862306a36Sopenharmony_ci	unsigned int block_start = 0, block_end;
134962306a36Sopenharmony_ci	struct buffer_head *head, *bh;
135062306a36Sopenharmony_ci
135162306a36Sopenharmony_ci	bh = head = folio_buffers(folio);
135262306a36Sopenharmony_ci	do {
135362306a36Sopenharmony_ci		block_end = block_start + bh->b_size;
135462306a36Sopenharmony_ci		if (buffer_new(bh)) {
135562306a36Sopenharmony_ci			if (block_end > from && block_start < to) {
135662306a36Sopenharmony_ci				if (!folio_test_uptodate(folio)) {
135762306a36Sopenharmony_ci					unsigned start, size;
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_ci					start = max(from, block_start);
136062306a36Sopenharmony_ci					size = min(to, block_end) - start;
136162306a36Sopenharmony_ci
136262306a36Sopenharmony_ci					folio_zero_range(folio, start, size);
136362306a36Sopenharmony_ci					write_end_fn(handle, inode, bh);
136462306a36Sopenharmony_ci				}
136562306a36Sopenharmony_ci				clear_buffer_new(bh);
136662306a36Sopenharmony_ci			}
136762306a36Sopenharmony_ci		}
136862306a36Sopenharmony_ci		block_start = block_end;
136962306a36Sopenharmony_ci		bh = bh->b_this_page;
137062306a36Sopenharmony_ci	} while (bh != head);
137162306a36Sopenharmony_ci}
137262306a36Sopenharmony_ci
137362306a36Sopenharmony_cistatic int ext4_journalled_write_end(struct file *file,
137462306a36Sopenharmony_ci				     struct address_space *mapping,
137562306a36Sopenharmony_ci				     loff_t pos, unsigned len, unsigned copied,
137662306a36Sopenharmony_ci				     struct page *page, void *fsdata)
137762306a36Sopenharmony_ci{
137862306a36Sopenharmony_ci	struct folio *folio = page_folio(page);
137962306a36Sopenharmony_ci	handle_t *handle = ext4_journal_current_handle();
138062306a36Sopenharmony_ci	struct inode *inode = mapping->host;
138162306a36Sopenharmony_ci	loff_t old_size = inode->i_size;
138262306a36Sopenharmony_ci	int ret = 0, ret2;
138362306a36Sopenharmony_ci	int partial = 0;
138462306a36Sopenharmony_ci	unsigned from, to;
138562306a36Sopenharmony_ci	int size_changed = 0;
138662306a36Sopenharmony_ci	bool verity = ext4_verity_in_progress(inode);
138762306a36Sopenharmony_ci
138862306a36Sopenharmony_ci	trace_ext4_journalled_write_end(inode, pos, len, copied);
138962306a36Sopenharmony_ci	from = pos & (PAGE_SIZE - 1);
139062306a36Sopenharmony_ci	to = from + len;
139162306a36Sopenharmony_ci
139262306a36Sopenharmony_ci	BUG_ON(!ext4_handle_valid(handle));
139362306a36Sopenharmony_ci
139462306a36Sopenharmony_ci	if (ext4_has_inline_data(inode))
139562306a36Sopenharmony_ci		return ext4_write_inline_data_end(inode, pos, len, copied,
139662306a36Sopenharmony_ci						  folio);
139762306a36Sopenharmony_ci
139862306a36Sopenharmony_ci	if (unlikely(copied < len) && !folio_test_uptodate(folio)) {
139962306a36Sopenharmony_ci		copied = 0;
140062306a36Sopenharmony_ci		ext4_journalled_zero_new_buffers(handle, inode, folio,
140162306a36Sopenharmony_ci						 from, to);
140262306a36Sopenharmony_ci	} else {
140362306a36Sopenharmony_ci		if (unlikely(copied < len))
140462306a36Sopenharmony_ci			ext4_journalled_zero_new_buffers(handle, inode, folio,
140562306a36Sopenharmony_ci							 from + copied, to);
140662306a36Sopenharmony_ci		ret = ext4_walk_page_buffers(handle, inode,
140762306a36Sopenharmony_ci					     folio_buffers(folio),
140862306a36Sopenharmony_ci					     from, from + copied, &partial,
140962306a36Sopenharmony_ci					     write_end_fn);
141062306a36Sopenharmony_ci		if (!partial)
141162306a36Sopenharmony_ci			folio_mark_uptodate(folio);
141262306a36Sopenharmony_ci	}
141362306a36Sopenharmony_ci	if (!verity)
141462306a36Sopenharmony_ci		size_changed = ext4_update_inode_size(inode, pos + copied);
141562306a36Sopenharmony_ci	EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
141662306a36Sopenharmony_ci	folio_unlock(folio);
141762306a36Sopenharmony_ci	folio_put(folio);
141862306a36Sopenharmony_ci
141962306a36Sopenharmony_ci	if (old_size < pos && !verity)
142062306a36Sopenharmony_ci		pagecache_isize_extended(inode, old_size, pos);
142162306a36Sopenharmony_ci
142262306a36Sopenharmony_ci	if (size_changed) {
142362306a36Sopenharmony_ci		ret2 = ext4_mark_inode_dirty(handle, inode);
142462306a36Sopenharmony_ci		if (!ret)
142562306a36Sopenharmony_ci			ret = ret2;
142662306a36Sopenharmony_ci	}
142762306a36Sopenharmony_ci
142862306a36Sopenharmony_ci	if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode))
142962306a36Sopenharmony_ci		/* if we have allocated more blocks and copied
143062306a36Sopenharmony_ci		 * less. We will have blocks allocated outside
143162306a36Sopenharmony_ci		 * inode->i_size. So truncate them
143262306a36Sopenharmony_ci		 */
143362306a36Sopenharmony_ci		ext4_orphan_add(handle, inode);
143462306a36Sopenharmony_ci
143562306a36Sopenharmony_ci	ret2 = ext4_journal_stop(handle);
143662306a36Sopenharmony_ci	if (!ret)
143762306a36Sopenharmony_ci		ret = ret2;
143862306a36Sopenharmony_ci	if (pos + len > inode->i_size && !verity) {
143962306a36Sopenharmony_ci		ext4_truncate_failed_write(inode);
144062306a36Sopenharmony_ci		/*
144162306a36Sopenharmony_ci		 * If truncate failed early the inode might still be
144262306a36Sopenharmony_ci		 * on the orphan list; we need to make sure the inode
144362306a36Sopenharmony_ci		 * is removed from the orphan list in that case.
144462306a36Sopenharmony_ci		 */
144562306a36Sopenharmony_ci		if (inode->i_nlink)
144662306a36Sopenharmony_ci			ext4_orphan_del(NULL, inode);
144762306a36Sopenharmony_ci	}
144862306a36Sopenharmony_ci
144962306a36Sopenharmony_ci	return ret ? ret : copied;
145062306a36Sopenharmony_ci}
145162306a36Sopenharmony_ci
145262306a36Sopenharmony_ci/*
145362306a36Sopenharmony_ci * Reserve space for a single cluster
145462306a36Sopenharmony_ci */
145562306a36Sopenharmony_cistatic int ext4_da_reserve_space(struct inode *inode)
145662306a36Sopenharmony_ci{
145762306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
145862306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
145962306a36Sopenharmony_ci	int ret;
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_ci	/*
146262306a36Sopenharmony_ci	 * We will charge metadata quota at writeout time; this saves
146362306a36Sopenharmony_ci	 * us from metadata over-estimation, though we may go over by
146462306a36Sopenharmony_ci	 * a small amount in the end.  Here we just reserve for data.
146562306a36Sopenharmony_ci	 */
146662306a36Sopenharmony_ci	ret = dquot_reserve_block(inode, EXT4_C2B(sbi, 1));
146762306a36Sopenharmony_ci	if (ret)
146862306a36Sopenharmony_ci		return ret;
146962306a36Sopenharmony_ci
147062306a36Sopenharmony_ci	spin_lock(&ei->i_block_reservation_lock);
147162306a36Sopenharmony_ci	if (ext4_claim_free_clusters(sbi, 1, 0)) {
147262306a36Sopenharmony_ci		spin_unlock(&ei->i_block_reservation_lock);
147362306a36Sopenharmony_ci		dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
147462306a36Sopenharmony_ci		return -ENOSPC;
147562306a36Sopenharmony_ci	}
147662306a36Sopenharmony_ci	ei->i_reserved_data_blocks++;
147762306a36Sopenharmony_ci	trace_ext4_da_reserve_space(inode);
147862306a36Sopenharmony_ci	spin_unlock(&ei->i_block_reservation_lock);
147962306a36Sopenharmony_ci
148062306a36Sopenharmony_ci	return 0;       /* success */
148162306a36Sopenharmony_ci}
148262306a36Sopenharmony_ci
148362306a36Sopenharmony_civoid ext4_da_release_space(struct inode *inode, int to_free)
148462306a36Sopenharmony_ci{
148562306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
148662306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
148762306a36Sopenharmony_ci
148862306a36Sopenharmony_ci	if (!to_free)
148962306a36Sopenharmony_ci		return;		/* Nothing to release, exit */
149062306a36Sopenharmony_ci
149162306a36Sopenharmony_ci	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
149262306a36Sopenharmony_ci
149362306a36Sopenharmony_ci	trace_ext4_da_release_space(inode, to_free);
149462306a36Sopenharmony_ci	if (unlikely(to_free > ei->i_reserved_data_blocks)) {
149562306a36Sopenharmony_ci		/*
149662306a36Sopenharmony_ci		 * if there aren't enough reserved blocks, then the
149762306a36Sopenharmony_ci		 * counter is messed up somewhere.  Since this
149862306a36Sopenharmony_ci		 * function is called from invalidate page, it's
149962306a36Sopenharmony_ci		 * harmless to return without any action.
150062306a36Sopenharmony_ci		 */
150162306a36Sopenharmony_ci		ext4_warning(inode->i_sb, "ext4_da_release_space: "
150262306a36Sopenharmony_ci			 "ino %lu, to_free %d with only %d reserved "
150362306a36Sopenharmony_ci			 "data blocks", inode->i_ino, to_free,
150462306a36Sopenharmony_ci			 ei->i_reserved_data_blocks);
150562306a36Sopenharmony_ci		WARN_ON(1);
150662306a36Sopenharmony_ci		to_free = ei->i_reserved_data_blocks;
150762306a36Sopenharmony_ci	}
150862306a36Sopenharmony_ci	ei->i_reserved_data_blocks -= to_free;
150962306a36Sopenharmony_ci
151062306a36Sopenharmony_ci	/* update fs dirty data blocks counter */
151162306a36Sopenharmony_ci	percpu_counter_sub(&sbi->s_dirtyclusters_counter, to_free);
151262306a36Sopenharmony_ci
151362306a36Sopenharmony_ci	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
151462306a36Sopenharmony_ci
151562306a36Sopenharmony_ci	dquot_release_reservation_block(inode, EXT4_C2B(sbi, to_free));
151662306a36Sopenharmony_ci}
151762306a36Sopenharmony_ci
151862306a36Sopenharmony_ci/*
151962306a36Sopenharmony_ci * Delayed allocation stuff
152062306a36Sopenharmony_ci */
152162306a36Sopenharmony_ci
152262306a36Sopenharmony_cistruct mpage_da_data {
152362306a36Sopenharmony_ci	/* These are input fields for ext4_do_writepages() */
152462306a36Sopenharmony_ci	struct inode *inode;
152562306a36Sopenharmony_ci	struct writeback_control *wbc;
152662306a36Sopenharmony_ci	unsigned int can_map:1;	/* Can writepages call map blocks? */
152762306a36Sopenharmony_ci
152862306a36Sopenharmony_ci	/* These are internal state of ext4_do_writepages() */
152962306a36Sopenharmony_ci	pgoff_t first_page;	/* The first page to write */
153062306a36Sopenharmony_ci	pgoff_t next_page;	/* Current page to examine */
153162306a36Sopenharmony_ci	pgoff_t last_page;	/* Last page to examine */
153262306a36Sopenharmony_ci	/*
153362306a36Sopenharmony_ci	 * Extent to map - this can be after first_page because that can be
153462306a36Sopenharmony_ci	 * fully mapped. We somewhat abuse m_flags to store whether the extent
153562306a36Sopenharmony_ci	 * is delalloc or unwritten.
153662306a36Sopenharmony_ci	 */
153762306a36Sopenharmony_ci	struct ext4_map_blocks map;
153862306a36Sopenharmony_ci	struct ext4_io_submit io_submit;	/* IO submission data */
153962306a36Sopenharmony_ci	unsigned int do_map:1;
154062306a36Sopenharmony_ci	unsigned int scanned_until_end:1;
154162306a36Sopenharmony_ci	unsigned int journalled_more_data:1;
154262306a36Sopenharmony_ci};
154362306a36Sopenharmony_ci
154462306a36Sopenharmony_cistatic void mpage_release_unused_pages(struct mpage_da_data *mpd,
154562306a36Sopenharmony_ci				       bool invalidate)
154662306a36Sopenharmony_ci{
154762306a36Sopenharmony_ci	unsigned nr, i;
154862306a36Sopenharmony_ci	pgoff_t index, end;
154962306a36Sopenharmony_ci	struct folio_batch fbatch;
155062306a36Sopenharmony_ci	struct inode *inode = mpd->inode;
155162306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
155262306a36Sopenharmony_ci
155362306a36Sopenharmony_ci	/* This is necessary when next_page == 0. */
155462306a36Sopenharmony_ci	if (mpd->first_page >= mpd->next_page)
155562306a36Sopenharmony_ci		return;
155662306a36Sopenharmony_ci
155762306a36Sopenharmony_ci	mpd->scanned_until_end = 0;
155862306a36Sopenharmony_ci	index = mpd->first_page;
155962306a36Sopenharmony_ci	end   = mpd->next_page - 1;
156062306a36Sopenharmony_ci	if (invalidate) {
156162306a36Sopenharmony_ci		ext4_lblk_t start, last;
156262306a36Sopenharmony_ci		start = index << (PAGE_SHIFT - inode->i_blkbits);
156362306a36Sopenharmony_ci		last = end << (PAGE_SHIFT - inode->i_blkbits);
156462306a36Sopenharmony_ci
156562306a36Sopenharmony_ci		/*
156662306a36Sopenharmony_ci		 * avoid racing with extent status tree scans made by
156762306a36Sopenharmony_ci		 * ext4_insert_delayed_block()
156862306a36Sopenharmony_ci		 */
156962306a36Sopenharmony_ci		down_write(&EXT4_I(inode)->i_data_sem);
157062306a36Sopenharmony_ci		ext4_es_remove_extent(inode, start, last - start + 1);
157162306a36Sopenharmony_ci		up_write(&EXT4_I(inode)->i_data_sem);
157262306a36Sopenharmony_ci	}
157362306a36Sopenharmony_ci
157462306a36Sopenharmony_ci	folio_batch_init(&fbatch);
157562306a36Sopenharmony_ci	while (index <= end) {
157662306a36Sopenharmony_ci		nr = filemap_get_folios(mapping, &index, end, &fbatch);
157762306a36Sopenharmony_ci		if (nr == 0)
157862306a36Sopenharmony_ci			break;
157962306a36Sopenharmony_ci		for (i = 0; i < nr; i++) {
158062306a36Sopenharmony_ci			struct folio *folio = fbatch.folios[i];
158162306a36Sopenharmony_ci
158262306a36Sopenharmony_ci			if (folio->index < mpd->first_page)
158362306a36Sopenharmony_ci				continue;
158462306a36Sopenharmony_ci			if (folio_next_index(folio) - 1 > end)
158562306a36Sopenharmony_ci				continue;
158662306a36Sopenharmony_ci			BUG_ON(!folio_test_locked(folio));
158762306a36Sopenharmony_ci			BUG_ON(folio_test_writeback(folio));
158862306a36Sopenharmony_ci			if (invalidate) {
158962306a36Sopenharmony_ci				if (folio_mapped(folio))
159062306a36Sopenharmony_ci					folio_clear_dirty_for_io(folio);
159162306a36Sopenharmony_ci				block_invalidate_folio(folio, 0,
159262306a36Sopenharmony_ci						folio_size(folio));
159362306a36Sopenharmony_ci				folio_clear_uptodate(folio);
159462306a36Sopenharmony_ci			}
159562306a36Sopenharmony_ci			folio_unlock(folio);
159662306a36Sopenharmony_ci		}
159762306a36Sopenharmony_ci		folio_batch_release(&fbatch);
159862306a36Sopenharmony_ci	}
159962306a36Sopenharmony_ci}
160062306a36Sopenharmony_ci
160162306a36Sopenharmony_cistatic void ext4_print_free_blocks(struct inode *inode)
160262306a36Sopenharmony_ci{
160362306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
160462306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
160562306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
160662306a36Sopenharmony_ci
160762306a36Sopenharmony_ci	ext4_msg(sb, KERN_CRIT, "Total free blocks count %lld",
160862306a36Sopenharmony_ci	       EXT4_C2B(EXT4_SB(inode->i_sb),
160962306a36Sopenharmony_ci			ext4_count_free_clusters(sb)));
161062306a36Sopenharmony_ci	ext4_msg(sb, KERN_CRIT, "Free/Dirty block details");
161162306a36Sopenharmony_ci	ext4_msg(sb, KERN_CRIT, "free_blocks=%lld",
161262306a36Sopenharmony_ci	       (long long) EXT4_C2B(EXT4_SB(sb),
161362306a36Sopenharmony_ci		percpu_counter_sum(&sbi->s_freeclusters_counter)));
161462306a36Sopenharmony_ci	ext4_msg(sb, KERN_CRIT, "dirty_blocks=%lld",
161562306a36Sopenharmony_ci	       (long long) EXT4_C2B(EXT4_SB(sb),
161662306a36Sopenharmony_ci		percpu_counter_sum(&sbi->s_dirtyclusters_counter)));
161762306a36Sopenharmony_ci	ext4_msg(sb, KERN_CRIT, "Block reservation details");
161862306a36Sopenharmony_ci	ext4_msg(sb, KERN_CRIT, "i_reserved_data_blocks=%u",
161962306a36Sopenharmony_ci		 ei->i_reserved_data_blocks);
162062306a36Sopenharmony_ci	return;
162162306a36Sopenharmony_ci}
162262306a36Sopenharmony_ci
162362306a36Sopenharmony_ci/*
162462306a36Sopenharmony_ci * ext4_insert_delayed_block - adds a delayed block to the extents status
162562306a36Sopenharmony_ci *                             tree, incrementing the reserved cluster/block
162662306a36Sopenharmony_ci *                             count or making a pending reservation
162762306a36Sopenharmony_ci *                             where needed
162862306a36Sopenharmony_ci *
162962306a36Sopenharmony_ci * @inode - file containing the newly added block
163062306a36Sopenharmony_ci * @lblk - logical block to be added
163162306a36Sopenharmony_ci *
163262306a36Sopenharmony_ci * Returns 0 on success, negative error code on failure.
163362306a36Sopenharmony_ci */
163462306a36Sopenharmony_cistatic int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)
163562306a36Sopenharmony_ci{
163662306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
163762306a36Sopenharmony_ci	int ret;
163862306a36Sopenharmony_ci	bool allocated = false;
163962306a36Sopenharmony_ci
164062306a36Sopenharmony_ci	/*
164162306a36Sopenharmony_ci	 * If the cluster containing lblk is shared with a delayed,
164262306a36Sopenharmony_ci	 * written, or unwritten extent in a bigalloc file system, it's
164362306a36Sopenharmony_ci	 * already been accounted for and does not need to be reserved.
164462306a36Sopenharmony_ci	 * A pending reservation must be made for the cluster if it's
164562306a36Sopenharmony_ci	 * shared with a written or unwritten extent and doesn't already
164662306a36Sopenharmony_ci	 * have one.  Written and unwritten extents can be purged from the
164762306a36Sopenharmony_ci	 * extents status tree if the system is under memory pressure, so
164862306a36Sopenharmony_ci	 * it's necessary to examine the extent tree if a search of the
164962306a36Sopenharmony_ci	 * extents status tree doesn't get a match.
165062306a36Sopenharmony_ci	 */
165162306a36Sopenharmony_ci	if (sbi->s_cluster_ratio == 1) {
165262306a36Sopenharmony_ci		ret = ext4_da_reserve_space(inode);
165362306a36Sopenharmony_ci		if (ret != 0)   /* ENOSPC */
165462306a36Sopenharmony_ci			return ret;
165562306a36Sopenharmony_ci	} else {   /* bigalloc */
165662306a36Sopenharmony_ci		if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {
165762306a36Sopenharmony_ci			if (!ext4_es_scan_clu(inode,
165862306a36Sopenharmony_ci					      &ext4_es_is_mapped, lblk)) {
165962306a36Sopenharmony_ci				ret = ext4_clu_mapped(inode,
166062306a36Sopenharmony_ci						      EXT4_B2C(sbi, lblk));
166162306a36Sopenharmony_ci				if (ret < 0)
166262306a36Sopenharmony_ci					return ret;
166362306a36Sopenharmony_ci				if (ret == 0) {
166462306a36Sopenharmony_ci					ret = ext4_da_reserve_space(inode);
166562306a36Sopenharmony_ci					if (ret != 0)   /* ENOSPC */
166662306a36Sopenharmony_ci						return ret;
166762306a36Sopenharmony_ci				} else {
166862306a36Sopenharmony_ci					allocated = true;
166962306a36Sopenharmony_ci				}
167062306a36Sopenharmony_ci			} else {
167162306a36Sopenharmony_ci				allocated = true;
167262306a36Sopenharmony_ci			}
167362306a36Sopenharmony_ci		}
167462306a36Sopenharmony_ci	}
167562306a36Sopenharmony_ci
167662306a36Sopenharmony_ci	ext4_es_insert_delayed_block(inode, lblk, allocated);
167762306a36Sopenharmony_ci	return 0;
167862306a36Sopenharmony_ci}
167962306a36Sopenharmony_ci
/*
 * This function grabs code from the very beginning of
 * ext4_map_blocks, but assumes that the caller is from delayed write
 * time. This function looks up the requested blocks and sets the
 * buffer delay bit under the protection of i_data_sem.
 */
168662306a36Sopenharmony_cistatic int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
168762306a36Sopenharmony_ci			      struct ext4_map_blocks *map,
168862306a36Sopenharmony_ci			      struct buffer_head *bh)
168962306a36Sopenharmony_ci{
169062306a36Sopenharmony_ci	struct extent_status es;
169162306a36Sopenharmony_ci	int retval;
169262306a36Sopenharmony_ci	sector_t invalid_block = ~((sector_t) 0xffff);
169362306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST
169462306a36Sopenharmony_ci	struct ext4_map_blocks orig_map;
169562306a36Sopenharmony_ci
169662306a36Sopenharmony_ci	memcpy(&orig_map, map, sizeof(*map));
169762306a36Sopenharmony_ci#endif
169862306a36Sopenharmony_ci
169962306a36Sopenharmony_ci	if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
170062306a36Sopenharmony_ci		invalid_block = ~0;
170162306a36Sopenharmony_ci
170262306a36Sopenharmony_ci	map->m_flags = 0;
170362306a36Sopenharmony_ci	ext_debug(inode, "max_blocks %u, logical block %lu\n", map->m_len,
170462306a36Sopenharmony_ci		  (unsigned long) map->m_lblk);
170562306a36Sopenharmony_ci
170662306a36Sopenharmony_ci	/* Lookup extent status tree firstly */
170762306a36Sopenharmony_ci	if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
170862306a36Sopenharmony_ci		if (ext4_es_is_hole(&es)) {
170962306a36Sopenharmony_ci			retval = 0;
171062306a36Sopenharmony_ci			down_read(&EXT4_I(inode)->i_data_sem);
171162306a36Sopenharmony_ci			goto add_delayed;
171262306a36Sopenharmony_ci		}
171362306a36Sopenharmony_ci
171462306a36Sopenharmony_ci		/*
171562306a36Sopenharmony_ci		 * Delayed extent could be allocated by fallocate.
171662306a36Sopenharmony_ci		 * So we need to check it.
171762306a36Sopenharmony_ci		 */
171862306a36Sopenharmony_ci		if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) {
171962306a36Sopenharmony_ci			map_bh(bh, inode->i_sb, invalid_block);
172062306a36Sopenharmony_ci			set_buffer_new(bh);
172162306a36Sopenharmony_ci			set_buffer_delay(bh);
172262306a36Sopenharmony_ci			return 0;
172362306a36Sopenharmony_ci		}
172462306a36Sopenharmony_ci
172562306a36Sopenharmony_ci		map->m_pblk = ext4_es_pblock(&es) + iblock - es.es_lblk;
172662306a36Sopenharmony_ci		retval = es.es_len - (iblock - es.es_lblk);
172762306a36Sopenharmony_ci		if (retval > map->m_len)
172862306a36Sopenharmony_ci			retval = map->m_len;
172962306a36Sopenharmony_ci		map->m_len = retval;
173062306a36Sopenharmony_ci		if (ext4_es_is_written(&es))
173162306a36Sopenharmony_ci			map->m_flags |= EXT4_MAP_MAPPED;
173262306a36Sopenharmony_ci		else if (ext4_es_is_unwritten(&es))
173362306a36Sopenharmony_ci			map->m_flags |= EXT4_MAP_UNWRITTEN;
173462306a36Sopenharmony_ci		else
173562306a36Sopenharmony_ci			BUG();
173662306a36Sopenharmony_ci
173762306a36Sopenharmony_ci#ifdef ES_AGGRESSIVE_TEST
173862306a36Sopenharmony_ci		ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
173962306a36Sopenharmony_ci#endif
174062306a36Sopenharmony_ci		return retval;
174162306a36Sopenharmony_ci	}
174262306a36Sopenharmony_ci
174362306a36Sopenharmony_ci	/*
174462306a36Sopenharmony_ci	 * Try to see if we can get the block without requesting a new
174562306a36Sopenharmony_ci	 * file system block.
174662306a36Sopenharmony_ci	 */
174762306a36Sopenharmony_ci	down_read(&EXT4_I(inode)->i_data_sem);
174862306a36Sopenharmony_ci	if (ext4_has_inline_data(inode))
174962306a36Sopenharmony_ci		retval = 0;
175062306a36Sopenharmony_ci	else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
175162306a36Sopenharmony_ci		retval = ext4_ext_map_blocks(NULL, inode, map, 0);
175262306a36Sopenharmony_ci	else
175362306a36Sopenharmony_ci		retval = ext4_ind_map_blocks(NULL, inode, map, 0);
175462306a36Sopenharmony_ci
175562306a36Sopenharmony_ciadd_delayed:
175662306a36Sopenharmony_ci	if (retval == 0) {
175762306a36Sopenharmony_ci		int ret;
175862306a36Sopenharmony_ci
175962306a36Sopenharmony_ci		/*
176062306a36Sopenharmony_ci		 * XXX: __block_prepare_write() unmaps passed block,
176162306a36Sopenharmony_ci		 * is it OK?
176262306a36Sopenharmony_ci		 */
176362306a36Sopenharmony_ci
176462306a36Sopenharmony_ci		ret = ext4_insert_delayed_block(inode, map->m_lblk);
176562306a36Sopenharmony_ci		if (ret != 0) {
176662306a36Sopenharmony_ci			retval = ret;
176762306a36Sopenharmony_ci			goto out_unlock;
176862306a36Sopenharmony_ci		}
176962306a36Sopenharmony_ci
177062306a36Sopenharmony_ci		map_bh(bh, inode->i_sb, invalid_block);
177162306a36Sopenharmony_ci		set_buffer_new(bh);
177262306a36Sopenharmony_ci		set_buffer_delay(bh);
177362306a36Sopenharmony_ci	} else if (retval > 0) {
177462306a36Sopenharmony_ci		unsigned int status;
177562306a36Sopenharmony_ci
177662306a36Sopenharmony_ci		if (unlikely(retval != map->m_len)) {
177762306a36Sopenharmony_ci			ext4_warning(inode->i_sb,
177862306a36Sopenharmony_ci				     "ES len assertion failed for inode "
177962306a36Sopenharmony_ci				     "%lu: retval %d != map->m_len %d",
178062306a36Sopenharmony_ci				     inode->i_ino, retval, map->m_len);
178162306a36Sopenharmony_ci			WARN_ON(1);
178262306a36Sopenharmony_ci		}
178362306a36Sopenharmony_ci
178462306a36Sopenharmony_ci		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
178562306a36Sopenharmony_ci				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
178662306a36Sopenharmony_ci		ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
178762306a36Sopenharmony_ci				      map->m_pblk, status);
178862306a36Sopenharmony_ci	}
178962306a36Sopenharmony_ci
179062306a36Sopenharmony_ciout_unlock:
179162306a36Sopenharmony_ci	up_read((&EXT4_I(inode)->i_data_sem));
179262306a36Sopenharmony_ci
179362306a36Sopenharmony_ci	return retval;
179462306a36Sopenharmony_ci}
179562306a36Sopenharmony_ci
179662306a36Sopenharmony_ci/*
179762306a36Sopenharmony_ci * This is a special get_block_t callback which is used by
179862306a36Sopenharmony_ci * ext4_da_write_begin().  It will either return mapped block or
179962306a36Sopenharmony_ci * reserve space for a single block.
180062306a36Sopenharmony_ci *
180162306a36Sopenharmony_ci * For delayed buffer_head we have BH_Mapped, BH_New, BH_Delay set.
180262306a36Sopenharmony_ci * We also have b_blocknr = -1 and b_bdev initialized properly
180362306a36Sopenharmony_ci *
180462306a36Sopenharmony_ci * For unwritten buffer_head we have BH_Mapped, BH_New, BH_Unwritten set.
180562306a36Sopenharmony_ci * We also have b_blocknr = physicalblock mapping unwritten extent and b_bdev
180662306a36Sopenharmony_ci * initialized properly.
180762306a36Sopenharmony_ci */
180862306a36Sopenharmony_ciint ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
180962306a36Sopenharmony_ci			   struct buffer_head *bh, int create)
181062306a36Sopenharmony_ci{
181162306a36Sopenharmony_ci	struct ext4_map_blocks map;
181262306a36Sopenharmony_ci	int ret = 0;
181362306a36Sopenharmony_ci
181462306a36Sopenharmony_ci	BUG_ON(create == 0);
181562306a36Sopenharmony_ci	BUG_ON(bh->b_size != inode->i_sb->s_blocksize);
181662306a36Sopenharmony_ci
181762306a36Sopenharmony_ci	map.m_lblk = iblock;
181862306a36Sopenharmony_ci	map.m_len = 1;
181962306a36Sopenharmony_ci
182062306a36Sopenharmony_ci	/*
182162306a36Sopenharmony_ci	 * first, we need to know whether the block is allocated already
182262306a36Sopenharmony_ci	 * preallocated blocks are unmapped but should treated
182362306a36Sopenharmony_ci	 * the same as allocated blocks.
182462306a36Sopenharmony_ci	 */
182562306a36Sopenharmony_ci	ret = ext4_da_map_blocks(inode, iblock, &map, bh);
182662306a36Sopenharmony_ci	if (ret <= 0)
182762306a36Sopenharmony_ci		return ret;
182862306a36Sopenharmony_ci
182962306a36Sopenharmony_ci	map_bh(bh, inode->i_sb, map.m_pblk);
183062306a36Sopenharmony_ci	ext4_update_bh_state(bh, map.m_flags);
183162306a36Sopenharmony_ci
183262306a36Sopenharmony_ci	if (buffer_unwritten(bh)) {
183362306a36Sopenharmony_ci		/* A delayed write to unwritten bh should be marked
183462306a36Sopenharmony_ci		 * new and mapped.  Mapped ensures that we don't do
183562306a36Sopenharmony_ci		 * get_block multiple times when we write to the same
183662306a36Sopenharmony_ci		 * offset and new ensures that we do proper zero out
183762306a36Sopenharmony_ci		 * for partial write.
183862306a36Sopenharmony_ci		 */
183962306a36Sopenharmony_ci		set_buffer_new(bh);
184062306a36Sopenharmony_ci		set_buffer_mapped(bh);
184162306a36Sopenharmony_ci	}
184262306a36Sopenharmony_ci	return 0;
184362306a36Sopenharmony_ci}
184462306a36Sopenharmony_ci
184562306a36Sopenharmony_cistatic void mpage_folio_done(struct mpage_da_data *mpd, struct folio *folio)
184662306a36Sopenharmony_ci{
184762306a36Sopenharmony_ci	mpd->first_page += folio_nr_pages(folio);
184862306a36Sopenharmony_ci	folio_unlock(folio);
184962306a36Sopenharmony_ci}
185062306a36Sopenharmony_ci
185162306a36Sopenharmony_cistatic int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio)
185262306a36Sopenharmony_ci{
185362306a36Sopenharmony_ci	size_t len;
185462306a36Sopenharmony_ci	loff_t size;
185562306a36Sopenharmony_ci	int err;
185662306a36Sopenharmony_ci
185762306a36Sopenharmony_ci	BUG_ON(folio->index != mpd->first_page);
185862306a36Sopenharmony_ci	folio_clear_dirty_for_io(folio);
185962306a36Sopenharmony_ci	/*
186062306a36Sopenharmony_ci	 * We have to be very careful here!  Nothing protects writeback path
186162306a36Sopenharmony_ci	 * against i_size changes and the page can be writeably mapped into
186262306a36Sopenharmony_ci	 * page tables. So an application can be growing i_size and writing
186362306a36Sopenharmony_ci	 * data through mmap while writeback runs. folio_clear_dirty_for_io()
186462306a36Sopenharmony_ci	 * write-protects our page in page tables and the page cannot get
186562306a36Sopenharmony_ci	 * written to again until we release folio lock. So only after
186662306a36Sopenharmony_ci	 * folio_clear_dirty_for_io() we are safe to sample i_size for
186762306a36Sopenharmony_ci	 * ext4_bio_write_folio() to zero-out tail of the written page. We rely
186862306a36Sopenharmony_ci	 * on the barrier provided by folio_test_clear_dirty() in
186962306a36Sopenharmony_ci	 * folio_clear_dirty_for_io() to make sure i_size is really sampled only
187062306a36Sopenharmony_ci	 * after page tables are updated.
187162306a36Sopenharmony_ci	 */
187262306a36Sopenharmony_ci	size = i_size_read(mpd->inode);
187362306a36Sopenharmony_ci	len = folio_size(folio);
187462306a36Sopenharmony_ci	if (folio_pos(folio) + len > size &&
187562306a36Sopenharmony_ci	    !ext4_verity_in_progress(mpd->inode))
187662306a36Sopenharmony_ci		len = size & ~PAGE_MASK;
187762306a36Sopenharmony_ci	err = ext4_bio_write_folio(&mpd->io_submit, folio, len);
187862306a36Sopenharmony_ci	if (!err)
187962306a36Sopenharmony_ci		mpd->wbc->nr_to_write--;
188062306a36Sopenharmony_ci
188162306a36Sopenharmony_ci	return err;
188262306a36Sopenharmony_ci}
188362306a36Sopenharmony_ci
/*
 * Buffer state bits that describe how a buffer still has to be mapped.
 * mpage_add_bh_to_extent() stores them in the extent under construction and
 * only merges buffers whose bits are identical.
 */
#define BH_FLAGS (BIT(BH_Delay) | BIT(BH_Unwritten))

/*
 * Upper bound on the length (in blocks) of one extent collected for
 * writeback: mballoc gives us at most this number of blocks...
 * XXX: That seems to be only a limitation of ext4_mb_normalize_request().
 * The rest of mballoc seems to handle chunks up to full group size.
 */
#define MAX_WRITEPAGES_EXTENT_LEN 2048
189262306a36Sopenharmony_ci
189362306a36Sopenharmony_ci/*
189462306a36Sopenharmony_ci * mpage_add_bh_to_extent - try to add bh to extent of blocks to map
189562306a36Sopenharmony_ci *
189662306a36Sopenharmony_ci * @mpd - extent of blocks
189762306a36Sopenharmony_ci * @lblk - logical number of the block in the file
189862306a36Sopenharmony_ci * @bh - buffer head we want to add to the extent
189962306a36Sopenharmony_ci *
190062306a36Sopenharmony_ci * The function is used to collect contig. blocks in the same state. If the
190162306a36Sopenharmony_ci * buffer doesn't require mapping for writeback and we haven't started the
190262306a36Sopenharmony_ci * extent of buffers to map yet, the function returns 'true' immediately - the
190362306a36Sopenharmony_ci * caller can write the buffer right away. Otherwise the function returns true
190462306a36Sopenharmony_ci * if the block has been added to the extent, false if the block couldn't be
190562306a36Sopenharmony_ci * added.
190662306a36Sopenharmony_ci */
190762306a36Sopenharmony_cistatic bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
190862306a36Sopenharmony_ci				   struct buffer_head *bh)
190962306a36Sopenharmony_ci{
191062306a36Sopenharmony_ci	struct ext4_map_blocks *map = &mpd->map;
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci	/* Buffer that doesn't need mapping for writeback? */
191362306a36Sopenharmony_ci	if (!buffer_dirty(bh) || !buffer_mapped(bh) ||
191462306a36Sopenharmony_ci	    (!buffer_delay(bh) && !buffer_unwritten(bh))) {
191562306a36Sopenharmony_ci		/* So far no extent to map => we write the buffer right away */
191662306a36Sopenharmony_ci		if (map->m_len == 0)
191762306a36Sopenharmony_ci			return true;
191862306a36Sopenharmony_ci		return false;
191962306a36Sopenharmony_ci	}
192062306a36Sopenharmony_ci
192162306a36Sopenharmony_ci	/* First block in the extent? */
192262306a36Sopenharmony_ci	if (map->m_len == 0) {
192362306a36Sopenharmony_ci		/* We cannot map unless handle is started... */
192462306a36Sopenharmony_ci		if (!mpd->do_map)
192562306a36Sopenharmony_ci			return false;
192662306a36Sopenharmony_ci		map->m_lblk = lblk;
192762306a36Sopenharmony_ci		map->m_len = 1;
192862306a36Sopenharmony_ci		map->m_flags = bh->b_state & BH_FLAGS;
192962306a36Sopenharmony_ci		return true;
193062306a36Sopenharmony_ci	}
193162306a36Sopenharmony_ci
193262306a36Sopenharmony_ci	/* Don't go larger than mballoc is willing to allocate */
193362306a36Sopenharmony_ci	if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN)
193462306a36Sopenharmony_ci		return false;
193562306a36Sopenharmony_ci
193662306a36Sopenharmony_ci	/* Can we merge the block to our big extent? */
193762306a36Sopenharmony_ci	if (lblk == map->m_lblk + map->m_len &&
193862306a36Sopenharmony_ci	    (bh->b_state & BH_FLAGS) == map->m_flags) {
193962306a36Sopenharmony_ci		map->m_len++;
194062306a36Sopenharmony_ci		return true;
194162306a36Sopenharmony_ci	}
194262306a36Sopenharmony_ci	return false;
194362306a36Sopenharmony_ci}
194462306a36Sopenharmony_ci
194562306a36Sopenharmony_ci/*
194662306a36Sopenharmony_ci * mpage_process_page_bufs - submit page buffers for IO or add them to extent
194762306a36Sopenharmony_ci *
194862306a36Sopenharmony_ci * @mpd - extent of blocks for mapping
194962306a36Sopenharmony_ci * @head - the first buffer in the page
195062306a36Sopenharmony_ci * @bh - buffer we should start processing from
195162306a36Sopenharmony_ci * @lblk - logical number of the block in the file corresponding to @bh
195262306a36Sopenharmony_ci *
195362306a36Sopenharmony_ci * Walk through page buffers from @bh upto @head (exclusive) and either submit
195462306a36Sopenharmony_ci * the page for IO if all buffers in this page were mapped and there's no
195562306a36Sopenharmony_ci * accumulated extent of buffers to map or add buffers in the page to the
195662306a36Sopenharmony_ci * extent of buffers to map. The function returns 1 if the caller can continue
195762306a36Sopenharmony_ci * by processing the next page, 0 if it should stop adding buffers to the
195862306a36Sopenharmony_ci * extent to map because we cannot extend it anymore. It can also return value
195962306a36Sopenharmony_ci * < 0 in case of error during IO submission.
196062306a36Sopenharmony_ci */
196162306a36Sopenharmony_cistatic int mpage_process_page_bufs(struct mpage_da_data *mpd,
196262306a36Sopenharmony_ci				   struct buffer_head *head,
196362306a36Sopenharmony_ci				   struct buffer_head *bh,
196462306a36Sopenharmony_ci				   ext4_lblk_t lblk)
196562306a36Sopenharmony_ci{
196662306a36Sopenharmony_ci	struct inode *inode = mpd->inode;
196762306a36Sopenharmony_ci	int err;
196862306a36Sopenharmony_ci	ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
196962306a36Sopenharmony_ci							>> inode->i_blkbits;
197062306a36Sopenharmony_ci
197162306a36Sopenharmony_ci	if (ext4_verity_in_progress(inode))
197262306a36Sopenharmony_ci		blocks = EXT_MAX_BLOCKS;
197362306a36Sopenharmony_ci
197462306a36Sopenharmony_ci	do {
197562306a36Sopenharmony_ci		BUG_ON(buffer_locked(bh));
197662306a36Sopenharmony_ci
197762306a36Sopenharmony_ci		if (lblk >= blocks || !mpage_add_bh_to_extent(mpd, lblk, bh)) {
197862306a36Sopenharmony_ci			/* Found extent to map? */
197962306a36Sopenharmony_ci			if (mpd->map.m_len)
198062306a36Sopenharmony_ci				return 0;
198162306a36Sopenharmony_ci			/* Buffer needs mapping and handle is not started? */
198262306a36Sopenharmony_ci			if (!mpd->do_map)
198362306a36Sopenharmony_ci				return 0;
198462306a36Sopenharmony_ci			/* Everything mapped so far and we hit EOF */
198562306a36Sopenharmony_ci			break;
198662306a36Sopenharmony_ci		}
198762306a36Sopenharmony_ci	} while (lblk++, (bh = bh->b_this_page) != head);
198862306a36Sopenharmony_ci	/* So far everything mapped? Submit the page for IO. */
198962306a36Sopenharmony_ci	if (mpd->map.m_len == 0) {
199062306a36Sopenharmony_ci		err = mpage_submit_folio(mpd, head->b_folio);
199162306a36Sopenharmony_ci		if (err < 0)
199262306a36Sopenharmony_ci			return err;
199362306a36Sopenharmony_ci		mpage_folio_done(mpd, head->b_folio);
199462306a36Sopenharmony_ci	}
199562306a36Sopenharmony_ci	if (lblk >= blocks) {
199662306a36Sopenharmony_ci		mpd->scanned_until_end = 1;
199762306a36Sopenharmony_ci		return 0;
199862306a36Sopenharmony_ci	}
199962306a36Sopenharmony_ci	return 1;
200062306a36Sopenharmony_ci}
200162306a36Sopenharmony_ci
200262306a36Sopenharmony_ci/*
200362306a36Sopenharmony_ci * mpage_process_folio - update folio buffers corresponding to changed extent
200462306a36Sopenharmony_ci *			 and may submit fully mapped page for IO
200562306a36Sopenharmony_ci * @mpd: description of extent to map, on return next extent to map
200662306a36Sopenharmony_ci * @folio: Contains these buffers.
200762306a36Sopenharmony_ci * @m_lblk: logical block mapping.
200862306a36Sopenharmony_ci * @m_pblk: corresponding physical mapping.
200962306a36Sopenharmony_ci * @map_bh: determines on return whether this page requires any further
201062306a36Sopenharmony_ci *		  mapping or not.
201162306a36Sopenharmony_ci *
201262306a36Sopenharmony_ci * Scan given folio buffers corresponding to changed extent and update buffer
201362306a36Sopenharmony_ci * state according to new extent state.
201462306a36Sopenharmony_ci * We map delalloc buffers to their physical location, clear unwritten bits.
201562306a36Sopenharmony_ci * If the given folio is not fully mapped, we update @mpd to the next extent in
201662306a36Sopenharmony_ci * the given folio that needs mapping & return @map_bh as true.
201762306a36Sopenharmony_ci */
201862306a36Sopenharmony_cistatic int mpage_process_folio(struct mpage_da_data *mpd, struct folio *folio,
201962306a36Sopenharmony_ci			      ext4_lblk_t *m_lblk, ext4_fsblk_t *m_pblk,
202062306a36Sopenharmony_ci			      bool *map_bh)
202162306a36Sopenharmony_ci{
202262306a36Sopenharmony_ci	struct buffer_head *head, *bh;
202362306a36Sopenharmony_ci	ext4_io_end_t *io_end = mpd->io_submit.io_end;
202462306a36Sopenharmony_ci	ext4_lblk_t lblk = *m_lblk;
202562306a36Sopenharmony_ci	ext4_fsblk_t pblock = *m_pblk;
202662306a36Sopenharmony_ci	int err = 0;
202762306a36Sopenharmony_ci	int blkbits = mpd->inode->i_blkbits;
202862306a36Sopenharmony_ci	ssize_t io_end_size = 0;
202962306a36Sopenharmony_ci	struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end);
203062306a36Sopenharmony_ci
203162306a36Sopenharmony_ci	bh = head = folio_buffers(folio);
203262306a36Sopenharmony_ci	do {
203362306a36Sopenharmony_ci		if (lblk < mpd->map.m_lblk)
203462306a36Sopenharmony_ci			continue;
203562306a36Sopenharmony_ci		if (lblk >= mpd->map.m_lblk + mpd->map.m_len) {
203662306a36Sopenharmony_ci			/*
203762306a36Sopenharmony_ci			 * Buffer after end of mapped extent.
203862306a36Sopenharmony_ci			 * Find next buffer in the folio to map.
203962306a36Sopenharmony_ci			 */
204062306a36Sopenharmony_ci			mpd->map.m_len = 0;
204162306a36Sopenharmony_ci			mpd->map.m_flags = 0;
204262306a36Sopenharmony_ci			io_end_vec->size += io_end_size;
204362306a36Sopenharmony_ci
204462306a36Sopenharmony_ci			err = mpage_process_page_bufs(mpd, head, bh, lblk);
204562306a36Sopenharmony_ci			if (err > 0)
204662306a36Sopenharmony_ci				err = 0;
204762306a36Sopenharmony_ci			if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) {
204862306a36Sopenharmony_ci				io_end_vec = ext4_alloc_io_end_vec(io_end);
204962306a36Sopenharmony_ci				if (IS_ERR(io_end_vec)) {
205062306a36Sopenharmony_ci					err = PTR_ERR(io_end_vec);
205162306a36Sopenharmony_ci					goto out;
205262306a36Sopenharmony_ci				}
205362306a36Sopenharmony_ci				io_end_vec->offset = (loff_t)mpd->map.m_lblk << blkbits;
205462306a36Sopenharmony_ci			}
205562306a36Sopenharmony_ci			*map_bh = true;
205662306a36Sopenharmony_ci			goto out;
205762306a36Sopenharmony_ci		}
205862306a36Sopenharmony_ci		if (buffer_delay(bh)) {
205962306a36Sopenharmony_ci			clear_buffer_delay(bh);
206062306a36Sopenharmony_ci			bh->b_blocknr = pblock++;
206162306a36Sopenharmony_ci		}
206262306a36Sopenharmony_ci		clear_buffer_unwritten(bh);
206362306a36Sopenharmony_ci		io_end_size += (1 << blkbits);
206462306a36Sopenharmony_ci	} while (lblk++, (bh = bh->b_this_page) != head);
206562306a36Sopenharmony_ci
206662306a36Sopenharmony_ci	io_end_vec->size += io_end_size;
206762306a36Sopenharmony_ci	*map_bh = false;
206862306a36Sopenharmony_ciout:
206962306a36Sopenharmony_ci	*m_lblk = lblk;
207062306a36Sopenharmony_ci	*m_pblk = pblock;
207162306a36Sopenharmony_ci	return err;
207262306a36Sopenharmony_ci}
207362306a36Sopenharmony_ci
207462306a36Sopenharmony_ci/*
207562306a36Sopenharmony_ci * mpage_map_buffers - update buffers corresponding to changed extent and
207662306a36Sopenharmony_ci *		       submit fully mapped pages for IO
207762306a36Sopenharmony_ci *
207862306a36Sopenharmony_ci * @mpd - description of extent to map, on return next extent to map
207962306a36Sopenharmony_ci *
208062306a36Sopenharmony_ci * Scan buffers corresponding to changed extent (we expect corresponding pages
208162306a36Sopenharmony_ci * to be already locked) and update buffer state according to new extent state.
208262306a36Sopenharmony_ci * We map delalloc buffers to their physical location, clear unwritten bits,
208362306a36Sopenharmony_ci * and mark buffers as uninit when we perform writes to unwritten extents
208462306a36Sopenharmony_ci * and do extent conversion after IO is finished. If the last page is not fully
208562306a36Sopenharmony_ci * mapped, we update @map to the next extent in the last page that needs
208662306a36Sopenharmony_ci * mapping. Otherwise we submit the page for IO.
208762306a36Sopenharmony_ci */
208862306a36Sopenharmony_cistatic int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
208962306a36Sopenharmony_ci{
209062306a36Sopenharmony_ci	struct folio_batch fbatch;
209162306a36Sopenharmony_ci	unsigned nr, i;
209262306a36Sopenharmony_ci	struct inode *inode = mpd->inode;
209362306a36Sopenharmony_ci	int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
209462306a36Sopenharmony_ci	pgoff_t start, end;
209562306a36Sopenharmony_ci	ext4_lblk_t lblk;
209662306a36Sopenharmony_ci	ext4_fsblk_t pblock;
209762306a36Sopenharmony_ci	int err;
209862306a36Sopenharmony_ci	bool map_bh = false;
209962306a36Sopenharmony_ci
210062306a36Sopenharmony_ci	start = mpd->map.m_lblk >> bpp_bits;
210162306a36Sopenharmony_ci	end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits;
210262306a36Sopenharmony_ci	lblk = start << bpp_bits;
210362306a36Sopenharmony_ci	pblock = mpd->map.m_pblk;
210462306a36Sopenharmony_ci
210562306a36Sopenharmony_ci	folio_batch_init(&fbatch);
210662306a36Sopenharmony_ci	while (start <= end) {
210762306a36Sopenharmony_ci		nr = filemap_get_folios(inode->i_mapping, &start, end, &fbatch);
210862306a36Sopenharmony_ci		if (nr == 0)
210962306a36Sopenharmony_ci			break;
211062306a36Sopenharmony_ci		for (i = 0; i < nr; i++) {
211162306a36Sopenharmony_ci			struct folio *folio = fbatch.folios[i];
211262306a36Sopenharmony_ci
211362306a36Sopenharmony_ci			err = mpage_process_folio(mpd, folio, &lblk, &pblock,
211462306a36Sopenharmony_ci						 &map_bh);
211562306a36Sopenharmony_ci			/*
211662306a36Sopenharmony_ci			 * If map_bh is true, means page may require further bh
211762306a36Sopenharmony_ci			 * mapping, or maybe the page was submitted for IO.
211862306a36Sopenharmony_ci			 * So we return to call further extent mapping.
211962306a36Sopenharmony_ci			 */
212062306a36Sopenharmony_ci			if (err < 0 || map_bh)
212162306a36Sopenharmony_ci				goto out;
212262306a36Sopenharmony_ci			/* Page fully mapped - let IO run! */
212362306a36Sopenharmony_ci			err = mpage_submit_folio(mpd, folio);
212462306a36Sopenharmony_ci			if (err < 0)
212562306a36Sopenharmony_ci				goto out;
212662306a36Sopenharmony_ci			mpage_folio_done(mpd, folio);
212762306a36Sopenharmony_ci		}
212862306a36Sopenharmony_ci		folio_batch_release(&fbatch);
212962306a36Sopenharmony_ci	}
213062306a36Sopenharmony_ci	/* Extent fully mapped and matches with page boundary. We are done. */
213162306a36Sopenharmony_ci	mpd->map.m_len = 0;
213262306a36Sopenharmony_ci	mpd->map.m_flags = 0;
213362306a36Sopenharmony_ci	return 0;
213462306a36Sopenharmony_ciout:
213562306a36Sopenharmony_ci	folio_batch_release(&fbatch);
213662306a36Sopenharmony_ci	return err;
213762306a36Sopenharmony_ci}
213862306a36Sopenharmony_ci
213962306a36Sopenharmony_cistatic int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
214062306a36Sopenharmony_ci{
214162306a36Sopenharmony_ci	struct inode *inode = mpd->inode;
214262306a36Sopenharmony_ci	struct ext4_map_blocks *map = &mpd->map;
214362306a36Sopenharmony_ci	int get_blocks_flags;
214462306a36Sopenharmony_ci	int err, dioread_nolock;
214562306a36Sopenharmony_ci
214662306a36Sopenharmony_ci	trace_ext4_da_write_pages_extent(inode, map);
214762306a36Sopenharmony_ci	/*
214862306a36Sopenharmony_ci	 * Call ext4_map_blocks() to allocate any delayed allocation blocks, or
214962306a36Sopenharmony_ci	 * to convert an unwritten extent to be initialized (in the case
215062306a36Sopenharmony_ci	 * where we have written into one or more preallocated blocks).  It is
215162306a36Sopenharmony_ci	 * possible that we're going to need more metadata blocks than
215262306a36Sopenharmony_ci	 * previously reserved. However we must not fail because we're in
215362306a36Sopenharmony_ci	 * writeback and there is nothing we can do about it so it might result
215462306a36Sopenharmony_ci	 * in data loss.  So use reserved blocks to allocate metadata if
215562306a36Sopenharmony_ci	 * possible.
215662306a36Sopenharmony_ci	 *
215762306a36Sopenharmony_ci	 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
215862306a36Sopenharmony_ci	 * the blocks in question are delalloc blocks.  This indicates
215962306a36Sopenharmony_ci	 * that the blocks and quotas has already been checked when
216062306a36Sopenharmony_ci	 * the data was copied into the page cache.
216162306a36Sopenharmony_ci	 */
216262306a36Sopenharmony_ci	get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
216362306a36Sopenharmony_ci			   EXT4_GET_BLOCKS_METADATA_NOFAIL |
216462306a36Sopenharmony_ci			   EXT4_GET_BLOCKS_IO_SUBMIT;
216562306a36Sopenharmony_ci	dioread_nolock = ext4_should_dioread_nolock(inode);
216662306a36Sopenharmony_ci	if (dioread_nolock)
216762306a36Sopenharmony_ci		get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
216862306a36Sopenharmony_ci	if (map->m_flags & BIT(BH_Delay))
216962306a36Sopenharmony_ci		get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
217062306a36Sopenharmony_ci
217162306a36Sopenharmony_ci	err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
217262306a36Sopenharmony_ci	if (err < 0)
217362306a36Sopenharmony_ci		return err;
217462306a36Sopenharmony_ci	if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) {
217562306a36Sopenharmony_ci		if (!mpd->io_submit.io_end->handle &&
217662306a36Sopenharmony_ci		    ext4_handle_valid(handle)) {
217762306a36Sopenharmony_ci			mpd->io_submit.io_end->handle = handle->h_rsv_handle;
217862306a36Sopenharmony_ci			handle->h_rsv_handle = NULL;
217962306a36Sopenharmony_ci		}
218062306a36Sopenharmony_ci		ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
218162306a36Sopenharmony_ci	}
218262306a36Sopenharmony_ci
218362306a36Sopenharmony_ci	BUG_ON(map->m_len == 0);
218462306a36Sopenharmony_ci	return 0;
218562306a36Sopenharmony_ci}
218662306a36Sopenharmony_ci
218762306a36Sopenharmony_ci/*
218862306a36Sopenharmony_ci * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length
218962306a36Sopenharmony_ci *				 mpd->len and submit pages underlying it for IO
219062306a36Sopenharmony_ci *
219162306a36Sopenharmony_ci * @handle - handle for journal operations
219262306a36Sopenharmony_ci * @mpd - extent to map
219362306a36Sopenharmony_ci * @give_up_on_write - we set this to true iff there is a fatal error and there
219462306a36Sopenharmony_ci *                     is no hope of writing the data. The caller should discard
219562306a36Sopenharmony_ci *                     dirty pages to avoid infinite loops.
219662306a36Sopenharmony_ci *
219762306a36Sopenharmony_ci * The function maps extent starting at mpd->lblk of length mpd->len. If it is
219862306a36Sopenharmony_ci * delayed, blocks are allocated, if it is unwritten, we may need to convert
219962306a36Sopenharmony_ci * them to initialized or split the described range from larger unwritten
220062306a36Sopenharmony_ci * extent. Note that we need not map all the described range since allocation
220162306a36Sopenharmony_ci * can return less blocks or the range is covered by more unwritten extents. We
220262306a36Sopenharmony_ci * cannot map more because we are limited by reserved transaction credits. On
220362306a36Sopenharmony_ci * the other hand we always make sure that the last touched page is fully
220462306a36Sopenharmony_ci * mapped so that it can be written out (and thus forward progress is
220562306a36Sopenharmony_ci * guaranteed). After mapping we submit all mapped pages for IO.
220662306a36Sopenharmony_ci */
220762306a36Sopenharmony_cistatic int mpage_map_and_submit_extent(handle_t *handle,
220862306a36Sopenharmony_ci				       struct mpage_da_data *mpd,
220962306a36Sopenharmony_ci				       bool *give_up_on_write)
221062306a36Sopenharmony_ci{
221162306a36Sopenharmony_ci	struct inode *inode = mpd->inode;
221262306a36Sopenharmony_ci	struct ext4_map_blocks *map = &mpd->map;
221362306a36Sopenharmony_ci	int err;
221462306a36Sopenharmony_ci	loff_t disksize;
221562306a36Sopenharmony_ci	int progress = 0;
221662306a36Sopenharmony_ci	ext4_io_end_t *io_end = mpd->io_submit.io_end;
221762306a36Sopenharmony_ci	struct ext4_io_end_vec *io_end_vec;
221862306a36Sopenharmony_ci
221962306a36Sopenharmony_ci	io_end_vec = ext4_alloc_io_end_vec(io_end);
222062306a36Sopenharmony_ci	if (IS_ERR(io_end_vec))
222162306a36Sopenharmony_ci		return PTR_ERR(io_end_vec);
222262306a36Sopenharmony_ci	io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits;
222362306a36Sopenharmony_ci	do {
222462306a36Sopenharmony_ci		err = mpage_map_one_extent(handle, mpd);
222562306a36Sopenharmony_ci		if (err < 0) {
222662306a36Sopenharmony_ci			struct super_block *sb = inode->i_sb;
222762306a36Sopenharmony_ci
222862306a36Sopenharmony_ci			if (ext4_forced_shutdown(sb))
222962306a36Sopenharmony_ci				goto invalidate_dirty_pages;
223062306a36Sopenharmony_ci			/*
223162306a36Sopenharmony_ci			 * Let the uper layers retry transient errors.
223262306a36Sopenharmony_ci			 * In the case of ENOSPC, if ext4_count_free_blocks()
223362306a36Sopenharmony_ci			 * is non-zero, a commit should free up blocks.
223462306a36Sopenharmony_ci			 */
223562306a36Sopenharmony_ci			if ((err == -ENOMEM) ||
223662306a36Sopenharmony_ci			    (err == -ENOSPC && ext4_count_free_clusters(sb))) {
223762306a36Sopenharmony_ci				if (progress)
223862306a36Sopenharmony_ci					goto update_disksize;
223962306a36Sopenharmony_ci				return err;
224062306a36Sopenharmony_ci			}
224162306a36Sopenharmony_ci			ext4_msg(sb, KERN_CRIT,
224262306a36Sopenharmony_ci				 "Delayed block allocation failed for "
224362306a36Sopenharmony_ci				 "inode %lu at logical offset %llu with"
224462306a36Sopenharmony_ci				 " max blocks %u with error %d",
224562306a36Sopenharmony_ci				 inode->i_ino,
224662306a36Sopenharmony_ci				 (unsigned long long)map->m_lblk,
224762306a36Sopenharmony_ci				 (unsigned)map->m_len, -err);
224862306a36Sopenharmony_ci			ext4_msg(sb, KERN_CRIT,
224962306a36Sopenharmony_ci				 "This should not happen!! Data will "
225062306a36Sopenharmony_ci				 "be lost\n");
225162306a36Sopenharmony_ci			if (err == -ENOSPC)
225262306a36Sopenharmony_ci				ext4_print_free_blocks(inode);
225362306a36Sopenharmony_ci		invalidate_dirty_pages:
225462306a36Sopenharmony_ci			*give_up_on_write = true;
225562306a36Sopenharmony_ci			return err;
225662306a36Sopenharmony_ci		}
225762306a36Sopenharmony_ci		progress = 1;
225862306a36Sopenharmony_ci		/*
225962306a36Sopenharmony_ci		 * Update buffer state, submit mapped pages, and get us new
226062306a36Sopenharmony_ci		 * extent to map
226162306a36Sopenharmony_ci		 */
226262306a36Sopenharmony_ci		err = mpage_map_and_submit_buffers(mpd);
226362306a36Sopenharmony_ci		if (err < 0)
226462306a36Sopenharmony_ci			goto update_disksize;
226562306a36Sopenharmony_ci	} while (map->m_len);
226662306a36Sopenharmony_ci
226762306a36Sopenharmony_ciupdate_disksize:
226862306a36Sopenharmony_ci	/*
226962306a36Sopenharmony_ci	 * Update on-disk size after IO is submitted.  Races with
227062306a36Sopenharmony_ci	 * truncate are avoided by checking i_size under i_data_sem.
227162306a36Sopenharmony_ci	 */
227262306a36Sopenharmony_ci	disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
227362306a36Sopenharmony_ci	if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) {
227462306a36Sopenharmony_ci		int err2;
227562306a36Sopenharmony_ci		loff_t i_size;
227662306a36Sopenharmony_ci
227762306a36Sopenharmony_ci		down_write(&EXT4_I(inode)->i_data_sem);
227862306a36Sopenharmony_ci		i_size = i_size_read(inode);
227962306a36Sopenharmony_ci		if (disksize > i_size)
228062306a36Sopenharmony_ci			disksize = i_size;
228162306a36Sopenharmony_ci		if (disksize > EXT4_I(inode)->i_disksize)
228262306a36Sopenharmony_ci			EXT4_I(inode)->i_disksize = disksize;
228362306a36Sopenharmony_ci		up_write(&EXT4_I(inode)->i_data_sem);
228462306a36Sopenharmony_ci		err2 = ext4_mark_inode_dirty(handle, inode);
228562306a36Sopenharmony_ci		if (err2) {
228662306a36Sopenharmony_ci			ext4_error_err(inode->i_sb, -err2,
228762306a36Sopenharmony_ci				       "Failed to mark inode %lu dirty",
228862306a36Sopenharmony_ci				       inode->i_ino);
228962306a36Sopenharmony_ci		}
229062306a36Sopenharmony_ci		if (!err)
229162306a36Sopenharmony_ci			err = err2;
229262306a36Sopenharmony_ci	}
229362306a36Sopenharmony_ci	return err;
229462306a36Sopenharmony_ci}
229562306a36Sopenharmony_ci
229662306a36Sopenharmony_ci/*
229762306a36Sopenharmony_ci * Calculate the total number of credits to reserve for one writepages
229862306a36Sopenharmony_ci * iteration. This is called from ext4_writepages(). We map an extent of
229962306a36Sopenharmony_ci * up to MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping
230062306a36Sopenharmony_ci * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN +
230162306a36Sopenharmony_ci * bpp - 1 blocks in bpp different extents.
230262306a36Sopenharmony_ci */
230362306a36Sopenharmony_cistatic int ext4_da_writepages_trans_blocks(struct inode *inode)
230462306a36Sopenharmony_ci{
230562306a36Sopenharmony_ci	int bpp = ext4_journal_blocks_per_page(inode);
230662306a36Sopenharmony_ci
230762306a36Sopenharmony_ci	return ext4_meta_trans_blocks(inode,
230862306a36Sopenharmony_ci				MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp);
230962306a36Sopenharmony_ci}
231062306a36Sopenharmony_ci
231162306a36Sopenharmony_cistatic int ext4_journal_folio_buffers(handle_t *handle, struct folio *folio,
231262306a36Sopenharmony_ci				     size_t len)
231362306a36Sopenharmony_ci{
231462306a36Sopenharmony_ci	struct buffer_head *page_bufs = folio_buffers(folio);
231562306a36Sopenharmony_ci	struct inode *inode = folio->mapping->host;
231662306a36Sopenharmony_ci	int ret, err;
231762306a36Sopenharmony_ci
231862306a36Sopenharmony_ci	ret = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len,
231962306a36Sopenharmony_ci				     NULL, do_journal_get_write_access);
232062306a36Sopenharmony_ci	err = ext4_walk_page_buffers(handle, inode, page_bufs, 0, len,
232162306a36Sopenharmony_ci				     NULL, write_end_fn);
232262306a36Sopenharmony_ci	if (ret == 0)
232362306a36Sopenharmony_ci		ret = err;
232462306a36Sopenharmony_ci	err = ext4_jbd2_inode_add_write(handle, inode, folio_pos(folio), len);
232562306a36Sopenharmony_ci	if (ret == 0)
232662306a36Sopenharmony_ci		ret = err;
232762306a36Sopenharmony_ci	EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
232862306a36Sopenharmony_ci
232962306a36Sopenharmony_ci	return ret;
233062306a36Sopenharmony_ci}
233162306a36Sopenharmony_ci
233262306a36Sopenharmony_cistatic int mpage_journal_page_buffers(handle_t *handle,
233362306a36Sopenharmony_ci				      struct mpage_da_data *mpd,
233462306a36Sopenharmony_ci				      struct folio *folio)
233562306a36Sopenharmony_ci{
233662306a36Sopenharmony_ci	struct inode *inode = mpd->inode;
233762306a36Sopenharmony_ci	loff_t size = i_size_read(inode);
233862306a36Sopenharmony_ci	size_t len = folio_size(folio);
233962306a36Sopenharmony_ci
234062306a36Sopenharmony_ci	folio_clear_checked(folio);
234162306a36Sopenharmony_ci	mpd->wbc->nr_to_write--;
234262306a36Sopenharmony_ci
234362306a36Sopenharmony_ci	if (folio_pos(folio) + len > size &&
234462306a36Sopenharmony_ci	    !ext4_verity_in_progress(inode))
234562306a36Sopenharmony_ci		len = size - folio_pos(folio);
234662306a36Sopenharmony_ci
234762306a36Sopenharmony_ci	return ext4_journal_folio_buffers(handle, folio, len);
234862306a36Sopenharmony_ci}
234962306a36Sopenharmony_ci
235062306a36Sopenharmony_ci/*
235162306a36Sopenharmony_ci * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages
235262306a36Sopenharmony_ci * 				 needing mapping, submit mapped pages
235362306a36Sopenharmony_ci *
235462306a36Sopenharmony_ci * @mpd - where to look for pages
235562306a36Sopenharmony_ci *
235662306a36Sopenharmony_ci * Walk dirty pages in the mapping. If they are fully mapped, submit them for
235762306a36Sopenharmony_ci * IO immediately. If we cannot map blocks, we submit just already mapped
235862306a36Sopenharmony_ci * buffers in the page for IO and keep page dirty. When we can map blocks and
235962306a36Sopenharmony_ci * we find a page which isn't mapped we start accumulating extent of buffers
236062306a36Sopenharmony_ci * underlying these pages that needs mapping (formed by either delayed or
236162306a36Sopenharmony_ci * unwritten buffers). We also lock the pages containing these buffers. The
236262306a36Sopenharmony_ci * extent found is returned in @mpd structure (starting at mpd->lblk with
236362306a36Sopenharmony_ci * length mpd->len blocks).
236462306a36Sopenharmony_ci *
236562306a36Sopenharmony_ci * Note that this function can attach bios to one io_end structure which are
236662306a36Sopenharmony_ci * neither logically nor physically contiguous. Although it may seem as an
236762306a36Sopenharmony_ci * unnecessary complication, it is actually inevitable in blocksize < pagesize
236862306a36Sopenharmony_ci * case as we need to track IO to all buffers underlying a page in one io_end.
236962306a36Sopenharmony_ci */
237062306a36Sopenharmony_cistatic int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
237162306a36Sopenharmony_ci{
237262306a36Sopenharmony_ci	struct address_space *mapping = mpd->inode->i_mapping;
237362306a36Sopenharmony_ci	struct folio_batch fbatch;
237462306a36Sopenharmony_ci	unsigned int nr_folios;
237562306a36Sopenharmony_ci	pgoff_t index = mpd->first_page;
237662306a36Sopenharmony_ci	pgoff_t end = mpd->last_page;
237762306a36Sopenharmony_ci	xa_mark_t tag;
237862306a36Sopenharmony_ci	int i, err = 0;
237962306a36Sopenharmony_ci	int blkbits = mpd->inode->i_blkbits;
238062306a36Sopenharmony_ci	ext4_lblk_t lblk;
238162306a36Sopenharmony_ci	struct buffer_head *head;
238262306a36Sopenharmony_ci	handle_t *handle = NULL;
238362306a36Sopenharmony_ci	int bpp = ext4_journal_blocks_per_page(mpd->inode);
238462306a36Sopenharmony_ci
238562306a36Sopenharmony_ci	if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages)
238662306a36Sopenharmony_ci		tag = PAGECACHE_TAG_TOWRITE;
238762306a36Sopenharmony_ci	else
238862306a36Sopenharmony_ci		tag = PAGECACHE_TAG_DIRTY;
238962306a36Sopenharmony_ci
239062306a36Sopenharmony_ci	mpd->map.m_len = 0;
239162306a36Sopenharmony_ci	mpd->next_page = index;
239262306a36Sopenharmony_ci	if (ext4_should_journal_data(mpd->inode)) {
239362306a36Sopenharmony_ci		handle = ext4_journal_start(mpd->inode, EXT4_HT_WRITE_PAGE,
239462306a36Sopenharmony_ci					    bpp);
239562306a36Sopenharmony_ci		if (IS_ERR(handle))
239662306a36Sopenharmony_ci			return PTR_ERR(handle);
239762306a36Sopenharmony_ci	}
239862306a36Sopenharmony_ci	folio_batch_init(&fbatch);
239962306a36Sopenharmony_ci	while (index <= end) {
240062306a36Sopenharmony_ci		nr_folios = filemap_get_folios_tag(mapping, &index, end,
240162306a36Sopenharmony_ci				tag, &fbatch);
240262306a36Sopenharmony_ci		if (nr_folios == 0)
240362306a36Sopenharmony_ci			break;
240462306a36Sopenharmony_ci
240562306a36Sopenharmony_ci		for (i = 0; i < nr_folios; i++) {
240662306a36Sopenharmony_ci			struct folio *folio = fbatch.folios[i];
240762306a36Sopenharmony_ci
240862306a36Sopenharmony_ci			/*
240962306a36Sopenharmony_ci			 * Accumulated enough dirty pages? This doesn't apply
241062306a36Sopenharmony_ci			 * to WB_SYNC_ALL mode. For integrity sync we have to
241162306a36Sopenharmony_ci			 * keep going because someone may be concurrently
241262306a36Sopenharmony_ci			 * dirtying pages, and we might have synced a lot of
241362306a36Sopenharmony_ci			 * newly appeared dirty pages, but have not synced all
241462306a36Sopenharmony_ci			 * of the old dirty pages.
241562306a36Sopenharmony_ci			 */
241662306a36Sopenharmony_ci			if (mpd->wbc->sync_mode == WB_SYNC_NONE &&
241762306a36Sopenharmony_ci			    mpd->wbc->nr_to_write <=
241862306a36Sopenharmony_ci			    mpd->map.m_len >> (PAGE_SHIFT - blkbits))
241962306a36Sopenharmony_ci				goto out;
242062306a36Sopenharmony_ci
242162306a36Sopenharmony_ci			/* If we can't merge this page, we are done. */
242262306a36Sopenharmony_ci			if (mpd->map.m_len > 0 && mpd->next_page != folio->index)
242362306a36Sopenharmony_ci				goto out;
242462306a36Sopenharmony_ci
242562306a36Sopenharmony_ci			if (handle) {
242662306a36Sopenharmony_ci				err = ext4_journal_ensure_credits(handle, bpp,
242762306a36Sopenharmony_ci								  0);
242862306a36Sopenharmony_ci				if (err < 0)
242962306a36Sopenharmony_ci					goto out;
243062306a36Sopenharmony_ci			}
243162306a36Sopenharmony_ci
243262306a36Sopenharmony_ci			folio_lock(folio);
243362306a36Sopenharmony_ci			/*
243462306a36Sopenharmony_ci			 * If the page is no longer dirty, or its mapping no
243562306a36Sopenharmony_ci			 * longer corresponds to inode we are writing (which
243662306a36Sopenharmony_ci			 * means it has been truncated or invalidated), or the
243762306a36Sopenharmony_ci			 * page is already under writeback and we are not doing
243862306a36Sopenharmony_ci			 * a data integrity writeback, skip the page
243962306a36Sopenharmony_ci			 */
244062306a36Sopenharmony_ci			if (!folio_test_dirty(folio) ||
244162306a36Sopenharmony_ci			    (folio_test_writeback(folio) &&
244262306a36Sopenharmony_ci			     (mpd->wbc->sync_mode == WB_SYNC_NONE)) ||
244362306a36Sopenharmony_ci			    unlikely(folio->mapping != mapping)) {
244462306a36Sopenharmony_ci				folio_unlock(folio);
244562306a36Sopenharmony_ci				continue;
244662306a36Sopenharmony_ci			}
244762306a36Sopenharmony_ci
244862306a36Sopenharmony_ci			folio_wait_writeback(folio);
244962306a36Sopenharmony_ci			BUG_ON(folio_test_writeback(folio));
245062306a36Sopenharmony_ci
245162306a36Sopenharmony_ci			/*
245262306a36Sopenharmony_ci			 * Should never happen but for buggy code in
245362306a36Sopenharmony_ci			 * other subsystems that call
245462306a36Sopenharmony_ci			 * set_page_dirty() without properly warning
245562306a36Sopenharmony_ci			 * the file system first.  See [1] for more
245662306a36Sopenharmony_ci			 * information.
245762306a36Sopenharmony_ci			 *
245862306a36Sopenharmony_ci			 * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz
245962306a36Sopenharmony_ci			 */
246062306a36Sopenharmony_ci			if (!folio_buffers(folio)) {
246162306a36Sopenharmony_ci				ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", folio->index);
246262306a36Sopenharmony_ci				folio_clear_dirty(folio);
246362306a36Sopenharmony_ci				folio_unlock(folio);
246462306a36Sopenharmony_ci				continue;
246562306a36Sopenharmony_ci			}
246662306a36Sopenharmony_ci
246762306a36Sopenharmony_ci			if (mpd->map.m_len == 0)
246862306a36Sopenharmony_ci				mpd->first_page = folio->index;
246962306a36Sopenharmony_ci			mpd->next_page = folio_next_index(folio);
247062306a36Sopenharmony_ci			/*
247162306a36Sopenharmony_ci			 * Writeout when we cannot modify metadata is simple.
247262306a36Sopenharmony_ci			 * Just submit the page. For data=journal mode we
247362306a36Sopenharmony_ci			 * first handle writeout of the page for checkpoint and
247462306a36Sopenharmony_ci			 * only after that handle delayed page dirtying. This
247562306a36Sopenharmony_ci			 * makes sure current data is checkpointed to the final
247662306a36Sopenharmony_ci			 * location before possibly journalling it again which
247762306a36Sopenharmony_ci			 * is desirable when the page is frequently dirtied
247862306a36Sopenharmony_ci			 * through a pin.
247962306a36Sopenharmony_ci			 */
248062306a36Sopenharmony_ci			if (!mpd->can_map) {
248162306a36Sopenharmony_ci				err = mpage_submit_folio(mpd, folio);
248262306a36Sopenharmony_ci				if (err < 0)
248362306a36Sopenharmony_ci					goto out;
248462306a36Sopenharmony_ci				/* Pending dirtying of journalled data? */
248562306a36Sopenharmony_ci				if (folio_test_checked(folio)) {
248662306a36Sopenharmony_ci					err = mpage_journal_page_buffers(handle,
248762306a36Sopenharmony_ci						mpd, folio);
248862306a36Sopenharmony_ci					if (err < 0)
248962306a36Sopenharmony_ci						goto out;
249062306a36Sopenharmony_ci					mpd->journalled_more_data = 1;
249162306a36Sopenharmony_ci				}
249262306a36Sopenharmony_ci				mpage_folio_done(mpd, folio);
249362306a36Sopenharmony_ci			} else {
249462306a36Sopenharmony_ci				/* Add all dirty buffers to mpd */
249562306a36Sopenharmony_ci				lblk = ((ext4_lblk_t)folio->index) <<
249662306a36Sopenharmony_ci					(PAGE_SHIFT - blkbits);
249762306a36Sopenharmony_ci				head = folio_buffers(folio);
249862306a36Sopenharmony_ci				err = mpage_process_page_bufs(mpd, head, head,
249962306a36Sopenharmony_ci						lblk);
250062306a36Sopenharmony_ci				if (err <= 0)
250162306a36Sopenharmony_ci					goto out;
250262306a36Sopenharmony_ci				err = 0;
250362306a36Sopenharmony_ci			}
250462306a36Sopenharmony_ci		}
250562306a36Sopenharmony_ci		folio_batch_release(&fbatch);
250662306a36Sopenharmony_ci		cond_resched();
250762306a36Sopenharmony_ci	}
250862306a36Sopenharmony_ci	mpd->scanned_until_end = 1;
250962306a36Sopenharmony_ci	if (handle)
251062306a36Sopenharmony_ci		ext4_journal_stop(handle);
251162306a36Sopenharmony_ci	return 0;
251262306a36Sopenharmony_ciout:
251362306a36Sopenharmony_ci	folio_batch_release(&fbatch);
251462306a36Sopenharmony_ci	if (handle)
251562306a36Sopenharmony_ci		ext4_journal_stop(handle);
251662306a36Sopenharmony_ci	return err;
251762306a36Sopenharmony_ci}
251862306a36Sopenharmony_ci
251962306a36Sopenharmony_cistatic int ext4_do_writepages(struct mpage_da_data *mpd)
252062306a36Sopenharmony_ci{
252162306a36Sopenharmony_ci	struct writeback_control *wbc = mpd->wbc;
252262306a36Sopenharmony_ci	pgoff_t	writeback_index = 0;
252362306a36Sopenharmony_ci	long nr_to_write = wbc->nr_to_write;
252462306a36Sopenharmony_ci	int range_whole = 0;
252562306a36Sopenharmony_ci	int cycled = 1;
252662306a36Sopenharmony_ci	handle_t *handle = NULL;
252762306a36Sopenharmony_ci	struct inode *inode = mpd->inode;
252862306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
252962306a36Sopenharmony_ci	int needed_blocks, rsv_blocks = 0, ret = 0;
253062306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
253162306a36Sopenharmony_ci	struct blk_plug plug;
253262306a36Sopenharmony_ci	bool give_up_on_write = false;
253362306a36Sopenharmony_ci
253462306a36Sopenharmony_ci	trace_ext4_writepages(inode, wbc);
253562306a36Sopenharmony_ci
253662306a36Sopenharmony_ci	/*
253762306a36Sopenharmony_ci	 * No pages to write? This is mainly a kludge to avoid starting
253862306a36Sopenharmony_ci	 * a transaction for special inodes like journal inode on last iput()
253962306a36Sopenharmony_ci	 * because that could violate lock ordering on umount
254062306a36Sopenharmony_ci	 */
254162306a36Sopenharmony_ci	if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
254262306a36Sopenharmony_ci		goto out_writepages;
254362306a36Sopenharmony_ci
254462306a36Sopenharmony_ci	/*
254562306a36Sopenharmony_ci	 * If the filesystem has aborted, it is read-only, so return
254662306a36Sopenharmony_ci	 * right away instead of dumping stack traces later on that
254762306a36Sopenharmony_ci	 * will obscure the real source of the problem.  We test
254862306a36Sopenharmony_ci	 * fs shutdown state instead of sb->s_flag's SB_RDONLY because
254962306a36Sopenharmony_ci	 * the latter could be true if the filesystem is mounted
255062306a36Sopenharmony_ci	 * read-only, and in that case, ext4_writepages should
255162306a36Sopenharmony_ci	 * *never* be called, so if that ever happens, we would want
255262306a36Sopenharmony_ci	 * the stack trace.
255362306a36Sopenharmony_ci	 */
255462306a36Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(mapping->host->i_sb))) {
255562306a36Sopenharmony_ci		ret = -EROFS;
255662306a36Sopenharmony_ci		goto out_writepages;
255762306a36Sopenharmony_ci	}
255862306a36Sopenharmony_ci
255962306a36Sopenharmony_ci	/*
256062306a36Sopenharmony_ci	 * If we have inline data and arrive here, it means that
256162306a36Sopenharmony_ci	 * we will soon create the block for the 1st page, so
256262306a36Sopenharmony_ci	 * we'd better clear the inline data here.
256362306a36Sopenharmony_ci	 */
256462306a36Sopenharmony_ci	if (ext4_has_inline_data(inode)) {
256562306a36Sopenharmony_ci		/* Just inode will be modified... */
256662306a36Sopenharmony_ci		handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
256762306a36Sopenharmony_ci		if (IS_ERR(handle)) {
256862306a36Sopenharmony_ci			ret = PTR_ERR(handle);
256962306a36Sopenharmony_ci			goto out_writepages;
257062306a36Sopenharmony_ci		}
257162306a36Sopenharmony_ci		BUG_ON(ext4_test_inode_state(inode,
257262306a36Sopenharmony_ci				EXT4_STATE_MAY_INLINE_DATA));
257362306a36Sopenharmony_ci		ext4_destroy_inline_data(handle, inode);
257462306a36Sopenharmony_ci		ext4_journal_stop(handle);
257562306a36Sopenharmony_ci	}
257662306a36Sopenharmony_ci
257762306a36Sopenharmony_ci	/*
257862306a36Sopenharmony_ci	 * data=journal mode does not do delalloc so we just need to writeout /
257962306a36Sopenharmony_ci	 * journal already mapped buffers. On the other hand we need to commit
258062306a36Sopenharmony_ci	 * transaction to make data stable. We expect all the data to be
258162306a36Sopenharmony_ci	 * already in the journal (the only exception are DMA pinned pages
258262306a36Sopenharmony_ci	 * dirtied behind our back) so we commit transaction here and run the
258362306a36Sopenharmony_ci	 * writeback loop to checkpoint them. The checkpointing is not actually
258462306a36Sopenharmony_ci	 * necessary to make data persistent *but* quite a few places (extent
258562306a36Sopenharmony_ci	 * shifting operations, fsverity, ...) depend on being able to drop
258662306a36Sopenharmony_ci	 * pagecache pages after calling filemap_write_and_wait() and for that
258762306a36Sopenharmony_ci	 * checkpointing needs to happen.
258862306a36Sopenharmony_ci	 */
258962306a36Sopenharmony_ci	if (ext4_should_journal_data(inode)) {
259062306a36Sopenharmony_ci		mpd->can_map = 0;
259162306a36Sopenharmony_ci		if (wbc->sync_mode == WB_SYNC_ALL)
259262306a36Sopenharmony_ci			ext4_fc_commit(sbi->s_journal,
259362306a36Sopenharmony_ci				       EXT4_I(inode)->i_datasync_tid);
259462306a36Sopenharmony_ci	}
259562306a36Sopenharmony_ci	mpd->journalled_more_data = 0;
259662306a36Sopenharmony_ci
259762306a36Sopenharmony_ci	if (ext4_should_dioread_nolock(inode)) {
259862306a36Sopenharmony_ci		/*
259962306a36Sopenharmony_ci		 * We may need to convert up to one extent per block in
260062306a36Sopenharmony_ci		 * the page and we may dirty the inode.
260162306a36Sopenharmony_ci		 */
260262306a36Sopenharmony_ci		rsv_blocks = 1 + ext4_chunk_trans_blocks(inode,
260362306a36Sopenharmony_ci						PAGE_SIZE >> inode->i_blkbits);
260462306a36Sopenharmony_ci	}
260562306a36Sopenharmony_ci
260662306a36Sopenharmony_ci	if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
260762306a36Sopenharmony_ci		range_whole = 1;
260862306a36Sopenharmony_ci
260962306a36Sopenharmony_ci	if (wbc->range_cyclic) {
261062306a36Sopenharmony_ci		writeback_index = mapping->writeback_index;
261162306a36Sopenharmony_ci		if (writeback_index)
261262306a36Sopenharmony_ci			cycled = 0;
261362306a36Sopenharmony_ci		mpd->first_page = writeback_index;
261462306a36Sopenharmony_ci		mpd->last_page = -1;
261562306a36Sopenharmony_ci	} else {
261662306a36Sopenharmony_ci		mpd->first_page = wbc->range_start >> PAGE_SHIFT;
261762306a36Sopenharmony_ci		mpd->last_page = wbc->range_end >> PAGE_SHIFT;
261862306a36Sopenharmony_ci	}
261962306a36Sopenharmony_ci
262062306a36Sopenharmony_ci	ext4_io_submit_init(&mpd->io_submit, wbc);
262162306a36Sopenharmony_ciretry:
262262306a36Sopenharmony_ci	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
262362306a36Sopenharmony_ci		tag_pages_for_writeback(mapping, mpd->first_page,
262462306a36Sopenharmony_ci					mpd->last_page);
262562306a36Sopenharmony_ci	blk_start_plug(&plug);
262662306a36Sopenharmony_ci
262762306a36Sopenharmony_ci	/*
262862306a36Sopenharmony_ci	 * First writeback pages that don't need mapping - we can avoid
262962306a36Sopenharmony_ci	 * starting a transaction unnecessarily and also avoid being blocked
263062306a36Sopenharmony_ci	 * in the block layer on device congestion while having transaction
263162306a36Sopenharmony_ci	 * started.
263262306a36Sopenharmony_ci	 */
263362306a36Sopenharmony_ci	mpd->do_map = 0;
263462306a36Sopenharmony_ci	mpd->scanned_until_end = 0;
263562306a36Sopenharmony_ci	mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
263662306a36Sopenharmony_ci	if (!mpd->io_submit.io_end) {
263762306a36Sopenharmony_ci		ret = -ENOMEM;
263862306a36Sopenharmony_ci		goto unplug;
263962306a36Sopenharmony_ci	}
264062306a36Sopenharmony_ci	ret = mpage_prepare_extent_to_map(mpd);
264162306a36Sopenharmony_ci	/* Unlock pages we didn't use */
264262306a36Sopenharmony_ci	mpage_release_unused_pages(mpd, false);
264362306a36Sopenharmony_ci	/* Submit prepared bio */
264462306a36Sopenharmony_ci	ext4_io_submit(&mpd->io_submit);
264562306a36Sopenharmony_ci	ext4_put_io_end_defer(mpd->io_submit.io_end);
264662306a36Sopenharmony_ci	mpd->io_submit.io_end = NULL;
264762306a36Sopenharmony_ci	if (ret < 0)
264862306a36Sopenharmony_ci		goto unplug;
264962306a36Sopenharmony_ci
265062306a36Sopenharmony_ci	while (!mpd->scanned_until_end && wbc->nr_to_write > 0) {
265162306a36Sopenharmony_ci		/* For each extent of pages we use new io_end */
265262306a36Sopenharmony_ci		mpd->io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
265362306a36Sopenharmony_ci		if (!mpd->io_submit.io_end) {
265462306a36Sopenharmony_ci			ret = -ENOMEM;
265562306a36Sopenharmony_ci			break;
265662306a36Sopenharmony_ci		}
265762306a36Sopenharmony_ci
265862306a36Sopenharmony_ci		WARN_ON_ONCE(!mpd->can_map);
265962306a36Sopenharmony_ci		/*
266062306a36Sopenharmony_ci		 * We have two constraints: We find one extent to map and we
266162306a36Sopenharmony_ci		 * must always write out whole page (makes a difference when
266262306a36Sopenharmony_ci		 * blocksize < pagesize) so that we don't block on IO when we
266362306a36Sopenharmony_ci		 * try to write out the rest of the page. Journalled mode is
266462306a36Sopenharmony_ci		 * not supported by delalloc.
266562306a36Sopenharmony_ci		 */
266662306a36Sopenharmony_ci		BUG_ON(ext4_should_journal_data(inode));
266762306a36Sopenharmony_ci		needed_blocks = ext4_da_writepages_trans_blocks(inode);
266862306a36Sopenharmony_ci
266962306a36Sopenharmony_ci		/* start a new transaction */
267062306a36Sopenharmony_ci		handle = ext4_journal_start_with_reserve(inode,
267162306a36Sopenharmony_ci				EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);
267262306a36Sopenharmony_ci		if (IS_ERR(handle)) {
267362306a36Sopenharmony_ci			ret = PTR_ERR(handle);
267462306a36Sopenharmony_ci			ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
267562306a36Sopenharmony_ci			       "%ld pages, ino %lu; err %d", __func__,
267662306a36Sopenharmony_ci				wbc->nr_to_write, inode->i_ino, ret);
267762306a36Sopenharmony_ci			/* Release allocated io_end */
267862306a36Sopenharmony_ci			ext4_put_io_end(mpd->io_submit.io_end);
267962306a36Sopenharmony_ci			mpd->io_submit.io_end = NULL;
268062306a36Sopenharmony_ci			break;
268162306a36Sopenharmony_ci		}
268262306a36Sopenharmony_ci		mpd->do_map = 1;
268362306a36Sopenharmony_ci
268462306a36Sopenharmony_ci		trace_ext4_da_write_pages(inode, mpd->first_page, wbc);
268562306a36Sopenharmony_ci		ret = mpage_prepare_extent_to_map(mpd);
268662306a36Sopenharmony_ci		if (!ret && mpd->map.m_len)
268762306a36Sopenharmony_ci			ret = mpage_map_and_submit_extent(handle, mpd,
268862306a36Sopenharmony_ci					&give_up_on_write);
268962306a36Sopenharmony_ci		/*
269062306a36Sopenharmony_ci		 * Caution: If the handle is synchronous,
269162306a36Sopenharmony_ci		 * ext4_journal_stop() can wait for transaction commit
269262306a36Sopenharmony_ci		 * to finish which may depend on writeback of pages to
269362306a36Sopenharmony_ci		 * complete or on page lock to be released.  In that
269462306a36Sopenharmony_ci		 * case, we have to wait until after we have
269562306a36Sopenharmony_ci		 * submitted all the IO, released page locks we hold,
269662306a36Sopenharmony_ci		 * and dropped io_end reference (for extent conversion
269762306a36Sopenharmony_ci		 * to be able to complete) before stopping the handle.
269862306a36Sopenharmony_ci		 */
269962306a36Sopenharmony_ci		if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
270062306a36Sopenharmony_ci			ext4_journal_stop(handle);
270162306a36Sopenharmony_ci			handle = NULL;
270262306a36Sopenharmony_ci			mpd->do_map = 0;
270362306a36Sopenharmony_ci		}
270462306a36Sopenharmony_ci		/* Unlock pages we didn't use */
270562306a36Sopenharmony_ci		mpage_release_unused_pages(mpd, give_up_on_write);
270662306a36Sopenharmony_ci		/* Submit prepared bio */
270762306a36Sopenharmony_ci		ext4_io_submit(&mpd->io_submit);
270862306a36Sopenharmony_ci
270962306a36Sopenharmony_ci		/*
271062306a36Sopenharmony_ci		 * Drop our io_end reference we got from init. We have
271162306a36Sopenharmony_ci		 * to be careful and use deferred io_end finishing if
271262306a36Sopenharmony_ci		 * we are still holding the transaction as we can
271362306a36Sopenharmony_ci		 * release the last reference to io_end which may end
271462306a36Sopenharmony_ci		 * up doing unwritten extent conversion.
271562306a36Sopenharmony_ci		 */
271662306a36Sopenharmony_ci		if (handle) {
271762306a36Sopenharmony_ci			ext4_put_io_end_defer(mpd->io_submit.io_end);
271862306a36Sopenharmony_ci			ext4_journal_stop(handle);
271962306a36Sopenharmony_ci		} else
272062306a36Sopenharmony_ci			ext4_put_io_end(mpd->io_submit.io_end);
272162306a36Sopenharmony_ci		mpd->io_submit.io_end = NULL;
272262306a36Sopenharmony_ci
272362306a36Sopenharmony_ci		if (ret == -ENOSPC && sbi->s_journal) {
272462306a36Sopenharmony_ci			/*
272562306a36Sopenharmony_ci			 * Commit the transaction which would
272662306a36Sopenharmony_ci			 * free blocks released in the transaction
272762306a36Sopenharmony_ci			 * and try again
272862306a36Sopenharmony_ci			 */
272962306a36Sopenharmony_ci			jbd2_journal_force_commit_nested(sbi->s_journal);
273062306a36Sopenharmony_ci			ret = 0;
273162306a36Sopenharmony_ci			continue;
273262306a36Sopenharmony_ci		}
273362306a36Sopenharmony_ci		/* Fatal error - ENOMEM, EIO... */
273462306a36Sopenharmony_ci		if (ret)
273562306a36Sopenharmony_ci			break;
273662306a36Sopenharmony_ci	}
273762306a36Sopenharmony_ciunplug:
273862306a36Sopenharmony_ci	blk_finish_plug(&plug);
273962306a36Sopenharmony_ci	if (!ret && !cycled && wbc->nr_to_write > 0) {
274062306a36Sopenharmony_ci		cycled = 1;
274162306a36Sopenharmony_ci		mpd->last_page = writeback_index - 1;
274262306a36Sopenharmony_ci		mpd->first_page = 0;
274362306a36Sopenharmony_ci		goto retry;
274462306a36Sopenharmony_ci	}
274562306a36Sopenharmony_ci
274662306a36Sopenharmony_ci	/* Update index */
274762306a36Sopenharmony_ci	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
274862306a36Sopenharmony_ci		/*
274962306a36Sopenharmony_ci		 * Set the writeback_index so that range_cyclic
275062306a36Sopenharmony_ci		 * mode will write it back later
275162306a36Sopenharmony_ci		 */
275262306a36Sopenharmony_ci		mapping->writeback_index = mpd->first_page;
275362306a36Sopenharmony_ci
275462306a36Sopenharmony_ciout_writepages:
275562306a36Sopenharmony_ci	trace_ext4_writepages_result(inode, wbc, ret,
275662306a36Sopenharmony_ci				     nr_to_write - wbc->nr_to_write);
275762306a36Sopenharmony_ci	return ret;
275862306a36Sopenharmony_ci}
275962306a36Sopenharmony_ci
276062306a36Sopenharmony_cistatic int ext4_writepages(struct address_space *mapping,
276162306a36Sopenharmony_ci			   struct writeback_control *wbc)
276262306a36Sopenharmony_ci{
276362306a36Sopenharmony_ci	struct super_block *sb = mapping->host->i_sb;
276462306a36Sopenharmony_ci	struct mpage_da_data mpd = {
276562306a36Sopenharmony_ci		.inode = mapping->host,
276662306a36Sopenharmony_ci		.wbc = wbc,
276762306a36Sopenharmony_ci		.can_map = 1,
276862306a36Sopenharmony_ci	};
276962306a36Sopenharmony_ci	int ret;
277062306a36Sopenharmony_ci	int alloc_ctx;
277162306a36Sopenharmony_ci
277262306a36Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(sb)))
277362306a36Sopenharmony_ci		return -EIO;
277462306a36Sopenharmony_ci
277562306a36Sopenharmony_ci	alloc_ctx = ext4_writepages_down_read(sb);
277662306a36Sopenharmony_ci	ret = ext4_do_writepages(&mpd);
277762306a36Sopenharmony_ci	/*
277862306a36Sopenharmony_ci	 * For data=journal writeback we could have come across pages marked
277962306a36Sopenharmony_ci	 * for delayed dirtying (PageChecked) which were just added to the
278062306a36Sopenharmony_ci	 * running transaction. Try once more to get them to stable storage.
278162306a36Sopenharmony_ci	 */
278262306a36Sopenharmony_ci	if (!ret && mpd.journalled_more_data)
278362306a36Sopenharmony_ci		ret = ext4_do_writepages(&mpd);
278462306a36Sopenharmony_ci	ext4_writepages_up_read(sb, alloc_ctx);
278562306a36Sopenharmony_ci
278662306a36Sopenharmony_ci	return ret;
278762306a36Sopenharmony_ci}
278862306a36Sopenharmony_ci
278962306a36Sopenharmony_ciint ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode)
279062306a36Sopenharmony_ci{
279162306a36Sopenharmony_ci	struct writeback_control wbc = {
279262306a36Sopenharmony_ci		.sync_mode = WB_SYNC_ALL,
279362306a36Sopenharmony_ci		.nr_to_write = LONG_MAX,
279462306a36Sopenharmony_ci		.range_start = jinode->i_dirty_start,
279562306a36Sopenharmony_ci		.range_end = jinode->i_dirty_end,
279662306a36Sopenharmony_ci	};
279762306a36Sopenharmony_ci	struct mpage_da_data mpd = {
279862306a36Sopenharmony_ci		.inode = jinode->i_vfs_inode,
279962306a36Sopenharmony_ci		.wbc = &wbc,
280062306a36Sopenharmony_ci		.can_map = 0,
280162306a36Sopenharmony_ci	};
280262306a36Sopenharmony_ci	return ext4_do_writepages(&mpd);
280362306a36Sopenharmony_ci}
280462306a36Sopenharmony_ci
280562306a36Sopenharmony_cistatic int ext4_dax_writepages(struct address_space *mapping,
280662306a36Sopenharmony_ci			       struct writeback_control *wbc)
280762306a36Sopenharmony_ci{
280862306a36Sopenharmony_ci	int ret;
280962306a36Sopenharmony_ci	long nr_to_write = wbc->nr_to_write;
281062306a36Sopenharmony_ci	struct inode *inode = mapping->host;
281162306a36Sopenharmony_ci	int alloc_ctx;
281262306a36Sopenharmony_ci
281362306a36Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
281462306a36Sopenharmony_ci		return -EIO;
281562306a36Sopenharmony_ci
281662306a36Sopenharmony_ci	alloc_ctx = ext4_writepages_down_read(inode->i_sb);
281762306a36Sopenharmony_ci	trace_ext4_writepages(inode, wbc);
281862306a36Sopenharmony_ci
281962306a36Sopenharmony_ci	ret = dax_writeback_mapping_range(mapping,
282062306a36Sopenharmony_ci					  EXT4_SB(inode->i_sb)->s_daxdev, wbc);
282162306a36Sopenharmony_ci	trace_ext4_writepages_result(inode, wbc, ret,
282262306a36Sopenharmony_ci				     nr_to_write - wbc->nr_to_write);
282362306a36Sopenharmony_ci	ext4_writepages_up_read(inode->i_sb, alloc_ctx);
282462306a36Sopenharmony_ci	return ret;
282562306a36Sopenharmony_ci}
282662306a36Sopenharmony_ci
282762306a36Sopenharmony_cistatic int ext4_nonda_switch(struct super_block *sb)
282862306a36Sopenharmony_ci{
282962306a36Sopenharmony_ci	s64 free_clusters, dirty_clusters;
283062306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(sb);
283162306a36Sopenharmony_ci
283262306a36Sopenharmony_ci	/*
283362306a36Sopenharmony_ci	 * switch to non delalloc mode if we are running low
283462306a36Sopenharmony_ci	 * on free block. The free block accounting via percpu
283562306a36Sopenharmony_ci	 * counters can get slightly wrong with percpu_counter_batch getting
283662306a36Sopenharmony_ci	 * accumulated on each CPU without updating global counters
283762306a36Sopenharmony_ci	 * Delalloc need an accurate free block accounting. So switch
283862306a36Sopenharmony_ci	 * to non delalloc when we are near to error range.
283962306a36Sopenharmony_ci	 */
284062306a36Sopenharmony_ci	free_clusters =
284162306a36Sopenharmony_ci		percpu_counter_read_positive(&sbi->s_freeclusters_counter);
284262306a36Sopenharmony_ci	dirty_clusters =
284362306a36Sopenharmony_ci		percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
284462306a36Sopenharmony_ci	/*
284562306a36Sopenharmony_ci	 * Start pushing delalloc when 1/2 of free blocks are dirty.
284662306a36Sopenharmony_ci	 */
284762306a36Sopenharmony_ci	if (dirty_clusters && (free_clusters < 2 * dirty_clusters))
284862306a36Sopenharmony_ci		try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
284962306a36Sopenharmony_ci
285062306a36Sopenharmony_ci	if (2 * free_clusters < 3 * dirty_clusters ||
285162306a36Sopenharmony_ci	    free_clusters < (dirty_clusters + EXT4_FREECLUSTERS_WATERMARK)) {
285262306a36Sopenharmony_ci		/*
285362306a36Sopenharmony_ci		 * free block count is less than 150% of dirty blocks
285462306a36Sopenharmony_ci		 * or free blocks is less than watermark
285562306a36Sopenharmony_ci		 */
285662306a36Sopenharmony_ci		return 1;
285762306a36Sopenharmony_ci	}
285862306a36Sopenharmony_ci	return 0;
285962306a36Sopenharmony_ci}
286062306a36Sopenharmony_ci
286162306a36Sopenharmony_cistatic int ext4_da_write_begin(struct file *file, struct address_space *mapping,
286262306a36Sopenharmony_ci			       loff_t pos, unsigned len,
286362306a36Sopenharmony_ci			       struct page **pagep, void **fsdata)
286462306a36Sopenharmony_ci{
286562306a36Sopenharmony_ci	int ret, retries = 0;
286662306a36Sopenharmony_ci	struct folio *folio;
286762306a36Sopenharmony_ci	pgoff_t index;
286862306a36Sopenharmony_ci	struct inode *inode = mapping->host;
286962306a36Sopenharmony_ci
287062306a36Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
287162306a36Sopenharmony_ci		return -EIO;
287262306a36Sopenharmony_ci
287362306a36Sopenharmony_ci	index = pos >> PAGE_SHIFT;
287462306a36Sopenharmony_ci
287562306a36Sopenharmony_ci	if (ext4_nonda_switch(inode->i_sb) || ext4_verity_in_progress(inode)) {
287662306a36Sopenharmony_ci		*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
287762306a36Sopenharmony_ci		return ext4_write_begin(file, mapping, pos,
287862306a36Sopenharmony_ci					len, pagep, fsdata);
287962306a36Sopenharmony_ci	}
288062306a36Sopenharmony_ci	*fsdata = (void *)0;
288162306a36Sopenharmony_ci	trace_ext4_da_write_begin(inode, pos, len);
288262306a36Sopenharmony_ci
288362306a36Sopenharmony_ci	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
288462306a36Sopenharmony_ci		ret = ext4_da_write_inline_data_begin(mapping, inode, pos, len,
288562306a36Sopenharmony_ci						      pagep, fsdata);
288662306a36Sopenharmony_ci		if (ret < 0)
288762306a36Sopenharmony_ci			return ret;
288862306a36Sopenharmony_ci		if (ret == 1)
288962306a36Sopenharmony_ci			return 0;
289062306a36Sopenharmony_ci	}
289162306a36Sopenharmony_ci
289262306a36Sopenharmony_ciretry:
289362306a36Sopenharmony_ci	folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
289462306a36Sopenharmony_ci			mapping_gfp_mask(mapping));
289562306a36Sopenharmony_ci	if (IS_ERR(folio))
289662306a36Sopenharmony_ci		return PTR_ERR(folio);
289762306a36Sopenharmony_ci
289862306a36Sopenharmony_ci	/* In case writeback began while the folio was unlocked */
289962306a36Sopenharmony_ci	folio_wait_stable(folio);
290062306a36Sopenharmony_ci
290162306a36Sopenharmony_ci#ifdef CONFIG_FS_ENCRYPTION
290262306a36Sopenharmony_ci	ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep);
290362306a36Sopenharmony_ci#else
290462306a36Sopenharmony_ci	ret = __block_write_begin(&folio->page, pos, len, ext4_da_get_block_prep);
290562306a36Sopenharmony_ci#endif
290662306a36Sopenharmony_ci	if (ret < 0) {
290762306a36Sopenharmony_ci		folio_unlock(folio);
290862306a36Sopenharmony_ci		folio_put(folio);
290962306a36Sopenharmony_ci		/*
291062306a36Sopenharmony_ci		 * block_write_begin may have instantiated a few blocks
291162306a36Sopenharmony_ci		 * outside i_size.  Trim these off again. Don't need
291262306a36Sopenharmony_ci		 * i_size_read because we hold inode lock.
291362306a36Sopenharmony_ci		 */
291462306a36Sopenharmony_ci		if (pos + len > inode->i_size)
291562306a36Sopenharmony_ci			ext4_truncate_failed_write(inode);
291662306a36Sopenharmony_ci
291762306a36Sopenharmony_ci		if (ret == -ENOSPC &&
291862306a36Sopenharmony_ci		    ext4_should_retry_alloc(inode->i_sb, &retries))
291962306a36Sopenharmony_ci			goto retry;
292062306a36Sopenharmony_ci		return ret;
292162306a36Sopenharmony_ci	}
292262306a36Sopenharmony_ci
292362306a36Sopenharmony_ci	*pagep = &folio->page;
292462306a36Sopenharmony_ci	return ret;
292562306a36Sopenharmony_ci}
292662306a36Sopenharmony_ci
292762306a36Sopenharmony_ci/*
292862306a36Sopenharmony_ci * Check if we should update i_disksize
292962306a36Sopenharmony_ci * when write to the end of file but not require block allocation
293062306a36Sopenharmony_ci */
293162306a36Sopenharmony_cistatic int ext4_da_should_update_i_disksize(struct folio *folio,
293262306a36Sopenharmony_ci					    unsigned long offset)
293362306a36Sopenharmony_ci{
293462306a36Sopenharmony_ci	struct buffer_head *bh;
293562306a36Sopenharmony_ci	struct inode *inode = folio->mapping->host;
293662306a36Sopenharmony_ci	unsigned int idx;
293762306a36Sopenharmony_ci	int i;
293862306a36Sopenharmony_ci
293962306a36Sopenharmony_ci	bh = folio_buffers(folio);
294062306a36Sopenharmony_ci	idx = offset >> inode->i_blkbits;
294162306a36Sopenharmony_ci
294262306a36Sopenharmony_ci	for (i = 0; i < idx; i++)
294362306a36Sopenharmony_ci		bh = bh->b_this_page;
294462306a36Sopenharmony_ci
294562306a36Sopenharmony_ci	if (!buffer_mapped(bh) || (buffer_delay(bh)) || buffer_unwritten(bh))
294662306a36Sopenharmony_ci		return 0;
294762306a36Sopenharmony_ci	return 1;
294862306a36Sopenharmony_ci}
294962306a36Sopenharmony_ci
295062306a36Sopenharmony_cistatic int ext4_da_do_write_end(struct address_space *mapping,
295162306a36Sopenharmony_ci			loff_t pos, unsigned len, unsigned copied,
295262306a36Sopenharmony_ci			struct page *page)
295362306a36Sopenharmony_ci{
295462306a36Sopenharmony_ci	struct inode *inode = mapping->host;
295562306a36Sopenharmony_ci	loff_t old_size = inode->i_size;
295662306a36Sopenharmony_ci	bool disksize_changed = false;
295762306a36Sopenharmony_ci	loff_t new_i_size;
295862306a36Sopenharmony_ci
295962306a36Sopenharmony_ci	/*
296062306a36Sopenharmony_ci	 * block_write_end() will mark the inode as dirty with I_DIRTY_PAGES
296162306a36Sopenharmony_ci	 * flag, which all that's needed to trigger page writeback.
296262306a36Sopenharmony_ci	 */
296362306a36Sopenharmony_ci	copied = block_write_end(NULL, mapping, pos, len, copied, page, NULL);
296462306a36Sopenharmony_ci	new_i_size = pos + copied;
296562306a36Sopenharmony_ci
296662306a36Sopenharmony_ci	/*
296762306a36Sopenharmony_ci	 * It's important to update i_size while still holding page lock,
296862306a36Sopenharmony_ci	 * because page writeout could otherwise come in and zero beyond
296962306a36Sopenharmony_ci	 * i_size.
297062306a36Sopenharmony_ci	 *
297162306a36Sopenharmony_ci	 * Since we are holding inode lock, we are sure i_disksize <=
297262306a36Sopenharmony_ci	 * i_size. We also know that if i_disksize < i_size, there are
297362306a36Sopenharmony_ci	 * delalloc writes pending in the range up to i_size. If the end of
297462306a36Sopenharmony_ci	 * the current write is <= i_size, there's no need to touch
297562306a36Sopenharmony_ci	 * i_disksize since writeback will push i_disksize up to i_size
297662306a36Sopenharmony_ci	 * eventually. If the end of the current write is > i_size and
297762306a36Sopenharmony_ci	 * inside an allocated block which ext4_da_should_update_i_disksize()
297862306a36Sopenharmony_ci	 * checked, we need to update i_disksize here as certain
297962306a36Sopenharmony_ci	 * ext4_writepages() paths not allocating blocks and update i_disksize.
298062306a36Sopenharmony_ci	 */
298162306a36Sopenharmony_ci	if (new_i_size > inode->i_size) {
298262306a36Sopenharmony_ci		unsigned long end;
298362306a36Sopenharmony_ci
298462306a36Sopenharmony_ci		i_size_write(inode, new_i_size);
298562306a36Sopenharmony_ci		end = (new_i_size - 1) & (PAGE_SIZE - 1);
298662306a36Sopenharmony_ci		if (copied && ext4_da_should_update_i_disksize(page_folio(page), end)) {
298762306a36Sopenharmony_ci			ext4_update_i_disksize(inode, new_i_size);
298862306a36Sopenharmony_ci			disksize_changed = true;
298962306a36Sopenharmony_ci		}
299062306a36Sopenharmony_ci	}
299162306a36Sopenharmony_ci
299262306a36Sopenharmony_ci	unlock_page(page);
299362306a36Sopenharmony_ci	put_page(page);
299462306a36Sopenharmony_ci
299562306a36Sopenharmony_ci	if (old_size < pos)
299662306a36Sopenharmony_ci		pagecache_isize_extended(inode, old_size, pos);
299762306a36Sopenharmony_ci
299862306a36Sopenharmony_ci	if (disksize_changed) {
299962306a36Sopenharmony_ci		handle_t *handle;
300062306a36Sopenharmony_ci
300162306a36Sopenharmony_ci		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
300262306a36Sopenharmony_ci		if (IS_ERR(handle))
300362306a36Sopenharmony_ci			return PTR_ERR(handle);
300462306a36Sopenharmony_ci		ext4_mark_inode_dirty(handle, inode);
300562306a36Sopenharmony_ci		ext4_journal_stop(handle);
300662306a36Sopenharmony_ci	}
300762306a36Sopenharmony_ci
300862306a36Sopenharmony_ci	return copied;
300962306a36Sopenharmony_ci}
301062306a36Sopenharmony_ci
301162306a36Sopenharmony_cistatic int ext4_da_write_end(struct file *file,
301262306a36Sopenharmony_ci			     struct address_space *mapping,
301362306a36Sopenharmony_ci			     loff_t pos, unsigned len, unsigned copied,
301462306a36Sopenharmony_ci			     struct page *page, void *fsdata)
301562306a36Sopenharmony_ci{
301662306a36Sopenharmony_ci	struct inode *inode = mapping->host;
301762306a36Sopenharmony_ci	int write_mode = (int)(unsigned long)fsdata;
301862306a36Sopenharmony_ci	struct folio *folio = page_folio(page);
301962306a36Sopenharmony_ci
302062306a36Sopenharmony_ci	if (write_mode == FALL_BACK_TO_NONDELALLOC)
302162306a36Sopenharmony_ci		return ext4_write_end(file, mapping, pos,
302262306a36Sopenharmony_ci				      len, copied, &folio->page, fsdata);
302362306a36Sopenharmony_ci
302462306a36Sopenharmony_ci	trace_ext4_da_write_end(inode, pos, len, copied);
302562306a36Sopenharmony_ci
302662306a36Sopenharmony_ci	if (write_mode != CONVERT_INLINE_DATA &&
302762306a36Sopenharmony_ci	    ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) &&
302862306a36Sopenharmony_ci	    ext4_has_inline_data(inode))
302962306a36Sopenharmony_ci		return ext4_write_inline_data_end(inode, pos, len, copied,
303062306a36Sopenharmony_ci						  folio);
303162306a36Sopenharmony_ci
303262306a36Sopenharmony_ci	if (unlikely(copied < len) && !PageUptodate(page))
303362306a36Sopenharmony_ci		copied = 0;
303462306a36Sopenharmony_ci
303562306a36Sopenharmony_ci	return ext4_da_do_write_end(mapping, pos, len, copied, &folio->page);
303662306a36Sopenharmony_ci}
303762306a36Sopenharmony_ci
303862306a36Sopenharmony_ci/*
303962306a36Sopenharmony_ci * Force all delayed allocation blocks to be allocated for a given inode.
304062306a36Sopenharmony_ci */
304162306a36Sopenharmony_ciint ext4_alloc_da_blocks(struct inode *inode)
304262306a36Sopenharmony_ci{
304362306a36Sopenharmony_ci	trace_ext4_alloc_da_blocks(inode);
304462306a36Sopenharmony_ci
304562306a36Sopenharmony_ci	if (!EXT4_I(inode)->i_reserved_data_blocks)
304662306a36Sopenharmony_ci		return 0;
304762306a36Sopenharmony_ci
304862306a36Sopenharmony_ci	/*
304962306a36Sopenharmony_ci	 * We do something simple for now.  The filemap_flush() will
305062306a36Sopenharmony_ci	 * also start triggering a write of the data blocks, which is
305162306a36Sopenharmony_ci	 * not strictly speaking necessary (and for users of
305262306a36Sopenharmony_ci	 * laptop_mode, not even desirable).  However, to do otherwise
305362306a36Sopenharmony_ci	 * would require replicating code paths in:
305462306a36Sopenharmony_ci	 *
305562306a36Sopenharmony_ci	 * ext4_writepages() ->
305662306a36Sopenharmony_ci	 *    write_cache_pages() ---> (via passed in callback function)
305762306a36Sopenharmony_ci	 *        __mpage_da_writepage() -->
305862306a36Sopenharmony_ci	 *           mpage_add_bh_to_extent()
305962306a36Sopenharmony_ci	 *           mpage_da_map_blocks()
306062306a36Sopenharmony_ci	 *
306162306a36Sopenharmony_ci	 * The problem is that write_cache_pages(), located in
306262306a36Sopenharmony_ci	 * mm/page-writeback.c, marks pages clean in preparation for
306362306a36Sopenharmony_ci	 * doing I/O, which is not desirable if we're not planning on
306462306a36Sopenharmony_ci	 * doing I/O at all.
306562306a36Sopenharmony_ci	 *
306662306a36Sopenharmony_ci	 * We could call write_cache_pages(), and then redirty all of
306762306a36Sopenharmony_ci	 * the pages by calling redirty_page_for_writepage() but that
306862306a36Sopenharmony_ci	 * would be ugly in the extreme.  So instead we would need to
306962306a36Sopenharmony_ci	 * replicate parts of the code in the above functions,
307062306a36Sopenharmony_ci	 * simplifying them because we wouldn't actually intend to
307162306a36Sopenharmony_ci	 * write out the pages, but rather only collect contiguous
307262306a36Sopenharmony_ci	 * logical block extents, call the multi-block allocator, and
307362306a36Sopenharmony_ci	 * then update the buffer heads with the block allocations.
307462306a36Sopenharmony_ci	 *
307562306a36Sopenharmony_ci	 * For now, though, we'll cheat by calling filemap_flush(),
307662306a36Sopenharmony_ci	 * which will map the blocks, and start the I/O, but not
307762306a36Sopenharmony_ci	 * actually wait for the I/O to complete.
307862306a36Sopenharmony_ci	 */
307962306a36Sopenharmony_ci	return filemap_flush(inode->i_mapping);
308062306a36Sopenharmony_ci}
308162306a36Sopenharmony_ci
308262306a36Sopenharmony_ci/*
308362306a36Sopenharmony_ci * bmap() is special.  It gets used by applications such as lilo and by
308462306a36Sopenharmony_ci * the swapper to find the on-disk block of a specific piece of data.
308562306a36Sopenharmony_ci *
308662306a36Sopenharmony_ci * Naturally, this is dangerous if the block concerned is still in the
308762306a36Sopenharmony_ci * journal.  If somebody makes a swapfile on an ext4 data-journaling
308862306a36Sopenharmony_ci * filesystem and enables swap, then they may get a nasty shock when the
308962306a36Sopenharmony_ci * data getting swapped to that swapfile suddenly gets overwritten by
309062306a36Sopenharmony_ci * the original zero's written out previously to the journal and
309162306a36Sopenharmony_ci * awaiting writeback in the kernel's buffer cache.
309262306a36Sopenharmony_ci *
309362306a36Sopenharmony_ci * So, if we see any bmap calls here on a modified, data-journaled file,
309462306a36Sopenharmony_ci * take extra steps to flush any blocks which might be in the cache.
309562306a36Sopenharmony_ci */
309662306a36Sopenharmony_cistatic sector_t ext4_bmap(struct address_space *mapping, sector_t block)
309762306a36Sopenharmony_ci{
309862306a36Sopenharmony_ci	struct inode *inode = mapping->host;
309962306a36Sopenharmony_ci	sector_t ret = 0;
310062306a36Sopenharmony_ci
310162306a36Sopenharmony_ci	inode_lock_shared(inode);
310262306a36Sopenharmony_ci	/*
310362306a36Sopenharmony_ci	 * We can get here for an inline file via the FIBMAP ioctl
310462306a36Sopenharmony_ci	 */
310562306a36Sopenharmony_ci	if (ext4_has_inline_data(inode))
310662306a36Sopenharmony_ci		goto out;
310762306a36Sopenharmony_ci
310862306a36Sopenharmony_ci	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
310962306a36Sopenharmony_ci	    (test_opt(inode->i_sb, DELALLOC) ||
311062306a36Sopenharmony_ci	     ext4_should_journal_data(inode))) {
311162306a36Sopenharmony_ci		/*
311262306a36Sopenharmony_ci		 * With delalloc or journalled data we want to sync the file so
311362306a36Sopenharmony_ci		 * that we can make sure we allocate blocks for file and data
311462306a36Sopenharmony_ci		 * is in place for the user to see it
311562306a36Sopenharmony_ci		 */
311662306a36Sopenharmony_ci		filemap_write_and_wait(mapping);
311762306a36Sopenharmony_ci	}
311862306a36Sopenharmony_ci
311962306a36Sopenharmony_ci	ret = iomap_bmap(mapping, block, &ext4_iomap_ops);
312062306a36Sopenharmony_ci
312162306a36Sopenharmony_ciout:
312262306a36Sopenharmony_ci	inode_unlock_shared(inode);
312362306a36Sopenharmony_ci	return ret;
312462306a36Sopenharmony_ci}
312562306a36Sopenharmony_ci
312662306a36Sopenharmony_cistatic int ext4_read_folio(struct file *file, struct folio *folio)
312762306a36Sopenharmony_ci{
312862306a36Sopenharmony_ci	int ret = -EAGAIN;
312962306a36Sopenharmony_ci	struct inode *inode = folio->mapping->host;
313062306a36Sopenharmony_ci
313162306a36Sopenharmony_ci	trace_ext4_read_folio(inode, folio);
313262306a36Sopenharmony_ci
313362306a36Sopenharmony_ci	if (ext4_has_inline_data(inode))
313462306a36Sopenharmony_ci		ret = ext4_readpage_inline(inode, folio);
313562306a36Sopenharmony_ci
313662306a36Sopenharmony_ci	if (ret == -EAGAIN)
313762306a36Sopenharmony_ci		return ext4_mpage_readpages(inode, NULL, folio);
313862306a36Sopenharmony_ci
313962306a36Sopenharmony_ci	return ret;
314062306a36Sopenharmony_ci}
314162306a36Sopenharmony_ci
314262306a36Sopenharmony_cistatic void ext4_readahead(struct readahead_control *rac)
314362306a36Sopenharmony_ci{
314462306a36Sopenharmony_ci	struct inode *inode = rac->mapping->host;
314562306a36Sopenharmony_ci
314662306a36Sopenharmony_ci	/* If the file has inline data, no need to do readahead. */
314762306a36Sopenharmony_ci	if (ext4_has_inline_data(inode))
314862306a36Sopenharmony_ci		return;
314962306a36Sopenharmony_ci
315062306a36Sopenharmony_ci	ext4_mpage_readpages(inode, rac, NULL);
315162306a36Sopenharmony_ci}
315262306a36Sopenharmony_ci
315362306a36Sopenharmony_cistatic void ext4_invalidate_folio(struct folio *folio, size_t offset,
315462306a36Sopenharmony_ci				size_t length)
315562306a36Sopenharmony_ci{
315662306a36Sopenharmony_ci	trace_ext4_invalidate_folio(folio, offset, length);
315762306a36Sopenharmony_ci
315862306a36Sopenharmony_ci	/* No journalling happens on data buffers when this function is used */
315962306a36Sopenharmony_ci	WARN_ON(folio_buffers(folio) && buffer_jbd(folio_buffers(folio)));
316062306a36Sopenharmony_ci
316162306a36Sopenharmony_ci	block_invalidate_folio(folio, offset, length);
316262306a36Sopenharmony_ci}
316362306a36Sopenharmony_ci
316462306a36Sopenharmony_cistatic int __ext4_journalled_invalidate_folio(struct folio *folio,
316562306a36Sopenharmony_ci					    size_t offset, size_t length)
316662306a36Sopenharmony_ci{
316762306a36Sopenharmony_ci	journal_t *journal = EXT4_JOURNAL(folio->mapping->host);
316862306a36Sopenharmony_ci
316962306a36Sopenharmony_ci	trace_ext4_journalled_invalidate_folio(folio, offset, length);
317062306a36Sopenharmony_ci
317162306a36Sopenharmony_ci	/*
317262306a36Sopenharmony_ci	 * If it's a full truncate we just forget about the pending dirtying
317362306a36Sopenharmony_ci	 */
317462306a36Sopenharmony_ci	if (offset == 0 && length == folio_size(folio))
317562306a36Sopenharmony_ci		folio_clear_checked(folio);
317662306a36Sopenharmony_ci
317762306a36Sopenharmony_ci	return jbd2_journal_invalidate_folio(journal, folio, offset, length);
317862306a36Sopenharmony_ci}
317962306a36Sopenharmony_ci
318062306a36Sopenharmony_ci/* Wrapper for aops... */
318162306a36Sopenharmony_cistatic void ext4_journalled_invalidate_folio(struct folio *folio,
318262306a36Sopenharmony_ci					   size_t offset,
318362306a36Sopenharmony_ci					   size_t length)
318462306a36Sopenharmony_ci{
318562306a36Sopenharmony_ci	WARN_ON(__ext4_journalled_invalidate_folio(folio, offset, length) < 0);
318662306a36Sopenharmony_ci}
318762306a36Sopenharmony_ci
318862306a36Sopenharmony_cistatic bool ext4_release_folio(struct folio *folio, gfp_t wait)
318962306a36Sopenharmony_ci{
319062306a36Sopenharmony_ci	struct inode *inode = folio->mapping->host;
319162306a36Sopenharmony_ci	journal_t *journal = EXT4_JOURNAL(inode);
319262306a36Sopenharmony_ci
319362306a36Sopenharmony_ci	trace_ext4_release_folio(inode, folio);
319462306a36Sopenharmony_ci
319562306a36Sopenharmony_ci	/* Page has dirty journalled data -> cannot release */
319662306a36Sopenharmony_ci	if (folio_test_checked(folio))
319762306a36Sopenharmony_ci		return false;
319862306a36Sopenharmony_ci	if (journal)
319962306a36Sopenharmony_ci		return jbd2_journal_try_to_free_buffers(journal, folio);
320062306a36Sopenharmony_ci	else
320162306a36Sopenharmony_ci		return try_to_free_buffers(folio);
320262306a36Sopenharmony_ci}
320362306a36Sopenharmony_ci
320462306a36Sopenharmony_cistatic bool ext4_inode_datasync_dirty(struct inode *inode)
320562306a36Sopenharmony_ci{
320662306a36Sopenharmony_ci	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
320762306a36Sopenharmony_ci
320862306a36Sopenharmony_ci	if (journal) {
320962306a36Sopenharmony_ci		if (jbd2_transaction_committed(journal,
321062306a36Sopenharmony_ci			EXT4_I(inode)->i_datasync_tid))
321162306a36Sopenharmony_ci			return false;
321262306a36Sopenharmony_ci		if (test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
321362306a36Sopenharmony_ci			return !list_empty(&EXT4_I(inode)->i_fc_list);
321462306a36Sopenharmony_ci		return true;
321562306a36Sopenharmony_ci	}
321662306a36Sopenharmony_ci
321762306a36Sopenharmony_ci	/* Any metadata buffers to write? */
321862306a36Sopenharmony_ci	if (!list_empty(&inode->i_mapping->private_list))
321962306a36Sopenharmony_ci		return true;
322062306a36Sopenharmony_ci	return inode->i_state & I_DIRTY_DATASYNC;
322162306a36Sopenharmony_ci}
322262306a36Sopenharmony_ci
322362306a36Sopenharmony_cistatic void ext4_set_iomap(struct inode *inode, struct iomap *iomap,
322462306a36Sopenharmony_ci			   struct ext4_map_blocks *map, loff_t offset,
322562306a36Sopenharmony_ci			   loff_t length, unsigned int flags)
322662306a36Sopenharmony_ci{
322762306a36Sopenharmony_ci	u8 blkbits = inode->i_blkbits;
322862306a36Sopenharmony_ci
322962306a36Sopenharmony_ci	/*
323062306a36Sopenharmony_ci	 * Writes that span EOF might trigger an I/O size update on completion,
323162306a36Sopenharmony_ci	 * so consider them to be dirty for the purpose of O_DSYNC, even if
323262306a36Sopenharmony_ci	 * there is no other metadata changes being made or are pending.
323362306a36Sopenharmony_ci	 */
323462306a36Sopenharmony_ci	iomap->flags = 0;
323562306a36Sopenharmony_ci	if (ext4_inode_datasync_dirty(inode) ||
323662306a36Sopenharmony_ci	    offset + length > i_size_read(inode))
323762306a36Sopenharmony_ci		iomap->flags |= IOMAP_F_DIRTY;
323862306a36Sopenharmony_ci
323962306a36Sopenharmony_ci	if (map->m_flags & EXT4_MAP_NEW)
324062306a36Sopenharmony_ci		iomap->flags |= IOMAP_F_NEW;
324162306a36Sopenharmony_ci
324262306a36Sopenharmony_ci	if (flags & IOMAP_DAX)
324362306a36Sopenharmony_ci		iomap->dax_dev = EXT4_SB(inode->i_sb)->s_daxdev;
324462306a36Sopenharmony_ci	else
324562306a36Sopenharmony_ci		iomap->bdev = inode->i_sb->s_bdev;
324662306a36Sopenharmony_ci	iomap->offset = (u64) map->m_lblk << blkbits;
324762306a36Sopenharmony_ci	iomap->length = (u64) map->m_len << blkbits;
324862306a36Sopenharmony_ci
324962306a36Sopenharmony_ci	if ((map->m_flags & EXT4_MAP_MAPPED) &&
325062306a36Sopenharmony_ci	    !ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
325162306a36Sopenharmony_ci		iomap->flags |= IOMAP_F_MERGED;
325262306a36Sopenharmony_ci
325362306a36Sopenharmony_ci	/*
325462306a36Sopenharmony_ci	 * Flags passed to ext4_map_blocks() for direct I/O writes can result
325562306a36Sopenharmony_ci	 * in m_flags having both EXT4_MAP_MAPPED and EXT4_MAP_UNWRITTEN bits
325662306a36Sopenharmony_ci	 * set. In order for any allocated unwritten extents to be converted
325762306a36Sopenharmony_ci	 * into written extents correctly within the ->end_io() handler, we
325862306a36Sopenharmony_ci	 * need to ensure that the iomap->type is set appropriately. Hence, the
325962306a36Sopenharmony_ci	 * reason why we need to check whether the EXT4_MAP_UNWRITTEN bit has
326062306a36Sopenharmony_ci	 * been set first.
326162306a36Sopenharmony_ci	 */
326262306a36Sopenharmony_ci	if (map->m_flags & EXT4_MAP_UNWRITTEN) {
326362306a36Sopenharmony_ci		iomap->type = IOMAP_UNWRITTEN;
326462306a36Sopenharmony_ci		iomap->addr = (u64) map->m_pblk << blkbits;
326562306a36Sopenharmony_ci		if (flags & IOMAP_DAX)
326662306a36Sopenharmony_ci			iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
326762306a36Sopenharmony_ci	} else if (map->m_flags & EXT4_MAP_MAPPED) {
326862306a36Sopenharmony_ci		iomap->type = IOMAP_MAPPED;
326962306a36Sopenharmony_ci		iomap->addr = (u64) map->m_pblk << blkbits;
327062306a36Sopenharmony_ci		if (flags & IOMAP_DAX)
327162306a36Sopenharmony_ci			iomap->addr += EXT4_SB(inode->i_sb)->s_dax_part_off;
327262306a36Sopenharmony_ci	} else {
327362306a36Sopenharmony_ci		iomap->type = IOMAP_HOLE;
327462306a36Sopenharmony_ci		iomap->addr = IOMAP_NULL_ADDR;
327562306a36Sopenharmony_ci	}
327662306a36Sopenharmony_ci}
327762306a36Sopenharmony_ci
327862306a36Sopenharmony_cistatic int ext4_iomap_alloc(struct inode *inode, struct ext4_map_blocks *map,
327962306a36Sopenharmony_ci			    unsigned int flags)
328062306a36Sopenharmony_ci{
328162306a36Sopenharmony_ci	handle_t *handle;
328262306a36Sopenharmony_ci	u8 blkbits = inode->i_blkbits;
328362306a36Sopenharmony_ci	int ret, dio_credits, m_flags = 0, retries = 0;
328462306a36Sopenharmony_ci
328562306a36Sopenharmony_ci	/*
328662306a36Sopenharmony_ci	 * Trim the mapping request to the maximum value that we can map at
328762306a36Sopenharmony_ci	 * once for direct I/O.
328862306a36Sopenharmony_ci	 */
328962306a36Sopenharmony_ci	if (map->m_len > DIO_MAX_BLOCKS)
329062306a36Sopenharmony_ci		map->m_len = DIO_MAX_BLOCKS;
329162306a36Sopenharmony_ci	dio_credits = ext4_chunk_trans_blocks(inode, map->m_len);
329262306a36Sopenharmony_ci
329362306a36Sopenharmony_ciretry:
329462306a36Sopenharmony_ci	/*
329562306a36Sopenharmony_ci	 * Either we allocate blocks and then don't get an unwritten extent, so
329662306a36Sopenharmony_ci	 * in that case we have reserved enough credits. Or, the blocks are
329762306a36Sopenharmony_ci	 * already allocated and unwritten. In that case, the extent conversion
329862306a36Sopenharmony_ci	 * fits into the credits as well.
329962306a36Sopenharmony_ci	 */
330062306a36Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
330162306a36Sopenharmony_ci	if (IS_ERR(handle))
330262306a36Sopenharmony_ci		return PTR_ERR(handle);
330362306a36Sopenharmony_ci
330462306a36Sopenharmony_ci	/*
330562306a36Sopenharmony_ci	 * DAX and direct I/O are the only two operations that are currently
330662306a36Sopenharmony_ci	 * supported with IOMAP_WRITE.
330762306a36Sopenharmony_ci	 */
330862306a36Sopenharmony_ci	WARN_ON(!(flags & (IOMAP_DAX | IOMAP_DIRECT)));
330962306a36Sopenharmony_ci	if (flags & IOMAP_DAX)
331062306a36Sopenharmony_ci		m_flags = EXT4_GET_BLOCKS_CREATE_ZERO;
331162306a36Sopenharmony_ci	/*
331262306a36Sopenharmony_ci	 * We use i_size instead of i_disksize here because delalloc writeback
331362306a36Sopenharmony_ci	 * can complete at any point during the I/O and subsequently push the
331462306a36Sopenharmony_ci	 * i_disksize out to i_size. This could be beyond where direct I/O is
331562306a36Sopenharmony_ci	 * happening and thus expose allocated blocks to direct I/O reads.
331662306a36Sopenharmony_ci	 */
331762306a36Sopenharmony_ci	else if (((loff_t)map->m_lblk << blkbits) >= i_size_read(inode))
331862306a36Sopenharmony_ci		m_flags = EXT4_GET_BLOCKS_CREATE;
331962306a36Sopenharmony_ci	else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
332062306a36Sopenharmony_ci		m_flags = EXT4_GET_BLOCKS_IO_CREATE_EXT;
332162306a36Sopenharmony_ci
332262306a36Sopenharmony_ci	ret = ext4_map_blocks(handle, inode, map, m_flags);
332362306a36Sopenharmony_ci
332462306a36Sopenharmony_ci	/*
332562306a36Sopenharmony_ci	 * We cannot fill holes in indirect tree based inodes as that could
332662306a36Sopenharmony_ci	 * expose stale data in the case of a crash. Use the magic error code
332762306a36Sopenharmony_ci	 * to fallback to buffered I/O.
332862306a36Sopenharmony_ci	 */
332962306a36Sopenharmony_ci	if (!m_flags && !ret)
333062306a36Sopenharmony_ci		ret = -ENOTBLK;
333162306a36Sopenharmony_ci
333262306a36Sopenharmony_ci	ext4_journal_stop(handle);
333362306a36Sopenharmony_ci	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
333462306a36Sopenharmony_ci		goto retry;
333562306a36Sopenharmony_ci
333662306a36Sopenharmony_ci	return ret;
333762306a36Sopenharmony_ci}
333862306a36Sopenharmony_ci
333962306a36Sopenharmony_ci
334062306a36Sopenharmony_cistatic int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
334162306a36Sopenharmony_ci		unsigned flags, struct iomap *iomap, struct iomap *srcmap)
334262306a36Sopenharmony_ci{
334362306a36Sopenharmony_ci	int ret;
334462306a36Sopenharmony_ci	struct ext4_map_blocks map;
334562306a36Sopenharmony_ci	u8 blkbits = inode->i_blkbits;
334662306a36Sopenharmony_ci
334762306a36Sopenharmony_ci	if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
334862306a36Sopenharmony_ci		return -EINVAL;
334962306a36Sopenharmony_ci
335062306a36Sopenharmony_ci	if (WARN_ON_ONCE(ext4_has_inline_data(inode)))
335162306a36Sopenharmony_ci		return -ERANGE;
335262306a36Sopenharmony_ci
335362306a36Sopenharmony_ci	/*
335462306a36Sopenharmony_ci	 * Calculate the first and last logical blocks respectively.
335562306a36Sopenharmony_ci	 */
335662306a36Sopenharmony_ci	map.m_lblk = offset >> blkbits;
335762306a36Sopenharmony_ci	map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits,
335862306a36Sopenharmony_ci			  EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1;
335962306a36Sopenharmony_ci
336062306a36Sopenharmony_ci	if (flags & IOMAP_WRITE) {
336162306a36Sopenharmony_ci		/*
336262306a36Sopenharmony_ci		 * We check here if the blocks are already allocated, then we
336362306a36Sopenharmony_ci		 * don't need to start a journal txn and we can directly return
336462306a36Sopenharmony_ci		 * the mapping information. This could boost performance
336562306a36Sopenharmony_ci		 * especially in multi-threaded overwrite requests.
336662306a36Sopenharmony_ci		 */
336762306a36Sopenharmony_ci		if (offset + length <= i_size_read(inode)) {
336862306a36Sopenharmony_ci			ret = ext4_map_blocks(NULL, inode, &map, 0);
336962306a36Sopenharmony_ci			if (ret > 0 && (map.m_flags & EXT4_MAP_MAPPED))
337062306a36Sopenharmony_ci				goto out;
337162306a36Sopenharmony_ci		}
337262306a36Sopenharmony_ci		ret = ext4_iomap_alloc(inode, &map, flags);
337362306a36Sopenharmony_ci	} else {
337462306a36Sopenharmony_ci		ret = ext4_map_blocks(NULL, inode, &map, 0);
337562306a36Sopenharmony_ci	}
337662306a36Sopenharmony_ci
337762306a36Sopenharmony_ci	if (ret < 0)
337862306a36Sopenharmony_ci		return ret;
337962306a36Sopenharmony_ciout:
338062306a36Sopenharmony_ci	/*
338162306a36Sopenharmony_ci	 * When inline encryption is enabled, sometimes I/O to an encrypted file
338262306a36Sopenharmony_ci	 * has to be broken up to guarantee DUN contiguity.  Handle this by
338362306a36Sopenharmony_ci	 * limiting the length of the mapping returned.
338462306a36Sopenharmony_ci	 */
338562306a36Sopenharmony_ci	map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
338662306a36Sopenharmony_ci
338762306a36Sopenharmony_ci	ext4_set_iomap(inode, iomap, &map, offset, length, flags);
338862306a36Sopenharmony_ci
338962306a36Sopenharmony_ci	return 0;
339062306a36Sopenharmony_ci}
339162306a36Sopenharmony_ci
339262306a36Sopenharmony_cistatic int ext4_iomap_overwrite_begin(struct inode *inode, loff_t offset,
339362306a36Sopenharmony_ci		loff_t length, unsigned flags, struct iomap *iomap,
339462306a36Sopenharmony_ci		struct iomap *srcmap)
339562306a36Sopenharmony_ci{
339662306a36Sopenharmony_ci	int ret;
339762306a36Sopenharmony_ci
339862306a36Sopenharmony_ci	/*
339962306a36Sopenharmony_ci	 * Even for writes we don't need to allocate blocks, so just pretend
340062306a36Sopenharmony_ci	 * we are reading to save overhead of starting a transaction.
340162306a36Sopenharmony_ci	 */
340262306a36Sopenharmony_ci	flags &= ~IOMAP_WRITE;
340362306a36Sopenharmony_ci	ret = ext4_iomap_begin(inode, offset, length, flags, iomap, srcmap);
340462306a36Sopenharmony_ci	WARN_ON_ONCE(!ret && iomap->type != IOMAP_MAPPED);
340562306a36Sopenharmony_ci	return ret;
340662306a36Sopenharmony_ci}
340762306a36Sopenharmony_ci
340862306a36Sopenharmony_cistatic int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
340962306a36Sopenharmony_ci			  ssize_t written, unsigned flags, struct iomap *iomap)
341062306a36Sopenharmony_ci{
341162306a36Sopenharmony_ci	/*
341262306a36Sopenharmony_ci	 * Check to see whether an error occurred while writing out the data to
341362306a36Sopenharmony_ci	 * the allocated blocks. If so, return the magic error code so that we
341462306a36Sopenharmony_ci	 * fallback to buffered I/O and attempt to complete the remainder of
341562306a36Sopenharmony_ci	 * the I/O. Any blocks that may have been allocated in preparation for
341662306a36Sopenharmony_ci	 * the direct I/O will be reused during buffered I/O.
341762306a36Sopenharmony_ci	 */
341862306a36Sopenharmony_ci	if (flags & (IOMAP_WRITE | IOMAP_DIRECT) && written == 0)
341962306a36Sopenharmony_ci		return -ENOTBLK;
342062306a36Sopenharmony_ci
342162306a36Sopenharmony_ci	return 0;
342262306a36Sopenharmony_ci}
342362306a36Sopenharmony_ci
342462306a36Sopenharmony_ciconst struct iomap_ops ext4_iomap_ops = {
342562306a36Sopenharmony_ci	.iomap_begin		= ext4_iomap_begin,
342662306a36Sopenharmony_ci	.iomap_end		= ext4_iomap_end,
342762306a36Sopenharmony_ci};
342862306a36Sopenharmony_ci
342962306a36Sopenharmony_ciconst struct iomap_ops ext4_iomap_overwrite_ops = {
343062306a36Sopenharmony_ci	.iomap_begin		= ext4_iomap_overwrite_begin,
343162306a36Sopenharmony_ci	.iomap_end		= ext4_iomap_end,
343262306a36Sopenharmony_ci};
343362306a36Sopenharmony_ci
343462306a36Sopenharmony_cistatic bool ext4_iomap_is_delalloc(struct inode *inode,
343562306a36Sopenharmony_ci				   struct ext4_map_blocks *map)
343662306a36Sopenharmony_ci{
343762306a36Sopenharmony_ci	struct extent_status es;
343862306a36Sopenharmony_ci	ext4_lblk_t offset = 0, end = map->m_lblk + map->m_len - 1;
343962306a36Sopenharmony_ci
344062306a36Sopenharmony_ci	ext4_es_find_extent_range(inode, &ext4_es_is_delayed,
344162306a36Sopenharmony_ci				  map->m_lblk, end, &es);
344262306a36Sopenharmony_ci
344362306a36Sopenharmony_ci	if (!es.es_len || es.es_lblk > end)
344462306a36Sopenharmony_ci		return false;
344562306a36Sopenharmony_ci
344662306a36Sopenharmony_ci	if (es.es_lblk > map->m_lblk) {
344762306a36Sopenharmony_ci		map->m_len = es.es_lblk - map->m_lblk;
344862306a36Sopenharmony_ci		return false;
344962306a36Sopenharmony_ci	}
345062306a36Sopenharmony_ci
345162306a36Sopenharmony_ci	offset = map->m_lblk - es.es_lblk;
345262306a36Sopenharmony_ci	map->m_len = es.es_len - offset;
345362306a36Sopenharmony_ci
345462306a36Sopenharmony_ci	return true;
345562306a36Sopenharmony_ci}
345662306a36Sopenharmony_ci
345762306a36Sopenharmony_cistatic int ext4_iomap_begin_report(struct inode *inode, loff_t offset,
345862306a36Sopenharmony_ci				   loff_t length, unsigned int flags,
345962306a36Sopenharmony_ci				   struct iomap *iomap, struct iomap *srcmap)
346062306a36Sopenharmony_ci{
346162306a36Sopenharmony_ci	int ret;
346262306a36Sopenharmony_ci	bool delalloc = false;
346362306a36Sopenharmony_ci	struct ext4_map_blocks map;
346462306a36Sopenharmony_ci	u8 blkbits = inode->i_blkbits;
346562306a36Sopenharmony_ci
346662306a36Sopenharmony_ci	if ((offset >> blkbits) > EXT4_MAX_LOGICAL_BLOCK)
346762306a36Sopenharmony_ci		return -EINVAL;
346862306a36Sopenharmony_ci
346962306a36Sopenharmony_ci	if (ext4_has_inline_data(inode)) {
347062306a36Sopenharmony_ci		ret = ext4_inline_data_iomap(inode, iomap);
347162306a36Sopenharmony_ci		if (ret != -EAGAIN) {
347262306a36Sopenharmony_ci			if (ret == 0 && offset >= iomap->length)
347362306a36Sopenharmony_ci				ret = -ENOENT;
347462306a36Sopenharmony_ci			return ret;
347562306a36Sopenharmony_ci		}
347662306a36Sopenharmony_ci	}
347762306a36Sopenharmony_ci
347862306a36Sopenharmony_ci	/*
347962306a36Sopenharmony_ci	 * Calculate the first and last logical block respectively.
348062306a36Sopenharmony_ci	 */
348162306a36Sopenharmony_ci	map.m_lblk = offset >> blkbits;
348262306a36Sopenharmony_ci	map.m_len = min_t(loff_t, (offset + length - 1) >> blkbits,
348362306a36Sopenharmony_ci			  EXT4_MAX_LOGICAL_BLOCK) - map.m_lblk + 1;
348462306a36Sopenharmony_ci
348562306a36Sopenharmony_ci	/*
348662306a36Sopenharmony_ci	 * Fiemap callers may call for offset beyond s_bitmap_maxbytes.
348762306a36Sopenharmony_ci	 * So handle it here itself instead of querying ext4_map_blocks().
348862306a36Sopenharmony_ci	 * Since ext4_map_blocks() will warn about it and will return
348962306a36Sopenharmony_ci	 * -EIO error.
349062306a36Sopenharmony_ci	 */
349162306a36Sopenharmony_ci	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
349262306a36Sopenharmony_ci		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
349362306a36Sopenharmony_ci
349462306a36Sopenharmony_ci		if (offset >= sbi->s_bitmap_maxbytes) {
349562306a36Sopenharmony_ci			map.m_flags = 0;
349662306a36Sopenharmony_ci			goto set_iomap;
349762306a36Sopenharmony_ci		}
349862306a36Sopenharmony_ci	}
349962306a36Sopenharmony_ci
350062306a36Sopenharmony_ci	ret = ext4_map_blocks(NULL, inode, &map, 0);
350162306a36Sopenharmony_ci	if (ret < 0)
350262306a36Sopenharmony_ci		return ret;
350362306a36Sopenharmony_ci	if (ret == 0)
350462306a36Sopenharmony_ci		delalloc = ext4_iomap_is_delalloc(inode, &map);
350562306a36Sopenharmony_ci
350662306a36Sopenharmony_ciset_iomap:
350762306a36Sopenharmony_ci	ext4_set_iomap(inode, iomap, &map, offset, length, flags);
350862306a36Sopenharmony_ci	if (delalloc && iomap->type == IOMAP_HOLE)
350962306a36Sopenharmony_ci		iomap->type = IOMAP_DELALLOC;
351062306a36Sopenharmony_ci
351162306a36Sopenharmony_ci	return 0;
351262306a36Sopenharmony_ci}
351362306a36Sopenharmony_ci
351462306a36Sopenharmony_ciconst struct iomap_ops ext4_iomap_report_ops = {
351562306a36Sopenharmony_ci	.iomap_begin = ext4_iomap_begin_report,
351662306a36Sopenharmony_ci};
351762306a36Sopenharmony_ci
351862306a36Sopenharmony_ci/*
351962306a36Sopenharmony_ci * For data=journal mode, folio should be marked dirty only when it was
352062306a36Sopenharmony_ci * writeably mapped. When that happens, it was already attached to the
352162306a36Sopenharmony_ci * transaction and marked as jbddirty (we take care of this in
352262306a36Sopenharmony_ci * ext4_page_mkwrite()). On transaction commit, we writeprotect page mappings
352362306a36Sopenharmony_ci * so we should have nothing to do here, except for the case when someone
352462306a36Sopenharmony_ci * had the page pinned and dirtied the page through this pin (e.g. by doing
352562306a36Sopenharmony_ci * direct IO to it). In that case we'd need to attach buffers here to the
352662306a36Sopenharmony_ci * transaction but we cannot due to lock ordering.  We cannot just dirty the
352762306a36Sopenharmony_ci * folio and leave attached buffers clean, because the buffers' dirty state is
352862306a36Sopenharmony_ci * "definitive".  We cannot just set the buffers dirty or jbddirty because all
352962306a36Sopenharmony_ci * the journalling code will explode.  So what we do is to mark the folio
353062306a36Sopenharmony_ci * "pending dirty" and next time ext4_writepages() is called, attach buffers
353162306a36Sopenharmony_ci * to the transaction appropriately.
353262306a36Sopenharmony_ci */
353362306a36Sopenharmony_cistatic bool ext4_journalled_dirty_folio(struct address_space *mapping,
353462306a36Sopenharmony_ci		struct folio *folio)
353562306a36Sopenharmony_ci{
353662306a36Sopenharmony_ci	WARN_ON_ONCE(!folio_buffers(folio));
353762306a36Sopenharmony_ci	if (folio_maybe_dma_pinned(folio))
353862306a36Sopenharmony_ci		folio_set_checked(folio);
353962306a36Sopenharmony_ci	return filemap_dirty_folio(mapping, folio);
354062306a36Sopenharmony_ci}
354162306a36Sopenharmony_ci
354262306a36Sopenharmony_cistatic bool ext4_dirty_folio(struct address_space *mapping, struct folio *folio)
354362306a36Sopenharmony_ci{
354462306a36Sopenharmony_ci	WARN_ON_ONCE(!folio_test_locked(folio) && !folio_test_dirty(folio));
354562306a36Sopenharmony_ci	WARN_ON_ONCE(!folio_buffers(folio));
354662306a36Sopenharmony_ci	return block_dirty_folio(mapping, folio);
354762306a36Sopenharmony_ci}
354862306a36Sopenharmony_ci
354962306a36Sopenharmony_cistatic int ext4_iomap_swap_activate(struct swap_info_struct *sis,
355062306a36Sopenharmony_ci				    struct file *file, sector_t *span)
355162306a36Sopenharmony_ci{
355262306a36Sopenharmony_ci	return iomap_swapfile_activate(sis, file, span,
355362306a36Sopenharmony_ci				       &ext4_iomap_report_ops);
355462306a36Sopenharmony_ci}
355562306a36Sopenharmony_ci
355662306a36Sopenharmony_cistatic const struct address_space_operations ext4_aops = {
355762306a36Sopenharmony_ci	.read_folio		= ext4_read_folio,
355862306a36Sopenharmony_ci	.readahead		= ext4_readahead,
355962306a36Sopenharmony_ci	.writepages		= ext4_writepages,
356062306a36Sopenharmony_ci	.write_begin		= ext4_write_begin,
356162306a36Sopenharmony_ci	.write_end		= ext4_write_end,
356262306a36Sopenharmony_ci	.dirty_folio		= ext4_dirty_folio,
356362306a36Sopenharmony_ci	.bmap			= ext4_bmap,
356462306a36Sopenharmony_ci	.invalidate_folio	= ext4_invalidate_folio,
356562306a36Sopenharmony_ci	.release_folio		= ext4_release_folio,
356662306a36Sopenharmony_ci	.direct_IO		= noop_direct_IO,
356762306a36Sopenharmony_ci	.migrate_folio		= buffer_migrate_folio,
356862306a36Sopenharmony_ci	.is_partially_uptodate  = block_is_partially_uptodate,
356962306a36Sopenharmony_ci	.error_remove_page	= generic_error_remove_page,
357062306a36Sopenharmony_ci	.swap_activate		= ext4_iomap_swap_activate,
357162306a36Sopenharmony_ci};
357262306a36Sopenharmony_ci
357362306a36Sopenharmony_cistatic const struct address_space_operations ext4_journalled_aops = {
357462306a36Sopenharmony_ci	.read_folio		= ext4_read_folio,
357562306a36Sopenharmony_ci	.readahead		= ext4_readahead,
357662306a36Sopenharmony_ci	.writepages		= ext4_writepages,
357762306a36Sopenharmony_ci	.write_begin		= ext4_write_begin,
357862306a36Sopenharmony_ci	.write_end		= ext4_journalled_write_end,
357962306a36Sopenharmony_ci	.dirty_folio		= ext4_journalled_dirty_folio,
358062306a36Sopenharmony_ci	.bmap			= ext4_bmap,
358162306a36Sopenharmony_ci	.invalidate_folio	= ext4_journalled_invalidate_folio,
358262306a36Sopenharmony_ci	.release_folio		= ext4_release_folio,
358362306a36Sopenharmony_ci	.direct_IO		= noop_direct_IO,
358462306a36Sopenharmony_ci	.migrate_folio		= buffer_migrate_folio_norefs,
358562306a36Sopenharmony_ci	.is_partially_uptodate  = block_is_partially_uptodate,
358662306a36Sopenharmony_ci	.error_remove_page	= generic_error_remove_page,
358762306a36Sopenharmony_ci	.swap_activate		= ext4_iomap_swap_activate,
358862306a36Sopenharmony_ci};
358962306a36Sopenharmony_ci
359062306a36Sopenharmony_cistatic const struct address_space_operations ext4_da_aops = {
359162306a36Sopenharmony_ci	.read_folio		= ext4_read_folio,
359262306a36Sopenharmony_ci	.readahead		= ext4_readahead,
359362306a36Sopenharmony_ci	.writepages		= ext4_writepages,
359462306a36Sopenharmony_ci	.write_begin		= ext4_da_write_begin,
359562306a36Sopenharmony_ci	.write_end		= ext4_da_write_end,
359662306a36Sopenharmony_ci	.dirty_folio		= ext4_dirty_folio,
359762306a36Sopenharmony_ci	.bmap			= ext4_bmap,
359862306a36Sopenharmony_ci	.invalidate_folio	= ext4_invalidate_folio,
359962306a36Sopenharmony_ci	.release_folio		= ext4_release_folio,
360062306a36Sopenharmony_ci	.direct_IO		= noop_direct_IO,
360162306a36Sopenharmony_ci	.migrate_folio		= buffer_migrate_folio,
360262306a36Sopenharmony_ci	.is_partially_uptodate  = block_is_partially_uptodate,
360362306a36Sopenharmony_ci	.error_remove_page	= generic_error_remove_page,
360462306a36Sopenharmony_ci	.swap_activate		= ext4_iomap_swap_activate,
360562306a36Sopenharmony_ci};
360662306a36Sopenharmony_ci
360762306a36Sopenharmony_cistatic const struct address_space_operations ext4_dax_aops = {
360862306a36Sopenharmony_ci	.writepages		= ext4_dax_writepages,
360962306a36Sopenharmony_ci	.direct_IO		= noop_direct_IO,
361062306a36Sopenharmony_ci	.dirty_folio		= noop_dirty_folio,
361162306a36Sopenharmony_ci	.bmap			= ext4_bmap,
361262306a36Sopenharmony_ci	.swap_activate		= ext4_iomap_swap_activate,
361362306a36Sopenharmony_ci};
361462306a36Sopenharmony_ci
361562306a36Sopenharmony_civoid ext4_set_aops(struct inode *inode)
361662306a36Sopenharmony_ci{
361762306a36Sopenharmony_ci	switch (ext4_inode_journal_mode(inode)) {
361862306a36Sopenharmony_ci	case EXT4_INODE_ORDERED_DATA_MODE:
361962306a36Sopenharmony_ci	case EXT4_INODE_WRITEBACK_DATA_MODE:
362062306a36Sopenharmony_ci		break;
362162306a36Sopenharmony_ci	case EXT4_INODE_JOURNAL_DATA_MODE:
362262306a36Sopenharmony_ci		inode->i_mapping->a_ops = &ext4_journalled_aops;
362362306a36Sopenharmony_ci		return;
362462306a36Sopenharmony_ci	default:
362562306a36Sopenharmony_ci		BUG();
362662306a36Sopenharmony_ci	}
362762306a36Sopenharmony_ci	if (IS_DAX(inode))
362862306a36Sopenharmony_ci		inode->i_mapping->a_ops = &ext4_dax_aops;
362962306a36Sopenharmony_ci	else if (test_opt(inode->i_sb, DELALLOC))
363062306a36Sopenharmony_ci		inode->i_mapping->a_ops = &ext4_da_aops;
363162306a36Sopenharmony_ci	else
363262306a36Sopenharmony_ci		inode->i_mapping->a_ops = &ext4_aops;
363362306a36Sopenharmony_ci}
363462306a36Sopenharmony_ci
363562306a36Sopenharmony_cistatic int __ext4_block_zero_page_range(handle_t *handle,
363662306a36Sopenharmony_ci		struct address_space *mapping, loff_t from, loff_t length)
363762306a36Sopenharmony_ci{
363862306a36Sopenharmony_ci	ext4_fsblk_t index = from >> PAGE_SHIFT;
363962306a36Sopenharmony_ci	unsigned offset = from & (PAGE_SIZE-1);
364062306a36Sopenharmony_ci	unsigned blocksize, pos;
364162306a36Sopenharmony_ci	ext4_lblk_t iblock;
364262306a36Sopenharmony_ci	struct inode *inode = mapping->host;
364362306a36Sopenharmony_ci	struct buffer_head *bh;
364462306a36Sopenharmony_ci	struct folio *folio;
364562306a36Sopenharmony_ci	int err = 0;
364662306a36Sopenharmony_ci
364762306a36Sopenharmony_ci	folio = __filemap_get_folio(mapping, from >> PAGE_SHIFT,
364862306a36Sopenharmony_ci				    FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
364962306a36Sopenharmony_ci				    mapping_gfp_constraint(mapping, ~__GFP_FS));
365062306a36Sopenharmony_ci	if (IS_ERR(folio))
365162306a36Sopenharmony_ci		return PTR_ERR(folio);
365262306a36Sopenharmony_ci
365362306a36Sopenharmony_ci	blocksize = inode->i_sb->s_blocksize;
365462306a36Sopenharmony_ci
365562306a36Sopenharmony_ci	iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
365662306a36Sopenharmony_ci
365762306a36Sopenharmony_ci	bh = folio_buffers(folio);
365862306a36Sopenharmony_ci	if (!bh) {
365962306a36Sopenharmony_ci		create_empty_buffers(&folio->page, blocksize, 0);
366062306a36Sopenharmony_ci		bh = folio_buffers(folio);
366162306a36Sopenharmony_ci	}
366262306a36Sopenharmony_ci
366362306a36Sopenharmony_ci	/* Find the buffer that contains "offset" */
366462306a36Sopenharmony_ci	pos = blocksize;
366562306a36Sopenharmony_ci	while (offset >= pos) {
366662306a36Sopenharmony_ci		bh = bh->b_this_page;
366762306a36Sopenharmony_ci		iblock++;
366862306a36Sopenharmony_ci		pos += blocksize;
366962306a36Sopenharmony_ci	}
367062306a36Sopenharmony_ci	if (buffer_freed(bh)) {
367162306a36Sopenharmony_ci		BUFFER_TRACE(bh, "freed: skip");
367262306a36Sopenharmony_ci		goto unlock;
367362306a36Sopenharmony_ci	}
367462306a36Sopenharmony_ci	if (!buffer_mapped(bh)) {
367562306a36Sopenharmony_ci		BUFFER_TRACE(bh, "unmapped");
367662306a36Sopenharmony_ci		ext4_get_block(inode, iblock, bh, 0);
367762306a36Sopenharmony_ci		/* unmapped? It's a hole - nothing to do */
367862306a36Sopenharmony_ci		if (!buffer_mapped(bh)) {
367962306a36Sopenharmony_ci			BUFFER_TRACE(bh, "still unmapped");
368062306a36Sopenharmony_ci			goto unlock;
368162306a36Sopenharmony_ci		}
368262306a36Sopenharmony_ci	}
368362306a36Sopenharmony_ci
368462306a36Sopenharmony_ci	/* Ok, it's mapped. Make sure it's up-to-date */
368562306a36Sopenharmony_ci	if (folio_test_uptodate(folio))
368662306a36Sopenharmony_ci		set_buffer_uptodate(bh);
368762306a36Sopenharmony_ci
368862306a36Sopenharmony_ci	if (!buffer_uptodate(bh)) {
368962306a36Sopenharmony_ci		err = ext4_read_bh_lock(bh, 0, true);
369062306a36Sopenharmony_ci		if (err)
369162306a36Sopenharmony_ci			goto unlock;
369262306a36Sopenharmony_ci		if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
369362306a36Sopenharmony_ci			/* We expect the key to be set. */
369462306a36Sopenharmony_ci			BUG_ON(!fscrypt_has_encryption_key(inode));
369562306a36Sopenharmony_ci			err = fscrypt_decrypt_pagecache_blocks(folio,
369662306a36Sopenharmony_ci							       blocksize,
369762306a36Sopenharmony_ci							       bh_offset(bh));
369862306a36Sopenharmony_ci			if (err) {
369962306a36Sopenharmony_ci				clear_buffer_uptodate(bh);
370062306a36Sopenharmony_ci				goto unlock;
370162306a36Sopenharmony_ci			}
370262306a36Sopenharmony_ci		}
370362306a36Sopenharmony_ci	}
370462306a36Sopenharmony_ci	if (ext4_should_journal_data(inode)) {
370562306a36Sopenharmony_ci		BUFFER_TRACE(bh, "get write access");
370662306a36Sopenharmony_ci		err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
370762306a36Sopenharmony_ci						    EXT4_JTR_NONE);
370862306a36Sopenharmony_ci		if (err)
370962306a36Sopenharmony_ci			goto unlock;
371062306a36Sopenharmony_ci	}
371162306a36Sopenharmony_ci	folio_zero_range(folio, offset, length);
371262306a36Sopenharmony_ci	BUFFER_TRACE(bh, "zeroed end of block");
371362306a36Sopenharmony_ci
371462306a36Sopenharmony_ci	if (ext4_should_journal_data(inode)) {
371562306a36Sopenharmony_ci		err = ext4_dirty_journalled_data(handle, bh);
371662306a36Sopenharmony_ci	} else {
371762306a36Sopenharmony_ci		err = 0;
371862306a36Sopenharmony_ci		mark_buffer_dirty(bh);
371962306a36Sopenharmony_ci		if (ext4_should_order_data(inode))
372062306a36Sopenharmony_ci			err = ext4_jbd2_inode_add_write(handle, inode, from,
372162306a36Sopenharmony_ci					length);
372262306a36Sopenharmony_ci	}
372362306a36Sopenharmony_ci
372462306a36Sopenharmony_ciunlock:
372562306a36Sopenharmony_ci	folio_unlock(folio);
372662306a36Sopenharmony_ci	folio_put(folio);
372762306a36Sopenharmony_ci	return err;
372862306a36Sopenharmony_ci}
372962306a36Sopenharmony_ci
373062306a36Sopenharmony_ci/*
373162306a36Sopenharmony_ci * ext4_block_zero_page_range() zeros out a mapping of length 'length'
373262306a36Sopenharmony_ci * starting from file offset 'from'.  The range to be zero'd must
373362306a36Sopenharmony_ci * be contained with in one block.  If the specified range exceeds
373462306a36Sopenharmony_ci * the end of the block it will be shortened to end of the block
373562306a36Sopenharmony_ci * that corresponds to 'from'
373662306a36Sopenharmony_ci */
373762306a36Sopenharmony_cistatic int ext4_block_zero_page_range(handle_t *handle,
373862306a36Sopenharmony_ci		struct address_space *mapping, loff_t from, loff_t length)
373962306a36Sopenharmony_ci{
374062306a36Sopenharmony_ci	struct inode *inode = mapping->host;
374162306a36Sopenharmony_ci	unsigned offset = from & (PAGE_SIZE-1);
374262306a36Sopenharmony_ci	unsigned blocksize = inode->i_sb->s_blocksize;
374362306a36Sopenharmony_ci	unsigned max = blocksize - (offset & (blocksize - 1));
374462306a36Sopenharmony_ci
374562306a36Sopenharmony_ci	/*
374662306a36Sopenharmony_ci	 * correct length if it does not fall between
374762306a36Sopenharmony_ci	 * 'from' and the end of the block
374862306a36Sopenharmony_ci	 */
374962306a36Sopenharmony_ci	if (length > max || length < 0)
375062306a36Sopenharmony_ci		length = max;
375162306a36Sopenharmony_ci
375262306a36Sopenharmony_ci	if (IS_DAX(inode)) {
375362306a36Sopenharmony_ci		return dax_zero_range(inode, from, length, NULL,
375462306a36Sopenharmony_ci				      &ext4_iomap_ops);
375562306a36Sopenharmony_ci	}
375662306a36Sopenharmony_ci	return __ext4_block_zero_page_range(handle, mapping, from, length);
375762306a36Sopenharmony_ci}
375862306a36Sopenharmony_ci
375962306a36Sopenharmony_ci/*
376062306a36Sopenharmony_ci * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
376162306a36Sopenharmony_ci * up to the end of the block which corresponds to `from'.
376262306a36Sopenharmony_ci * This required during truncate. We need to physically zero the tail end
376362306a36Sopenharmony_ci * of that block so it doesn't yield old data if the file is later grown.
376462306a36Sopenharmony_ci */
376562306a36Sopenharmony_cistatic int ext4_block_truncate_page(handle_t *handle,
376662306a36Sopenharmony_ci		struct address_space *mapping, loff_t from)
376762306a36Sopenharmony_ci{
376862306a36Sopenharmony_ci	unsigned offset = from & (PAGE_SIZE-1);
376962306a36Sopenharmony_ci	unsigned length;
377062306a36Sopenharmony_ci	unsigned blocksize;
377162306a36Sopenharmony_ci	struct inode *inode = mapping->host;
377262306a36Sopenharmony_ci
377362306a36Sopenharmony_ci	/* If we are processing an encrypted inode during orphan list handling */
377462306a36Sopenharmony_ci	if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode))
377562306a36Sopenharmony_ci		return 0;
377662306a36Sopenharmony_ci
377762306a36Sopenharmony_ci	blocksize = inode->i_sb->s_blocksize;
377862306a36Sopenharmony_ci	length = blocksize - (offset & (blocksize - 1));
377962306a36Sopenharmony_ci
378062306a36Sopenharmony_ci	return ext4_block_zero_page_range(handle, mapping, from, length);
378162306a36Sopenharmony_ci}
378262306a36Sopenharmony_ci
378362306a36Sopenharmony_ciint ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
378462306a36Sopenharmony_ci			     loff_t lstart, loff_t length)
378562306a36Sopenharmony_ci{
378662306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
378762306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
378862306a36Sopenharmony_ci	unsigned partial_start, partial_end;
378962306a36Sopenharmony_ci	ext4_fsblk_t start, end;
379062306a36Sopenharmony_ci	loff_t byte_end = (lstart + length - 1);
379162306a36Sopenharmony_ci	int err = 0;
379262306a36Sopenharmony_ci
379362306a36Sopenharmony_ci	partial_start = lstart & (sb->s_blocksize - 1);
379462306a36Sopenharmony_ci	partial_end = byte_end & (sb->s_blocksize - 1);
379562306a36Sopenharmony_ci
379662306a36Sopenharmony_ci	start = lstart >> sb->s_blocksize_bits;
379762306a36Sopenharmony_ci	end = byte_end >> sb->s_blocksize_bits;
379862306a36Sopenharmony_ci
379962306a36Sopenharmony_ci	/* Handle partial zero within the single block */
380062306a36Sopenharmony_ci	if (start == end &&
380162306a36Sopenharmony_ci	    (partial_start || (partial_end != sb->s_blocksize - 1))) {
380262306a36Sopenharmony_ci		err = ext4_block_zero_page_range(handle, mapping,
380362306a36Sopenharmony_ci						 lstart, length);
380462306a36Sopenharmony_ci		return err;
380562306a36Sopenharmony_ci	}
380662306a36Sopenharmony_ci	/* Handle partial zero out on the start of the range */
380762306a36Sopenharmony_ci	if (partial_start) {
380862306a36Sopenharmony_ci		err = ext4_block_zero_page_range(handle, mapping,
380962306a36Sopenharmony_ci						 lstart, sb->s_blocksize);
381062306a36Sopenharmony_ci		if (err)
381162306a36Sopenharmony_ci			return err;
381262306a36Sopenharmony_ci	}
381362306a36Sopenharmony_ci	/* Handle partial zero out on the end of the range */
381462306a36Sopenharmony_ci	if (partial_end != sb->s_blocksize - 1)
381562306a36Sopenharmony_ci		err = ext4_block_zero_page_range(handle, mapping,
381662306a36Sopenharmony_ci						 byte_end - partial_end,
381762306a36Sopenharmony_ci						 partial_end + 1);
381862306a36Sopenharmony_ci	return err;
381962306a36Sopenharmony_ci}
382062306a36Sopenharmony_ci
382162306a36Sopenharmony_ciint ext4_can_truncate(struct inode *inode)
382262306a36Sopenharmony_ci{
382362306a36Sopenharmony_ci	if (S_ISREG(inode->i_mode))
382462306a36Sopenharmony_ci		return 1;
382562306a36Sopenharmony_ci	if (S_ISDIR(inode->i_mode))
382662306a36Sopenharmony_ci		return 1;
382762306a36Sopenharmony_ci	if (S_ISLNK(inode->i_mode))
382862306a36Sopenharmony_ci		return !ext4_inode_is_fast_symlink(inode);
382962306a36Sopenharmony_ci	return 0;
383062306a36Sopenharmony_ci}
383162306a36Sopenharmony_ci
383262306a36Sopenharmony_ci/*
383362306a36Sopenharmony_ci * We have to make sure i_disksize gets properly updated before we truncate
383462306a36Sopenharmony_ci * page cache due to hole punching or zero range. Otherwise i_disksize update
383562306a36Sopenharmony_ci * can get lost as it may have been postponed to submission of writeback but
383662306a36Sopenharmony_ci * that will never happen after we truncate page cache.
383762306a36Sopenharmony_ci */
383862306a36Sopenharmony_ciint ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
383962306a36Sopenharmony_ci				      loff_t len)
384062306a36Sopenharmony_ci{
384162306a36Sopenharmony_ci	handle_t *handle;
384262306a36Sopenharmony_ci	int ret;
384362306a36Sopenharmony_ci
384462306a36Sopenharmony_ci	loff_t size = i_size_read(inode);
384562306a36Sopenharmony_ci
384662306a36Sopenharmony_ci	WARN_ON(!inode_is_locked(inode));
384762306a36Sopenharmony_ci	if (offset > size || offset + len < size)
384862306a36Sopenharmony_ci		return 0;
384962306a36Sopenharmony_ci
385062306a36Sopenharmony_ci	if (EXT4_I(inode)->i_disksize >= size)
385162306a36Sopenharmony_ci		return 0;
385262306a36Sopenharmony_ci
385362306a36Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
385462306a36Sopenharmony_ci	if (IS_ERR(handle))
385562306a36Sopenharmony_ci		return PTR_ERR(handle);
385662306a36Sopenharmony_ci	ext4_update_i_disksize(inode, size);
385762306a36Sopenharmony_ci	ret = ext4_mark_inode_dirty(handle, inode);
385862306a36Sopenharmony_ci	ext4_journal_stop(handle);
385962306a36Sopenharmony_ci
386062306a36Sopenharmony_ci	return ret;
386162306a36Sopenharmony_ci}
386262306a36Sopenharmony_ci
386362306a36Sopenharmony_cistatic void ext4_wait_dax_page(struct inode *inode)
386462306a36Sopenharmony_ci{
386562306a36Sopenharmony_ci	filemap_invalidate_unlock(inode->i_mapping);
386662306a36Sopenharmony_ci	schedule();
386762306a36Sopenharmony_ci	filemap_invalidate_lock(inode->i_mapping);
386862306a36Sopenharmony_ci}
386962306a36Sopenharmony_ci
387062306a36Sopenharmony_ciint ext4_break_layouts(struct inode *inode)
387162306a36Sopenharmony_ci{
387262306a36Sopenharmony_ci	struct page *page;
387362306a36Sopenharmony_ci	int error;
387462306a36Sopenharmony_ci
387562306a36Sopenharmony_ci	if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
387662306a36Sopenharmony_ci		return -EINVAL;
387762306a36Sopenharmony_ci
387862306a36Sopenharmony_ci	do {
387962306a36Sopenharmony_ci		page = dax_layout_busy_page(inode->i_mapping);
388062306a36Sopenharmony_ci		if (!page)
388162306a36Sopenharmony_ci			return 0;
388262306a36Sopenharmony_ci
388362306a36Sopenharmony_ci		error = ___wait_var_event(&page->_refcount,
388462306a36Sopenharmony_ci				atomic_read(&page->_refcount) == 1,
388562306a36Sopenharmony_ci				TASK_INTERRUPTIBLE, 0, 0,
388662306a36Sopenharmony_ci				ext4_wait_dax_page(inode));
388762306a36Sopenharmony_ci	} while (error == 0);
388862306a36Sopenharmony_ci
388962306a36Sopenharmony_ci	return error;
389062306a36Sopenharmony_ci}
389162306a36Sopenharmony_ci
/*
 * ext4_punch_hole: punches a hole in a file by releasing the blocks
 * associated with the given offset and length
 *
 * @file:   File in which to punch the hole
 * @offset: The offset where the hole will begin
 * @length: The length of the hole
 *
 * Returns: 0 on success or negative on failure
 */
390262306a36Sopenharmony_ci
390362306a36Sopenharmony_ciint ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
390462306a36Sopenharmony_ci{
390562306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
390662306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
390762306a36Sopenharmony_ci	ext4_lblk_t first_block, stop_block;
390862306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
390962306a36Sopenharmony_ci	loff_t first_block_offset, last_block_offset, max_length;
391062306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
391162306a36Sopenharmony_ci	handle_t *handle;
391262306a36Sopenharmony_ci	unsigned int credits;
391362306a36Sopenharmony_ci	int ret = 0, ret2 = 0;
391462306a36Sopenharmony_ci
391562306a36Sopenharmony_ci	trace_ext4_punch_hole(inode, offset, length, 0);
391662306a36Sopenharmony_ci
391762306a36Sopenharmony_ci	/*
391862306a36Sopenharmony_ci	 * Write out all dirty pages to avoid race conditions
391962306a36Sopenharmony_ci	 * Then release them.
392062306a36Sopenharmony_ci	 */
392162306a36Sopenharmony_ci	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
392262306a36Sopenharmony_ci		ret = filemap_write_and_wait_range(mapping, offset,
392362306a36Sopenharmony_ci						   offset + length - 1);
392462306a36Sopenharmony_ci		if (ret)
392562306a36Sopenharmony_ci			return ret;
392662306a36Sopenharmony_ci	}
392762306a36Sopenharmony_ci
392862306a36Sopenharmony_ci	inode_lock(inode);
392962306a36Sopenharmony_ci
393062306a36Sopenharmony_ci	/* No need to punch hole beyond i_size */
393162306a36Sopenharmony_ci	if (offset >= inode->i_size)
393262306a36Sopenharmony_ci		goto out_mutex;
393362306a36Sopenharmony_ci
393462306a36Sopenharmony_ci	/*
393562306a36Sopenharmony_ci	 * If the hole extends beyond i_size, set the hole
393662306a36Sopenharmony_ci	 * to end after the page that contains i_size
393762306a36Sopenharmony_ci	 */
393862306a36Sopenharmony_ci	if (offset + length > inode->i_size) {
393962306a36Sopenharmony_ci		length = inode->i_size +
394062306a36Sopenharmony_ci		   PAGE_SIZE - (inode->i_size & (PAGE_SIZE - 1)) -
394162306a36Sopenharmony_ci		   offset;
394262306a36Sopenharmony_ci	}
394362306a36Sopenharmony_ci
394462306a36Sopenharmony_ci	/*
394562306a36Sopenharmony_ci	 * For punch hole the length + offset needs to be within one block
394662306a36Sopenharmony_ci	 * before last range. Adjust the length if it goes beyond that limit.
394762306a36Sopenharmony_ci	 */
394862306a36Sopenharmony_ci	max_length = sbi->s_bitmap_maxbytes - inode->i_sb->s_blocksize;
394962306a36Sopenharmony_ci	if (offset + length > max_length)
395062306a36Sopenharmony_ci		length = max_length - offset;
395162306a36Sopenharmony_ci
395262306a36Sopenharmony_ci	if (offset & (sb->s_blocksize - 1) ||
395362306a36Sopenharmony_ci	    (offset + length) & (sb->s_blocksize - 1)) {
395462306a36Sopenharmony_ci		/*
395562306a36Sopenharmony_ci		 * Attach jinode to inode for jbd2 if we do any zeroing of
395662306a36Sopenharmony_ci		 * partial block
395762306a36Sopenharmony_ci		 */
395862306a36Sopenharmony_ci		ret = ext4_inode_attach_jinode(inode);
395962306a36Sopenharmony_ci		if (ret < 0)
396062306a36Sopenharmony_ci			goto out_mutex;
396162306a36Sopenharmony_ci
396262306a36Sopenharmony_ci	}
396362306a36Sopenharmony_ci
396462306a36Sopenharmony_ci	/* Wait all existing dio workers, newcomers will block on i_rwsem */
396562306a36Sopenharmony_ci	inode_dio_wait(inode);
396662306a36Sopenharmony_ci
396762306a36Sopenharmony_ci	ret = file_modified(file);
396862306a36Sopenharmony_ci	if (ret)
396962306a36Sopenharmony_ci		goto out_mutex;
397062306a36Sopenharmony_ci
397162306a36Sopenharmony_ci	/*
397262306a36Sopenharmony_ci	 * Prevent page faults from reinstantiating pages we have released from
397362306a36Sopenharmony_ci	 * page cache.
397462306a36Sopenharmony_ci	 */
397562306a36Sopenharmony_ci	filemap_invalidate_lock(mapping);
397662306a36Sopenharmony_ci
397762306a36Sopenharmony_ci	ret = ext4_break_layouts(inode);
397862306a36Sopenharmony_ci	if (ret)
397962306a36Sopenharmony_ci		goto out_dio;
398062306a36Sopenharmony_ci
398162306a36Sopenharmony_ci	first_block_offset = round_up(offset, sb->s_blocksize);
398262306a36Sopenharmony_ci	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
398362306a36Sopenharmony_ci
398462306a36Sopenharmony_ci	/* Now release the pages and zero block aligned part of pages*/
398562306a36Sopenharmony_ci	if (last_block_offset > first_block_offset) {
398662306a36Sopenharmony_ci		ret = ext4_update_disksize_before_punch(inode, offset, length);
398762306a36Sopenharmony_ci		if (ret)
398862306a36Sopenharmony_ci			goto out_dio;
398962306a36Sopenharmony_ci		truncate_pagecache_range(inode, first_block_offset,
399062306a36Sopenharmony_ci					 last_block_offset);
399162306a36Sopenharmony_ci	}
399262306a36Sopenharmony_ci
399362306a36Sopenharmony_ci	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
399462306a36Sopenharmony_ci		credits = ext4_writepage_trans_blocks(inode);
399562306a36Sopenharmony_ci	else
399662306a36Sopenharmony_ci		credits = ext4_blocks_for_truncate(inode);
399762306a36Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
399862306a36Sopenharmony_ci	if (IS_ERR(handle)) {
399962306a36Sopenharmony_ci		ret = PTR_ERR(handle);
400062306a36Sopenharmony_ci		ext4_std_error(sb, ret);
400162306a36Sopenharmony_ci		goto out_dio;
400262306a36Sopenharmony_ci	}
400362306a36Sopenharmony_ci
400462306a36Sopenharmony_ci	ret = ext4_zero_partial_blocks(handle, inode, offset,
400562306a36Sopenharmony_ci				       length);
400662306a36Sopenharmony_ci	if (ret)
400762306a36Sopenharmony_ci		goto out_stop;
400862306a36Sopenharmony_ci
400962306a36Sopenharmony_ci	first_block = (offset + sb->s_blocksize - 1) >>
401062306a36Sopenharmony_ci		EXT4_BLOCK_SIZE_BITS(sb);
401162306a36Sopenharmony_ci	stop_block = (offset + length) >> EXT4_BLOCK_SIZE_BITS(sb);
401262306a36Sopenharmony_ci
401362306a36Sopenharmony_ci	/* If there are blocks to remove, do it */
401462306a36Sopenharmony_ci	if (stop_block > first_block) {
401562306a36Sopenharmony_ci
401662306a36Sopenharmony_ci		down_write(&EXT4_I(inode)->i_data_sem);
401762306a36Sopenharmony_ci		ext4_discard_preallocations(inode, 0);
401862306a36Sopenharmony_ci
401962306a36Sopenharmony_ci		ext4_es_remove_extent(inode, first_block,
402062306a36Sopenharmony_ci				      stop_block - first_block);
402162306a36Sopenharmony_ci
402262306a36Sopenharmony_ci		if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
402362306a36Sopenharmony_ci			ret = ext4_ext_remove_space(inode, first_block,
402462306a36Sopenharmony_ci						    stop_block - 1);
402562306a36Sopenharmony_ci		else
402662306a36Sopenharmony_ci			ret = ext4_ind_remove_space(handle, inode, first_block,
402762306a36Sopenharmony_ci						    stop_block);
402862306a36Sopenharmony_ci
402962306a36Sopenharmony_ci		up_write(&EXT4_I(inode)->i_data_sem);
403062306a36Sopenharmony_ci	}
403162306a36Sopenharmony_ci	ext4_fc_track_range(handle, inode, first_block, stop_block);
403262306a36Sopenharmony_ci	if (IS_SYNC(inode))
403362306a36Sopenharmony_ci		ext4_handle_sync(handle);
403462306a36Sopenharmony_ci
403562306a36Sopenharmony_ci	inode->i_mtime = inode_set_ctime_current(inode);
403662306a36Sopenharmony_ci	ret2 = ext4_mark_inode_dirty(handle, inode);
403762306a36Sopenharmony_ci	if (unlikely(ret2))
403862306a36Sopenharmony_ci		ret = ret2;
403962306a36Sopenharmony_ci	if (ret >= 0)
404062306a36Sopenharmony_ci		ext4_update_inode_fsync_trans(handle, inode, 1);
404162306a36Sopenharmony_ciout_stop:
404262306a36Sopenharmony_ci	ext4_journal_stop(handle);
404362306a36Sopenharmony_ciout_dio:
404462306a36Sopenharmony_ci	filemap_invalidate_unlock(mapping);
404562306a36Sopenharmony_ciout_mutex:
404662306a36Sopenharmony_ci	inode_unlock(inode);
404762306a36Sopenharmony_ci	return ret;
404862306a36Sopenharmony_ci}
404962306a36Sopenharmony_ci
405062306a36Sopenharmony_ciint ext4_inode_attach_jinode(struct inode *inode)
405162306a36Sopenharmony_ci{
405262306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
405362306a36Sopenharmony_ci	struct jbd2_inode *jinode;
405462306a36Sopenharmony_ci
405562306a36Sopenharmony_ci	if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal)
405662306a36Sopenharmony_ci		return 0;
405762306a36Sopenharmony_ci
405862306a36Sopenharmony_ci	jinode = jbd2_alloc_inode(GFP_KERNEL);
405962306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
406062306a36Sopenharmony_ci	if (!ei->jinode) {
406162306a36Sopenharmony_ci		if (!jinode) {
406262306a36Sopenharmony_ci			spin_unlock(&inode->i_lock);
406362306a36Sopenharmony_ci			return -ENOMEM;
406462306a36Sopenharmony_ci		}
406562306a36Sopenharmony_ci		ei->jinode = jinode;
406662306a36Sopenharmony_ci		jbd2_journal_init_jbd_inode(ei->jinode, inode);
406762306a36Sopenharmony_ci		jinode = NULL;
406862306a36Sopenharmony_ci	}
406962306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
407062306a36Sopenharmony_ci	if (unlikely(jinode != NULL))
407162306a36Sopenharmony_ci		jbd2_free_inode(jinode);
407262306a36Sopenharmony_ci	return 0;
407362306a36Sopenharmony_ci}
407462306a36Sopenharmony_ci
407562306a36Sopenharmony_ci/*
407662306a36Sopenharmony_ci * ext4_truncate()
407762306a36Sopenharmony_ci *
407862306a36Sopenharmony_ci * We block out ext4_get_block() block instantiations across the entire
407962306a36Sopenharmony_ci * transaction, and VFS/VM ensures that ext4_truncate() cannot run
408062306a36Sopenharmony_ci * simultaneously on behalf of the same inode.
408162306a36Sopenharmony_ci *
408262306a36Sopenharmony_ci * As we work through the truncate and commit bits of it to the journal there
408362306a36Sopenharmony_ci * is one core, guiding principle: the file's tree must always be consistent on
408462306a36Sopenharmony_ci * disk.  We must be able to restart the truncate after a crash.
408562306a36Sopenharmony_ci *
408662306a36Sopenharmony_ci * The file's tree may be transiently inconsistent in memory (although it
408762306a36Sopenharmony_ci * probably isn't), but whenever we close off and commit a journal transaction,
408862306a36Sopenharmony_ci * the contents of (the filesystem + the journal) must be consistent and
408962306a36Sopenharmony_ci * restartable.  It's pretty simple, really: bottom up, right to left (although
409062306a36Sopenharmony_ci * left-to-right works OK too).
409162306a36Sopenharmony_ci *
409262306a36Sopenharmony_ci * Note that at recovery time, journal replay occurs *before* the restart of
409362306a36Sopenharmony_ci * truncate against the orphan inode list.
409462306a36Sopenharmony_ci *
409562306a36Sopenharmony_ci * The committed inode has the new, desired i_size (which is the same as
409662306a36Sopenharmony_ci * i_disksize in this case).  After a crash, ext4_orphan_cleanup() will see
409762306a36Sopenharmony_ci * that this inode's truncate did not complete and it will again call
409862306a36Sopenharmony_ci * ext4_truncate() to have another go.  So there will be instantiated blocks
409962306a36Sopenharmony_ci * to the right of the truncation point in a crashed ext4 filesystem.  But
410062306a36Sopenharmony_ci * that's fine - as long as they are linked from the inode, the post-crash
410162306a36Sopenharmony_ci * ext4_truncate() run will find them and release them.
410262306a36Sopenharmony_ci */
410362306a36Sopenharmony_ciint ext4_truncate(struct inode *inode)
410462306a36Sopenharmony_ci{
410562306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
410662306a36Sopenharmony_ci	unsigned int credits;
410762306a36Sopenharmony_ci	int err = 0, err2;
410862306a36Sopenharmony_ci	handle_t *handle;
410962306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
411062306a36Sopenharmony_ci
411162306a36Sopenharmony_ci	/*
411262306a36Sopenharmony_ci	 * There is a possibility that we're either freeing the inode
411362306a36Sopenharmony_ci	 * or it's a completely new inode. In those cases we might not
411462306a36Sopenharmony_ci	 * have i_rwsem locked because it's not necessary.
411562306a36Sopenharmony_ci	 */
411662306a36Sopenharmony_ci	if (!(inode->i_state & (I_NEW|I_FREEING)))
411762306a36Sopenharmony_ci		WARN_ON(!inode_is_locked(inode));
411862306a36Sopenharmony_ci	trace_ext4_truncate_enter(inode);
411962306a36Sopenharmony_ci
412062306a36Sopenharmony_ci	if (!ext4_can_truncate(inode))
412162306a36Sopenharmony_ci		goto out_trace;
412262306a36Sopenharmony_ci
412362306a36Sopenharmony_ci	if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC))
412462306a36Sopenharmony_ci		ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
412562306a36Sopenharmony_ci
412662306a36Sopenharmony_ci	if (ext4_has_inline_data(inode)) {
412762306a36Sopenharmony_ci		int has_inline = 1;
412862306a36Sopenharmony_ci
412962306a36Sopenharmony_ci		err = ext4_inline_data_truncate(inode, &has_inline);
413062306a36Sopenharmony_ci		if (err || has_inline)
413162306a36Sopenharmony_ci			goto out_trace;
413262306a36Sopenharmony_ci	}
413362306a36Sopenharmony_ci
413462306a36Sopenharmony_ci	/* If we zero-out tail of the page, we have to create jinode for jbd2 */
413562306a36Sopenharmony_ci	if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
413662306a36Sopenharmony_ci		err = ext4_inode_attach_jinode(inode);
413762306a36Sopenharmony_ci		if (err)
413862306a36Sopenharmony_ci			goto out_trace;
413962306a36Sopenharmony_ci	}
414062306a36Sopenharmony_ci
414162306a36Sopenharmony_ci	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
414262306a36Sopenharmony_ci		credits = ext4_writepage_trans_blocks(inode);
414362306a36Sopenharmony_ci	else
414462306a36Sopenharmony_ci		credits = ext4_blocks_for_truncate(inode);
414562306a36Sopenharmony_ci
414662306a36Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
414762306a36Sopenharmony_ci	if (IS_ERR(handle)) {
414862306a36Sopenharmony_ci		err = PTR_ERR(handle);
414962306a36Sopenharmony_ci		goto out_trace;
415062306a36Sopenharmony_ci	}
415162306a36Sopenharmony_ci
415262306a36Sopenharmony_ci	if (inode->i_size & (inode->i_sb->s_blocksize - 1))
415362306a36Sopenharmony_ci		ext4_block_truncate_page(handle, mapping, inode->i_size);
415462306a36Sopenharmony_ci
415562306a36Sopenharmony_ci	/*
415662306a36Sopenharmony_ci	 * We add the inode to the orphan list, so that if this
415762306a36Sopenharmony_ci	 * truncate spans multiple transactions, and we crash, we will
415862306a36Sopenharmony_ci	 * resume the truncate when the filesystem recovers.  It also
415962306a36Sopenharmony_ci	 * marks the inode dirty, to catch the new size.
416062306a36Sopenharmony_ci	 *
416162306a36Sopenharmony_ci	 * Implication: the file must always be in a sane, consistent
416262306a36Sopenharmony_ci	 * truncatable state while each transaction commits.
416362306a36Sopenharmony_ci	 */
416462306a36Sopenharmony_ci	err = ext4_orphan_add(handle, inode);
416562306a36Sopenharmony_ci	if (err)
416662306a36Sopenharmony_ci		goto out_stop;
416762306a36Sopenharmony_ci
416862306a36Sopenharmony_ci	down_write(&EXT4_I(inode)->i_data_sem);
416962306a36Sopenharmony_ci
417062306a36Sopenharmony_ci	ext4_discard_preallocations(inode, 0);
417162306a36Sopenharmony_ci
417262306a36Sopenharmony_ci	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
417362306a36Sopenharmony_ci		err = ext4_ext_truncate(handle, inode);
417462306a36Sopenharmony_ci	else
417562306a36Sopenharmony_ci		ext4_ind_truncate(handle, inode);
417662306a36Sopenharmony_ci
417762306a36Sopenharmony_ci	up_write(&ei->i_data_sem);
417862306a36Sopenharmony_ci	if (err)
417962306a36Sopenharmony_ci		goto out_stop;
418062306a36Sopenharmony_ci
418162306a36Sopenharmony_ci	if (IS_SYNC(inode))
418262306a36Sopenharmony_ci		ext4_handle_sync(handle);
418362306a36Sopenharmony_ci
418462306a36Sopenharmony_ciout_stop:
418562306a36Sopenharmony_ci	/*
418662306a36Sopenharmony_ci	 * If this was a simple ftruncate() and the file will remain alive,
418762306a36Sopenharmony_ci	 * then we need to clear up the orphan record which we created above.
418862306a36Sopenharmony_ci	 * However, if this was a real unlink then we were called by
418962306a36Sopenharmony_ci	 * ext4_evict_inode(), and we allow that function to clean up the
419062306a36Sopenharmony_ci	 * orphan info for us.
419162306a36Sopenharmony_ci	 */
419262306a36Sopenharmony_ci	if (inode->i_nlink)
419362306a36Sopenharmony_ci		ext4_orphan_del(handle, inode);
419462306a36Sopenharmony_ci
419562306a36Sopenharmony_ci	inode->i_mtime = inode_set_ctime_current(inode);
419662306a36Sopenharmony_ci	err2 = ext4_mark_inode_dirty(handle, inode);
419762306a36Sopenharmony_ci	if (unlikely(err2 && !err))
419862306a36Sopenharmony_ci		err = err2;
419962306a36Sopenharmony_ci	ext4_journal_stop(handle);
420062306a36Sopenharmony_ci
420162306a36Sopenharmony_ciout_trace:
420262306a36Sopenharmony_ci	trace_ext4_truncate_exit(inode);
420362306a36Sopenharmony_ci	return err;
420462306a36Sopenharmony_ci}
420562306a36Sopenharmony_ci
420662306a36Sopenharmony_cistatic inline u64 ext4_inode_peek_iversion(const struct inode *inode)
420762306a36Sopenharmony_ci{
420862306a36Sopenharmony_ci	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
420962306a36Sopenharmony_ci		return inode_peek_iversion_raw(inode);
421062306a36Sopenharmony_ci	else
421162306a36Sopenharmony_ci		return inode_peek_iversion(inode);
421262306a36Sopenharmony_ci}
421362306a36Sopenharmony_ci
421462306a36Sopenharmony_cistatic int ext4_inode_blocks_set(struct ext4_inode *raw_inode,
421562306a36Sopenharmony_ci				 struct ext4_inode_info *ei)
421662306a36Sopenharmony_ci{
421762306a36Sopenharmony_ci	struct inode *inode = &(ei->vfs_inode);
421862306a36Sopenharmony_ci	u64 i_blocks = READ_ONCE(inode->i_blocks);
421962306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
422062306a36Sopenharmony_ci
422162306a36Sopenharmony_ci	if (i_blocks <= ~0U) {
422262306a36Sopenharmony_ci		/*
422362306a36Sopenharmony_ci		 * i_blocks can be represented in a 32 bit variable
422462306a36Sopenharmony_ci		 * as multiple of 512 bytes
422562306a36Sopenharmony_ci		 */
422662306a36Sopenharmony_ci		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
422762306a36Sopenharmony_ci		raw_inode->i_blocks_high = 0;
422862306a36Sopenharmony_ci		ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
422962306a36Sopenharmony_ci		return 0;
423062306a36Sopenharmony_ci	}
423162306a36Sopenharmony_ci
423262306a36Sopenharmony_ci	/*
423362306a36Sopenharmony_ci	 * This should never happen since sb->s_maxbytes should not have
423462306a36Sopenharmony_ci	 * allowed this, sb->s_maxbytes was set according to the huge_file
423562306a36Sopenharmony_ci	 * feature in ext4_fill_super().
423662306a36Sopenharmony_ci	 */
423762306a36Sopenharmony_ci	if (!ext4_has_feature_huge_file(sb))
423862306a36Sopenharmony_ci		return -EFSCORRUPTED;
423962306a36Sopenharmony_ci
424062306a36Sopenharmony_ci	if (i_blocks <= 0xffffffffffffULL) {
424162306a36Sopenharmony_ci		/*
424262306a36Sopenharmony_ci		 * i_blocks can be represented in a 48 bit variable
424362306a36Sopenharmony_ci		 * as multiple of 512 bytes
424462306a36Sopenharmony_ci		 */
424562306a36Sopenharmony_ci		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
424662306a36Sopenharmony_ci		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
424762306a36Sopenharmony_ci		ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE);
424862306a36Sopenharmony_ci	} else {
424962306a36Sopenharmony_ci		ext4_set_inode_flag(inode, EXT4_INODE_HUGE_FILE);
425062306a36Sopenharmony_ci		/* i_block is stored in file system block size */
425162306a36Sopenharmony_ci		i_blocks = i_blocks >> (inode->i_blkbits - 9);
425262306a36Sopenharmony_ci		raw_inode->i_blocks_lo   = cpu_to_le32(i_blocks);
425362306a36Sopenharmony_ci		raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
425462306a36Sopenharmony_ci	}
425562306a36Sopenharmony_ci	return 0;
425662306a36Sopenharmony_ci}
425762306a36Sopenharmony_ci
425862306a36Sopenharmony_cistatic int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode)
425962306a36Sopenharmony_ci{
426062306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
426162306a36Sopenharmony_ci	uid_t i_uid;
426262306a36Sopenharmony_ci	gid_t i_gid;
426362306a36Sopenharmony_ci	projid_t i_projid;
426462306a36Sopenharmony_ci	int block;
426562306a36Sopenharmony_ci	int err;
426662306a36Sopenharmony_ci
426762306a36Sopenharmony_ci	err = ext4_inode_blocks_set(raw_inode, ei);
426862306a36Sopenharmony_ci
426962306a36Sopenharmony_ci	raw_inode->i_mode = cpu_to_le16(inode->i_mode);
427062306a36Sopenharmony_ci	i_uid = i_uid_read(inode);
427162306a36Sopenharmony_ci	i_gid = i_gid_read(inode);
427262306a36Sopenharmony_ci	i_projid = from_kprojid(&init_user_ns, ei->i_projid);
427362306a36Sopenharmony_ci	if (!(test_opt(inode->i_sb, NO_UID32))) {
427462306a36Sopenharmony_ci		raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid));
427562306a36Sopenharmony_ci		raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid));
427662306a36Sopenharmony_ci		/*
427762306a36Sopenharmony_ci		 * Fix up interoperability with old kernels. Otherwise,
427862306a36Sopenharmony_ci		 * old inodes get re-used with the upper 16 bits of the
427962306a36Sopenharmony_ci		 * uid/gid intact.
428062306a36Sopenharmony_ci		 */
428162306a36Sopenharmony_ci		if (ei->i_dtime && list_empty(&ei->i_orphan)) {
428262306a36Sopenharmony_ci			raw_inode->i_uid_high = 0;
428362306a36Sopenharmony_ci			raw_inode->i_gid_high = 0;
428462306a36Sopenharmony_ci		} else {
428562306a36Sopenharmony_ci			raw_inode->i_uid_high =
428662306a36Sopenharmony_ci				cpu_to_le16(high_16_bits(i_uid));
428762306a36Sopenharmony_ci			raw_inode->i_gid_high =
428862306a36Sopenharmony_ci				cpu_to_le16(high_16_bits(i_gid));
428962306a36Sopenharmony_ci		}
429062306a36Sopenharmony_ci	} else {
429162306a36Sopenharmony_ci		raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
429262306a36Sopenharmony_ci		raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(i_gid));
429362306a36Sopenharmony_ci		raw_inode->i_uid_high = 0;
429462306a36Sopenharmony_ci		raw_inode->i_gid_high = 0;
429562306a36Sopenharmony_ci	}
429662306a36Sopenharmony_ci	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
429762306a36Sopenharmony_ci
429862306a36Sopenharmony_ci	EXT4_INODE_SET_CTIME(inode, raw_inode);
429962306a36Sopenharmony_ci	EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
430062306a36Sopenharmony_ci	EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
430162306a36Sopenharmony_ci	EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
430262306a36Sopenharmony_ci
430362306a36Sopenharmony_ci	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
430462306a36Sopenharmony_ci	raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
430562306a36Sopenharmony_ci	if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
430662306a36Sopenharmony_ci		raw_inode->i_file_acl_high =
430762306a36Sopenharmony_ci			cpu_to_le16(ei->i_file_acl >> 32);
430862306a36Sopenharmony_ci	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
430962306a36Sopenharmony_ci	ext4_isize_set(raw_inode, ei->i_disksize);
431062306a36Sopenharmony_ci
431162306a36Sopenharmony_ci	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
431262306a36Sopenharmony_ci	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
431362306a36Sopenharmony_ci		if (old_valid_dev(inode->i_rdev)) {
431462306a36Sopenharmony_ci			raw_inode->i_block[0] =
431562306a36Sopenharmony_ci				cpu_to_le32(old_encode_dev(inode->i_rdev));
431662306a36Sopenharmony_ci			raw_inode->i_block[1] = 0;
431762306a36Sopenharmony_ci		} else {
431862306a36Sopenharmony_ci			raw_inode->i_block[0] = 0;
431962306a36Sopenharmony_ci			raw_inode->i_block[1] =
432062306a36Sopenharmony_ci				cpu_to_le32(new_encode_dev(inode->i_rdev));
432162306a36Sopenharmony_ci			raw_inode->i_block[2] = 0;
432262306a36Sopenharmony_ci		}
432362306a36Sopenharmony_ci	} else if (!ext4_has_inline_data(inode)) {
432462306a36Sopenharmony_ci		for (block = 0; block < EXT4_N_BLOCKS; block++)
432562306a36Sopenharmony_ci			raw_inode->i_block[block] = ei->i_data[block];
432662306a36Sopenharmony_ci	}
432762306a36Sopenharmony_ci
432862306a36Sopenharmony_ci	if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
432962306a36Sopenharmony_ci		u64 ivers = ext4_inode_peek_iversion(inode);
433062306a36Sopenharmony_ci
433162306a36Sopenharmony_ci		raw_inode->i_disk_version = cpu_to_le32(ivers);
433262306a36Sopenharmony_ci		if (ei->i_extra_isize) {
433362306a36Sopenharmony_ci			if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
433462306a36Sopenharmony_ci				raw_inode->i_version_hi =
433562306a36Sopenharmony_ci					cpu_to_le32(ivers >> 32);
433662306a36Sopenharmony_ci			raw_inode->i_extra_isize =
433762306a36Sopenharmony_ci				cpu_to_le16(ei->i_extra_isize);
433862306a36Sopenharmony_ci		}
433962306a36Sopenharmony_ci	}
434062306a36Sopenharmony_ci
434162306a36Sopenharmony_ci	if (i_projid != EXT4_DEF_PROJID &&
434262306a36Sopenharmony_ci	    !ext4_has_feature_project(inode->i_sb))
434362306a36Sopenharmony_ci		err = err ?: -EFSCORRUPTED;
434462306a36Sopenharmony_ci
434562306a36Sopenharmony_ci	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
434662306a36Sopenharmony_ci	    EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
434762306a36Sopenharmony_ci		raw_inode->i_projid = cpu_to_le32(i_projid);
434862306a36Sopenharmony_ci
434962306a36Sopenharmony_ci	ext4_inode_csum_set(inode, raw_inode, ei);
435062306a36Sopenharmony_ci	return err;
435162306a36Sopenharmony_ci}
435262306a36Sopenharmony_ci
435362306a36Sopenharmony_ci/*
435462306a36Sopenharmony_ci * ext4_get_inode_loc returns with an extra refcount against the inode's
435562306a36Sopenharmony_ci * underlying buffer_head on success. If we pass 'inode' and it does not
435662306a36Sopenharmony_ci * have in-inode xattr, we have all inode data in memory that is needed
435762306a36Sopenharmony_ci * to recreate the on-disk version of this inode.
435862306a36Sopenharmony_ci */
435962306a36Sopenharmony_cistatic int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino,
436062306a36Sopenharmony_ci				struct inode *inode, struct ext4_iloc *iloc,
436162306a36Sopenharmony_ci				ext4_fsblk_t *ret_block)
436262306a36Sopenharmony_ci{
436362306a36Sopenharmony_ci	struct ext4_group_desc	*gdp;
436462306a36Sopenharmony_ci	struct buffer_head	*bh;
436562306a36Sopenharmony_ci	ext4_fsblk_t		block;
436662306a36Sopenharmony_ci	struct blk_plug		plug;
436762306a36Sopenharmony_ci	int			inodes_per_block, inode_offset;
436862306a36Sopenharmony_ci
436962306a36Sopenharmony_ci	iloc->bh = NULL;
437062306a36Sopenharmony_ci	if (ino < EXT4_ROOT_INO ||
437162306a36Sopenharmony_ci	    ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
437262306a36Sopenharmony_ci		return -EFSCORRUPTED;
437362306a36Sopenharmony_ci
437462306a36Sopenharmony_ci	iloc->block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
437562306a36Sopenharmony_ci	gdp = ext4_get_group_desc(sb, iloc->block_group, NULL);
437662306a36Sopenharmony_ci	if (!gdp)
437762306a36Sopenharmony_ci		return -EIO;
437862306a36Sopenharmony_ci
437962306a36Sopenharmony_ci	/*
438062306a36Sopenharmony_ci	 * Figure out the offset within the block group inode table
438162306a36Sopenharmony_ci	 */
438262306a36Sopenharmony_ci	inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
438362306a36Sopenharmony_ci	inode_offset = ((ino - 1) %
438462306a36Sopenharmony_ci			EXT4_INODES_PER_GROUP(sb));
438562306a36Sopenharmony_ci	iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
438662306a36Sopenharmony_ci
438762306a36Sopenharmony_ci	block = ext4_inode_table(sb, gdp);
438862306a36Sopenharmony_ci	if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) ||
438962306a36Sopenharmony_ci	    (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) {
439062306a36Sopenharmony_ci		ext4_error(sb, "Invalid inode table block %llu in "
439162306a36Sopenharmony_ci			   "block_group %u", block, iloc->block_group);
439262306a36Sopenharmony_ci		return -EFSCORRUPTED;
439362306a36Sopenharmony_ci	}
439462306a36Sopenharmony_ci	block += (inode_offset / inodes_per_block);
439562306a36Sopenharmony_ci
439662306a36Sopenharmony_ci	bh = sb_getblk(sb, block);
439762306a36Sopenharmony_ci	if (unlikely(!bh))
439862306a36Sopenharmony_ci		return -ENOMEM;
439962306a36Sopenharmony_ci	if (ext4_buffer_uptodate(bh))
440062306a36Sopenharmony_ci		goto has_buffer;
440162306a36Sopenharmony_ci
440262306a36Sopenharmony_ci	lock_buffer(bh);
440362306a36Sopenharmony_ci	if (ext4_buffer_uptodate(bh)) {
440462306a36Sopenharmony_ci		/* Someone brought it uptodate while we waited */
440562306a36Sopenharmony_ci		unlock_buffer(bh);
440662306a36Sopenharmony_ci		goto has_buffer;
440762306a36Sopenharmony_ci	}
440862306a36Sopenharmony_ci
440962306a36Sopenharmony_ci	/*
441062306a36Sopenharmony_ci	 * If we have all information of the inode in memory and this
441162306a36Sopenharmony_ci	 * is the only valid inode in the block, we need not read the
441262306a36Sopenharmony_ci	 * block.
441362306a36Sopenharmony_ci	 */
441462306a36Sopenharmony_ci	if (inode && !ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
441562306a36Sopenharmony_ci		struct buffer_head *bitmap_bh;
441662306a36Sopenharmony_ci		int i, start;
441762306a36Sopenharmony_ci
441862306a36Sopenharmony_ci		start = inode_offset & ~(inodes_per_block - 1);
441962306a36Sopenharmony_ci
442062306a36Sopenharmony_ci		/* Is the inode bitmap in cache? */
442162306a36Sopenharmony_ci		bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp));
442262306a36Sopenharmony_ci		if (unlikely(!bitmap_bh))
442362306a36Sopenharmony_ci			goto make_io;
442462306a36Sopenharmony_ci
442562306a36Sopenharmony_ci		/*
442662306a36Sopenharmony_ci		 * If the inode bitmap isn't in cache then the
442762306a36Sopenharmony_ci		 * optimisation may end up performing two reads instead
442862306a36Sopenharmony_ci		 * of one, so skip it.
442962306a36Sopenharmony_ci		 */
443062306a36Sopenharmony_ci		if (!buffer_uptodate(bitmap_bh)) {
443162306a36Sopenharmony_ci			brelse(bitmap_bh);
443262306a36Sopenharmony_ci			goto make_io;
443362306a36Sopenharmony_ci		}
443462306a36Sopenharmony_ci		for (i = start; i < start + inodes_per_block; i++) {
443562306a36Sopenharmony_ci			if (i == inode_offset)
443662306a36Sopenharmony_ci				continue;
443762306a36Sopenharmony_ci			if (ext4_test_bit(i, bitmap_bh->b_data))
443862306a36Sopenharmony_ci				break;
443962306a36Sopenharmony_ci		}
444062306a36Sopenharmony_ci		brelse(bitmap_bh);
444162306a36Sopenharmony_ci		if (i == start + inodes_per_block) {
444262306a36Sopenharmony_ci			struct ext4_inode *raw_inode =
444362306a36Sopenharmony_ci				(struct ext4_inode *) (bh->b_data + iloc->offset);
444462306a36Sopenharmony_ci
444562306a36Sopenharmony_ci			/* all other inodes are free, so skip I/O */
444662306a36Sopenharmony_ci			memset(bh->b_data, 0, bh->b_size);
444762306a36Sopenharmony_ci			if (!ext4_test_inode_state(inode, EXT4_STATE_NEW))
444862306a36Sopenharmony_ci				ext4_fill_raw_inode(inode, raw_inode);
444962306a36Sopenharmony_ci			set_buffer_uptodate(bh);
445062306a36Sopenharmony_ci			unlock_buffer(bh);
445162306a36Sopenharmony_ci			goto has_buffer;
445262306a36Sopenharmony_ci		}
445362306a36Sopenharmony_ci	}
445462306a36Sopenharmony_ci
445562306a36Sopenharmony_cimake_io:
445662306a36Sopenharmony_ci	/*
445762306a36Sopenharmony_ci	 * If we need to do any I/O, try to pre-readahead extra
445862306a36Sopenharmony_ci	 * blocks from the inode table.
445962306a36Sopenharmony_ci	 */
446062306a36Sopenharmony_ci	blk_start_plug(&plug);
446162306a36Sopenharmony_ci	if (EXT4_SB(sb)->s_inode_readahead_blks) {
446262306a36Sopenharmony_ci		ext4_fsblk_t b, end, table;
446362306a36Sopenharmony_ci		unsigned num;
446462306a36Sopenharmony_ci		__u32 ra_blks = EXT4_SB(sb)->s_inode_readahead_blks;
446562306a36Sopenharmony_ci
446662306a36Sopenharmony_ci		table = ext4_inode_table(sb, gdp);
446762306a36Sopenharmony_ci		/* s_inode_readahead_blks is always a power of 2 */
446862306a36Sopenharmony_ci		b = block & ~((ext4_fsblk_t) ra_blks - 1);
446962306a36Sopenharmony_ci		if (table > b)
447062306a36Sopenharmony_ci			b = table;
447162306a36Sopenharmony_ci		end = b + ra_blks;
447262306a36Sopenharmony_ci		num = EXT4_INODES_PER_GROUP(sb);
447362306a36Sopenharmony_ci		if (ext4_has_group_desc_csum(sb))
447462306a36Sopenharmony_ci			num -= ext4_itable_unused_count(sb, gdp);
447562306a36Sopenharmony_ci		table += num / inodes_per_block;
447662306a36Sopenharmony_ci		if (end > table)
447762306a36Sopenharmony_ci			end = table;
447862306a36Sopenharmony_ci		while (b <= end)
447962306a36Sopenharmony_ci			ext4_sb_breadahead_unmovable(sb, b++);
448062306a36Sopenharmony_ci	}
448162306a36Sopenharmony_ci
448262306a36Sopenharmony_ci	/*
448362306a36Sopenharmony_ci	 * There are other valid inodes in the buffer, this inode
448462306a36Sopenharmony_ci	 * has in-inode xattrs, or we don't have this inode in memory.
448562306a36Sopenharmony_ci	 * Read the block from disk.
448662306a36Sopenharmony_ci	 */
448762306a36Sopenharmony_ci	trace_ext4_load_inode(sb, ino);
448862306a36Sopenharmony_ci	ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL);
448962306a36Sopenharmony_ci	blk_finish_plug(&plug);
449062306a36Sopenharmony_ci	wait_on_buffer(bh);
449162306a36Sopenharmony_ci	ext4_simulate_fail_bh(sb, bh, EXT4_SIM_INODE_EIO);
449262306a36Sopenharmony_ci	if (!buffer_uptodate(bh)) {
449362306a36Sopenharmony_ci		if (ret_block)
449462306a36Sopenharmony_ci			*ret_block = block;
449562306a36Sopenharmony_ci		brelse(bh);
449662306a36Sopenharmony_ci		return -EIO;
449762306a36Sopenharmony_ci	}
449862306a36Sopenharmony_cihas_buffer:
449962306a36Sopenharmony_ci	iloc->bh = bh;
450062306a36Sopenharmony_ci	return 0;
450162306a36Sopenharmony_ci}
450262306a36Sopenharmony_ci
450362306a36Sopenharmony_cistatic int __ext4_get_inode_loc_noinmem(struct inode *inode,
450462306a36Sopenharmony_ci					struct ext4_iloc *iloc)
450562306a36Sopenharmony_ci{
450662306a36Sopenharmony_ci	ext4_fsblk_t err_blk = 0;
450762306a36Sopenharmony_ci	int ret;
450862306a36Sopenharmony_ci
450962306a36Sopenharmony_ci	ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, NULL, iloc,
451062306a36Sopenharmony_ci					&err_blk);
451162306a36Sopenharmony_ci
451262306a36Sopenharmony_ci	if (ret == -EIO)
451362306a36Sopenharmony_ci		ext4_error_inode_block(inode, err_blk, EIO,
451462306a36Sopenharmony_ci					"unable to read itable block");
451562306a36Sopenharmony_ci
451662306a36Sopenharmony_ci	return ret;
451762306a36Sopenharmony_ci}
451862306a36Sopenharmony_ci
451962306a36Sopenharmony_ciint ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
452062306a36Sopenharmony_ci{
452162306a36Sopenharmony_ci	ext4_fsblk_t err_blk = 0;
452262306a36Sopenharmony_ci	int ret;
452362306a36Sopenharmony_ci
452462306a36Sopenharmony_ci	ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, inode, iloc,
452562306a36Sopenharmony_ci					&err_blk);
452662306a36Sopenharmony_ci
452762306a36Sopenharmony_ci	if (ret == -EIO)
452862306a36Sopenharmony_ci		ext4_error_inode_block(inode, err_blk, EIO,
452962306a36Sopenharmony_ci					"unable to read itable block");
453062306a36Sopenharmony_ci
453162306a36Sopenharmony_ci	return ret;
453262306a36Sopenharmony_ci}
453362306a36Sopenharmony_ci
453462306a36Sopenharmony_ci
453562306a36Sopenharmony_ciint ext4_get_fc_inode_loc(struct super_block *sb, unsigned long ino,
453662306a36Sopenharmony_ci			  struct ext4_iloc *iloc)
453762306a36Sopenharmony_ci{
453862306a36Sopenharmony_ci	return __ext4_get_inode_loc(sb, ino, NULL, iloc, NULL);
453962306a36Sopenharmony_ci}
454062306a36Sopenharmony_ci
454162306a36Sopenharmony_cistatic bool ext4_should_enable_dax(struct inode *inode)
454262306a36Sopenharmony_ci{
454362306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
454462306a36Sopenharmony_ci
454562306a36Sopenharmony_ci	if (test_opt2(inode->i_sb, DAX_NEVER))
454662306a36Sopenharmony_ci		return false;
454762306a36Sopenharmony_ci	if (!S_ISREG(inode->i_mode))
454862306a36Sopenharmony_ci		return false;
454962306a36Sopenharmony_ci	if (ext4_should_journal_data(inode))
455062306a36Sopenharmony_ci		return false;
455162306a36Sopenharmony_ci	if (ext4_has_inline_data(inode))
455262306a36Sopenharmony_ci		return false;
455362306a36Sopenharmony_ci	if (ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT))
455462306a36Sopenharmony_ci		return false;
455562306a36Sopenharmony_ci	if (ext4_test_inode_flag(inode, EXT4_INODE_VERITY))
455662306a36Sopenharmony_ci		return false;
455762306a36Sopenharmony_ci	if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags))
455862306a36Sopenharmony_ci		return false;
455962306a36Sopenharmony_ci	if (test_opt(inode->i_sb, DAX_ALWAYS))
456062306a36Sopenharmony_ci		return true;
456162306a36Sopenharmony_ci
456262306a36Sopenharmony_ci	return ext4_test_inode_flag(inode, EXT4_INODE_DAX);
456362306a36Sopenharmony_ci}
456462306a36Sopenharmony_ci
456562306a36Sopenharmony_civoid ext4_set_inode_flags(struct inode *inode, bool init)
456662306a36Sopenharmony_ci{
456762306a36Sopenharmony_ci	unsigned int flags = EXT4_I(inode)->i_flags;
456862306a36Sopenharmony_ci	unsigned int new_fl = 0;
456962306a36Sopenharmony_ci
457062306a36Sopenharmony_ci	WARN_ON_ONCE(IS_DAX(inode) && init);
457162306a36Sopenharmony_ci
457262306a36Sopenharmony_ci	if (flags & EXT4_SYNC_FL)
457362306a36Sopenharmony_ci		new_fl |= S_SYNC;
457462306a36Sopenharmony_ci	if (flags & EXT4_APPEND_FL)
457562306a36Sopenharmony_ci		new_fl |= S_APPEND;
457662306a36Sopenharmony_ci	if (flags & EXT4_IMMUTABLE_FL)
457762306a36Sopenharmony_ci		new_fl |= S_IMMUTABLE;
457862306a36Sopenharmony_ci	if (flags & EXT4_NOATIME_FL)
457962306a36Sopenharmony_ci		new_fl |= S_NOATIME;
458062306a36Sopenharmony_ci	if (flags & EXT4_DIRSYNC_FL)
458162306a36Sopenharmony_ci		new_fl |= S_DIRSYNC;
458262306a36Sopenharmony_ci
458362306a36Sopenharmony_ci	/* Because of the way inode_set_flags() works we must preserve S_DAX
458462306a36Sopenharmony_ci	 * here if already set. */
458562306a36Sopenharmony_ci	new_fl |= (inode->i_flags & S_DAX);
458662306a36Sopenharmony_ci	if (init && ext4_should_enable_dax(inode))
458762306a36Sopenharmony_ci		new_fl |= S_DAX;
458862306a36Sopenharmony_ci
458962306a36Sopenharmony_ci	if (flags & EXT4_ENCRYPT_FL)
459062306a36Sopenharmony_ci		new_fl |= S_ENCRYPTED;
459162306a36Sopenharmony_ci	if (flags & EXT4_CASEFOLD_FL)
459262306a36Sopenharmony_ci		new_fl |= S_CASEFOLD;
459362306a36Sopenharmony_ci	if (flags & EXT4_VERITY_FL)
459462306a36Sopenharmony_ci		new_fl |= S_VERITY;
459562306a36Sopenharmony_ci	inode_set_flags(inode, new_fl,
459662306a36Sopenharmony_ci			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX|
459762306a36Sopenharmony_ci			S_ENCRYPTED|S_CASEFOLD|S_VERITY);
459862306a36Sopenharmony_ci}
459962306a36Sopenharmony_ci
460062306a36Sopenharmony_cistatic blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
460162306a36Sopenharmony_ci				  struct ext4_inode_info *ei)
460262306a36Sopenharmony_ci{
460362306a36Sopenharmony_ci	blkcnt_t i_blocks ;
460462306a36Sopenharmony_ci	struct inode *inode = &(ei->vfs_inode);
460562306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
460662306a36Sopenharmony_ci
460762306a36Sopenharmony_ci	if (ext4_has_feature_huge_file(sb)) {
460862306a36Sopenharmony_ci		/* we are using combined 48 bit field */
460962306a36Sopenharmony_ci		i_blocks = ((u64)le16_to_cpu(raw_inode->i_blocks_high)) << 32 |
461062306a36Sopenharmony_ci					le32_to_cpu(raw_inode->i_blocks_lo);
461162306a36Sopenharmony_ci		if (ext4_test_inode_flag(inode, EXT4_INODE_HUGE_FILE)) {
461262306a36Sopenharmony_ci			/* i_blocks represent file system block size */
461362306a36Sopenharmony_ci			return i_blocks  << (inode->i_blkbits - 9);
461462306a36Sopenharmony_ci		} else {
461562306a36Sopenharmony_ci			return i_blocks;
461662306a36Sopenharmony_ci		}
461762306a36Sopenharmony_ci	} else {
461862306a36Sopenharmony_ci		return le32_to_cpu(raw_inode->i_blocks_lo);
461962306a36Sopenharmony_ci	}
462062306a36Sopenharmony_ci}
462162306a36Sopenharmony_ci
462262306a36Sopenharmony_cistatic inline int ext4_iget_extra_inode(struct inode *inode,
462362306a36Sopenharmony_ci					 struct ext4_inode *raw_inode,
462462306a36Sopenharmony_ci					 struct ext4_inode_info *ei)
462562306a36Sopenharmony_ci{
462662306a36Sopenharmony_ci	__le32 *magic = (void *)raw_inode +
462762306a36Sopenharmony_ci			EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
462862306a36Sopenharmony_ci
462962306a36Sopenharmony_ci	if (EXT4_INODE_HAS_XATTR_SPACE(inode)  &&
463062306a36Sopenharmony_ci	    *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
463162306a36Sopenharmony_ci		int err;
463262306a36Sopenharmony_ci
463362306a36Sopenharmony_ci		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
463462306a36Sopenharmony_ci		err = ext4_find_inline_data_nolock(inode);
463562306a36Sopenharmony_ci		if (!err && ext4_has_inline_data(inode))
463662306a36Sopenharmony_ci			ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
463762306a36Sopenharmony_ci		return err;
463862306a36Sopenharmony_ci	} else
463962306a36Sopenharmony_ci		EXT4_I(inode)->i_inline_off = 0;
464062306a36Sopenharmony_ci	return 0;
464162306a36Sopenharmony_ci}
464262306a36Sopenharmony_ci
464362306a36Sopenharmony_ciint ext4_get_projid(struct inode *inode, kprojid_t *projid)
464462306a36Sopenharmony_ci{
464562306a36Sopenharmony_ci	if (!ext4_has_feature_project(inode->i_sb))
464662306a36Sopenharmony_ci		return -EOPNOTSUPP;
464762306a36Sopenharmony_ci	*projid = EXT4_I(inode)->i_projid;
464862306a36Sopenharmony_ci	return 0;
464962306a36Sopenharmony_ci}
465062306a36Sopenharmony_ci
465162306a36Sopenharmony_ci/*
465262306a36Sopenharmony_ci * ext4 has self-managed i_version for ea inodes, it stores the lower 32bit of
465362306a36Sopenharmony_ci * refcount in i_version, so use raw values if inode has EXT4_EA_INODE_FL flag
465462306a36Sopenharmony_ci * set.
465562306a36Sopenharmony_ci */
465662306a36Sopenharmony_cistatic inline void ext4_inode_set_iversion_queried(struct inode *inode, u64 val)
465762306a36Sopenharmony_ci{
465862306a36Sopenharmony_ci	if (unlikely(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
465962306a36Sopenharmony_ci		inode_set_iversion_raw(inode, val);
466062306a36Sopenharmony_ci	else
466162306a36Sopenharmony_ci		inode_set_iversion_queried(inode, val);
466262306a36Sopenharmony_ci}
466362306a36Sopenharmony_ci
466462306a36Sopenharmony_cistatic const char *check_igot_inode(struct inode *inode, ext4_iget_flags flags)
466562306a36Sopenharmony_ci
466662306a36Sopenharmony_ci{
466762306a36Sopenharmony_ci	if (flags & EXT4_IGET_EA_INODE) {
466862306a36Sopenharmony_ci		if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
466962306a36Sopenharmony_ci			return "missing EA_INODE flag";
467062306a36Sopenharmony_ci		if (ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
467162306a36Sopenharmony_ci		    EXT4_I(inode)->i_file_acl)
467262306a36Sopenharmony_ci			return "ea_inode with extended attributes";
467362306a36Sopenharmony_ci	} else {
467462306a36Sopenharmony_ci		if ((EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL))
467562306a36Sopenharmony_ci			return "unexpected EA_INODE flag";
467662306a36Sopenharmony_ci	}
467762306a36Sopenharmony_ci	if (is_bad_inode(inode) && !(flags & EXT4_IGET_BAD))
467862306a36Sopenharmony_ci		return "unexpected bad inode w/o EXT4_IGET_BAD";
467962306a36Sopenharmony_ci	return NULL;
468062306a36Sopenharmony_ci}
468162306a36Sopenharmony_ci
468262306a36Sopenharmony_cistruct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
468362306a36Sopenharmony_ci			  ext4_iget_flags flags, const char *function,
468462306a36Sopenharmony_ci			  unsigned int line)
468562306a36Sopenharmony_ci{
468662306a36Sopenharmony_ci	struct ext4_iloc iloc;
468762306a36Sopenharmony_ci	struct ext4_inode *raw_inode;
468862306a36Sopenharmony_ci	struct ext4_inode_info *ei;
468962306a36Sopenharmony_ci	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
469062306a36Sopenharmony_ci	struct inode *inode;
469162306a36Sopenharmony_ci	const char *err_str;
469262306a36Sopenharmony_ci	journal_t *journal = EXT4_SB(sb)->s_journal;
469362306a36Sopenharmony_ci	long ret;
469462306a36Sopenharmony_ci	loff_t size;
469562306a36Sopenharmony_ci	int block;
469662306a36Sopenharmony_ci	uid_t i_uid;
469762306a36Sopenharmony_ci	gid_t i_gid;
469862306a36Sopenharmony_ci	projid_t i_projid;
469962306a36Sopenharmony_ci
470062306a36Sopenharmony_ci	if ((!(flags & EXT4_IGET_SPECIAL) &&
470162306a36Sopenharmony_ci	     ((ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) ||
470262306a36Sopenharmony_ci	      ino == le32_to_cpu(es->s_usr_quota_inum) ||
470362306a36Sopenharmony_ci	      ino == le32_to_cpu(es->s_grp_quota_inum) ||
470462306a36Sopenharmony_ci	      ino == le32_to_cpu(es->s_prj_quota_inum) ||
470562306a36Sopenharmony_ci	      ino == le32_to_cpu(es->s_orphan_file_inum))) ||
470662306a36Sopenharmony_ci	    (ino < EXT4_ROOT_INO) ||
470762306a36Sopenharmony_ci	    (ino > le32_to_cpu(es->s_inodes_count))) {
470862306a36Sopenharmony_ci		if (flags & EXT4_IGET_HANDLE)
470962306a36Sopenharmony_ci			return ERR_PTR(-ESTALE);
471062306a36Sopenharmony_ci		__ext4_error(sb, function, line, false, EFSCORRUPTED, 0,
471162306a36Sopenharmony_ci			     "inode #%lu: comm %s: iget: illegal inode #",
471262306a36Sopenharmony_ci			     ino, current->comm);
471362306a36Sopenharmony_ci		return ERR_PTR(-EFSCORRUPTED);
471462306a36Sopenharmony_ci	}
471562306a36Sopenharmony_ci
471662306a36Sopenharmony_ci	inode = iget_locked(sb, ino);
471762306a36Sopenharmony_ci	if (!inode)
471862306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
471962306a36Sopenharmony_ci	if (!(inode->i_state & I_NEW)) {
472062306a36Sopenharmony_ci		if ((err_str = check_igot_inode(inode, flags)) != NULL) {
472162306a36Sopenharmony_ci			ext4_error_inode(inode, function, line, 0, err_str);
472262306a36Sopenharmony_ci			iput(inode);
472362306a36Sopenharmony_ci			return ERR_PTR(-EFSCORRUPTED);
472462306a36Sopenharmony_ci		}
472562306a36Sopenharmony_ci		return inode;
472662306a36Sopenharmony_ci	}
472762306a36Sopenharmony_ci
472862306a36Sopenharmony_ci	ei = EXT4_I(inode);
472962306a36Sopenharmony_ci	iloc.bh = NULL;
473062306a36Sopenharmony_ci
473162306a36Sopenharmony_ci	ret = __ext4_get_inode_loc_noinmem(inode, &iloc);
473262306a36Sopenharmony_ci	if (ret < 0)
473362306a36Sopenharmony_ci		goto bad_inode;
473462306a36Sopenharmony_ci	raw_inode = ext4_raw_inode(&iloc);
473562306a36Sopenharmony_ci
473662306a36Sopenharmony_ci	if ((flags & EXT4_IGET_HANDLE) &&
473762306a36Sopenharmony_ci	    (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
473862306a36Sopenharmony_ci		ret = -ESTALE;
473962306a36Sopenharmony_ci		goto bad_inode;
474062306a36Sopenharmony_ci	}
474162306a36Sopenharmony_ci
474262306a36Sopenharmony_ci	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
474362306a36Sopenharmony_ci		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
474462306a36Sopenharmony_ci		if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
474562306a36Sopenharmony_ci			EXT4_INODE_SIZE(inode->i_sb) ||
474662306a36Sopenharmony_ci		    (ei->i_extra_isize & 3)) {
474762306a36Sopenharmony_ci			ext4_error_inode(inode, function, line, 0,
474862306a36Sopenharmony_ci					 "iget: bad extra_isize %u "
474962306a36Sopenharmony_ci					 "(inode size %u)",
475062306a36Sopenharmony_ci					 ei->i_extra_isize,
475162306a36Sopenharmony_ci					 EXT4_INODE_SIZE(inode->i_sb));
475262306a36Sopenharmony_ci			ret = -EFSCORRUPTED;
475362306a36Sopenharmony_ci			goto bad_inode;
475462306a36Sopenharmony_ci		}
475562306a36Sopenharmony_ci	} else
475662306a36Sopenharmony_ci		ei->i_extra_isize = 0;
475762306a36Sopenharmony_ci
475862306a36Sopenharmony_ci	/* Precompute checksum seed for inode metadata */
475962306a36Sopenharmony_ci	if (ext4_has_metadata_csum(sb)) {
476062306a36Sopenharmony_ci		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
476162306a36Sopenharmony_ci		__u32 csum;
476262306a36Sopenharmony_ci		__le32 inum = cpu_to_le32(inode->i_ino);
476362306a36Sopenharmony_ci		__le32 gen = raw_inode->i_generation;
476462306a36Sopenharmony_ci		csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&inum,
476562306a36Sopenharmony_ci				   sizeof(inum));
476662306a36Sopenharmony_ci		ei->i_csum_seed = ext4_chksum(sbi, csum, (__u8 *)&gen,
476762306a36Sopenharmony_ci					      sizeof(gen));
476862306a36Sopenharmony_ci	}
476962306a36Sopenharmony_ci
477062306a36Sopenharmony_ci	if ((!ext4_inode_csum_verify(inode, raw_inode, ei) ||
477162306a36Sopenharmony_ci	    ext4_simulate_fail(sb, EXT4_SIM_INODE_CRC)) &&
477262306a36Sopenharmony_ci	     (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))) {
477362306a36Sopenharmony_ci		ext4_error_inode_err(inode, function, line, 0,
477462306a36Sopenharmony_ci				EFSBADCRC, "iget: checksum invalid");
477562306a36Sopenharmony_ci		ret = -EFSBADCRC;
477662306a36Sopenharmony_ci		goto bad_inode;
477762306a36Sopenharmony_ci	}
477862306a36Sopenharmony_ci
477962306a36Sopenharmony_ci	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
478062306a36Sopenharmony_ci	i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
478162306a36Sopenharmony_ci	i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
478262306a36Sopenharmony_ci	if (ext4_has_feature_project(sb) &&
478362306a36Sopenharmony_ci	    EXT4_INODE_SIZE(sb) > EXT4_GOOD_OLD_INODE_SIZE &&
478462306a36Sopenharmony_ci	    EXT4_FITS_IN_INODE(raw_inode, ei, i_projid))
478562306a36Sopenharmony_ci		i_projid = (projid_t)le32_to_cpu(raw_inode->i_projid);
478662306a36Sopenharmony_ci	else
478762306a36Sopenharmony_ci		i_projid = EXT4_DEF_PROJID;
478862306a36Sopenharmony_ci
478962306a36Sopenharmony_ci	if (!(test_opt(inode->i_sb, NO_UID32))) {
479062306a36Sopenharmony_ci		i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
479162306a36Sopenharmony_ci		i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
479262306a36Sopenharmony_ci	}
479362306a36Sopenharmony_ci	i_uid_write(inode, i_uid);
479462306a36Sopenharmony_ci	i_gid_write(inode, i_gid);
479562306a36Sopenharmony_ci	ei->i_projid = make_kprojid(&init_user_ns, i_projid);
479662306a36Sopenharmony_ci	set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
479762306a36Sopenharmony_ci
479862306a36Sopenharmony_ci	ext4_clear_state_flags(ei);	/* Only relevant on 32-bit archs */
479962306a36Sopenharmony_ci	ei->i_inline_off = 0;
480062306a36Sopenharmony_ci	ei->i_dir_start_lookup = 0;
480162306a36Sopenharmony_ci	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
480262306a36Sopenharmony_ci	/* We now have enough fields to check if the inode was active or not.
480362306a36Sopenharmony_ci	 * This is needed because nfsd might try to access dead inodes
480462306a36Sopenharmony_ci	 * the test is that same one that e2fsck uses
480562306a36Sopenharmony_ci	 * NeilBrown 1999oct15
480662306a36Sopenharmony_ci	 */
480762306a36Sopenharmony_ci	if (inode->i_nlink == 0) {
480862306a36Sopenharmony_ci		if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL ||
480962306a36Sopenharmony_ci		     !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
481062306a36Sopenharmony_ci		    ino != EXT4_BOOT_LOADER_INO) {
481162306a36Sopenharmony_ci			/* this inode is deleted or unallocated */
481262306a36Sopenharmony_ci			if (flags & EXT4_IGET_SPECIAL) {
481362306a36Sopenharmony_ci				ext4_error_inode(inode, function, line, 0,
481462306a36Sopenharmony_ci						 "iget: special inode unallocated");
481562306a36Sopenharmony_ci				ret = -EFSCORRUPTED;
481662306a36Sopenharmony_ci			} else
481762306a36Sopenharmony_ci				ret = -ESTALE;
481862306a36Sopenharmony_ci			goto bad_inode;
481962306a36Sopenharmony_ci		}
482062306a36Sopenharmony_ci		/* The only unlinked inodes we let through here have
482162306a36Sopenharmony_ci		 * valid i_mode and are being read by the orphan
482262306a36Sopenharmony_ci		 * recovery code: that's fine, we're about to complete
482362306a36Sopenharmony_ci		 * the process of deleting those.
482462306a36Sopenharmony_ci		 * OR it is the EXT4_BOOT_LOADER_INO which is
482562306a36Sopenharmony_ci		 * not initialized on a new filesystem. */
482662306a36Sopenharmony_ci	}
482762306a36Sopenharmony_ci	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
482862306a36Sopenharmony_ci	ext4_set_inode_flags(inode, true);
482962306a36Sopenharmony_ci	inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
483062306a36Sopenharmony_ci	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
483162306a36Sopenharmony_ci	if (ext4_has_feature_64bit(sb))
483262306a36Sopenharmony_ci		ei->i_file_acl |=
483362306a36Sopenharmony_ci			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
483462306a36Sopenharmony_ci	inode->i_size = ext4_isize(sb, raw_inode);
483562306a36Sopenharmony_ci	if ((size = i_size_read(inode)) < 0) {
483662306a36Sopenharmony_ci		ext4_error_inode(inode, function, line, 0,
483762306a36Sopenharmony_ci				 "iget: bad i_size value: %lld", size);
483862306a36Sopenharmony_ci		ret = -EFSCORRUPTED;
483962306a36Sopenharmony_ci		goto bad_inode;
484062306a36Sopenharmony_ci	}
484162306a36Sopenharmony_ci	/*
484262306a36Sopenharmony_ci	 * If dir_index is not enabled but there's dir with INDEX flag set,
484362306a36Sopenharmony_ci	 * we'd normally treat htree data as empty space. But with metadata
484462306a36Sopenharmony_ci	 * checksumming that corrupts checksums so forbid that.
484562306a36Sopenharmony_ci	 */
484662306a36Sopenharmony_ci	if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) &&
484762306a36Sopenharmony_ci	    ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) {
484862306a36Sopenharmony_ci		ext4_error_inode(inode, function, line, 0,
484962306a36Sopenharmony_ci			 "iget: Dir with htree data on filesystem without dir_index feature.");
485062306a36Sopenharmony_ci		ret = -EFSCORRUPTED;
485162306a36Sopenharmony_ci		goto bad_inode;
485262306a36Sopenharmony_ci	}
485362306a36Sopenharmony_ci	ei->i_disksize = inode->i_size;
485462306a36Sopenharmony_ci#ifdef CONFIG_QUOTA
485562306a36Sopenharmony_ci	ei->i_reserved_quota = 0;
485662306a36Sopenharmony_ci#endif
485762306a36Sopenharmony_ci	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
485862306a36Sopenharmony_ci	ei->i_block_group = iloc.block_group;
485962306a36Sopenharmony_ci	ei->i_last_alloc_group = ~0;
486062306a36Sopenharmony_ci	/*
486162306a36Sopenharmony_ci	 * NOTE! The in-memory inode i_data array is in little-endian order
486262306a36Sopenharmony_ci	 * even on big-endian machines: we do NOT byteswap the block numbers!
486362306a36Sopenharmony_ci	 */
486462306a36Sopenharmony_ci	for (block = 0; block < EXT4_N_BLOCKS; block++)
486562306a36Sopenharmony_ci		ei->i_data[block] = raw_inode->i_block[block];
486662306a36Sopenharmony_ci	INIT_LIST_HEAD(&ei->i_orphan);
486762306a36Sopenharmony_ci	ext4_fc_init_inode(&ei->vfs_inode);
486862306a36Sopenharmony_ci
486962306a36Sopenharmony_ci	/*
487062306a36Sopenharmony_ci	 * Set transaction id's of transactions that have to be committed
487162306a36Sopenharmony_ci	 * to finish f[data]sync. We set them to currently running transaction
487262306a36Sopenharmony_ci	 * as we cannot be sure that the inode or some of its metadata isn't
487362306a36Sopenharmony_ci	 * part of the transaction - the inode could have been reclaimed and
487462306a36Sopenharmony_ci	 * now it is reread from disk.
487562306a36Sopenharmony_ci	 */
487662306a36Sopenharmony_ci	if (journal) {
487762306a36Sopenharmony_ci		transaction_t *transaction;
487862306a36Sopenharmony_ci		tid_t tid;
487962306a36Sopenharmony_ci
488062306a36Sopenharmony_ci		read_lock(&journal->j_state_lock);
488162306a36Sopenharmony_ci		if (journal->j_running_transaction)
488262306a36Sopenharmony_ci			transaction = journal->j_running_transaction;
488362306a36Sopenharmony_ci		else
488462306a36Sopenharmony_ci			transaction = journal->j_committing_transaction;
488562306a36Sopenharmony_ci		if (transaction)
488662306a36Sopenharmony_ci			tid = transaction->t_tid;
488762306a36Sopenharmony_ci		else
488862306a36Sopenharmony_ci			tid = journal->j_commit_sequence;
488962306a36Sopenharmony_ci		read_unlock(&journal->j_state_lock);
489062306a36Sopenharmony_ci		ei->i_sync_tid = tid;
489162306a36Sopenharmony_ci		ei->i_datasync_tid = tid;
489262306a36Sopenharmony_ci	}
489362306a36Sopenharmony_ci
489462306a36Sopenharmony_ci	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
489562306a36Sopenharmony_ci		if (ei->i_extra_isize == 0) {
489662306a36Sopenharmony_ci			/* The extra space is currently unused. Use it. */
489762306a36Sopenharmony_ci			BUILD_BUG_ON(sizeof(struct ext4_inode) & 3);
489862306a36Sopenharmony_ci			ei->i_extra_isize = sizeof(struct ext4_inode) -
489962306a36Sopenharmony_ci					    EXT4_GOOD_OLD_INODE_SIZE;
490062306a36Sopenharmony_ci		} else {
490162306a36Sopenharmony_ci			ret = ext4_iget_extra_inode(inode, raw_inode, ei);
490262306a36Sopenharmony_ci			if (ret)
490362306a36Sopenharmony_ci				goto bad_inode;
490462306a36Sopenharmony_ci		}
490562306a36Sopenharmony_ci	}
490662306a36Sopenharmony_ci
490762306a36Sopenharmony_ci	EXT4_INODE_GET_CTIME(inode, raw_inode);
490862306a36Sopenharmony_ci	EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
490962306a36Sopenharmony_ci	EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
491062306a36Sopenharmony_ci	EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
491162306a36Sopenharmony_ci
491262306a36Sopenharmony_ci	if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) {
491362306a36Sopenharmony_ci		u64 ivers = le32_to_cpu(raw_inode->i_disk_version);
491462306a36Sopenharmony_ci
491562306a36Sopenharmony_ci		if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
491662306a36Sopenharmony_ci			if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi))
491762306a36Sopenharmony_ci				ivers |=
491862306a36Sopenharmony_ci		    (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
491962306a36Sopenharmony_ci		}
492062306a36Sopenharmony_ci		ext4_inode_set_iversion_queried(inode, ivers);
492162306a36Sopenharmony_ci	}
492262306a36Sopenharmony_ci
492362306a36Sopenharmony_ci	ret = 0;
492462306a36Sopenharmony_ci	if (ei->i_file_acl &&
492562306a36Sopenharmony_ci	    !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) {
492662306a36Sopenharmony_ci		ext4_error_inode(inode, function, line, 0,
492762306a36Sopenharmony_ci				 "iget: bad extended attribute block %llu",
492862306a36Sopenharmony_ci				 ei->i_file_acl);
492962306a36Sopenharmony_ci		ret = -EFSCORRUPTED;
493062306a36Sopenharmony_ci		goto bad_inode;
493162306a36Sopenharmony_ci	} else if (!ext4_has_inline_data(inode)) {
493262306a36Sopenharmony_ci		/* validate the block references in the inode */
493362306a36Sopenharmony_ci		if (!(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) &&
493462306a36Sopenharmony_ci			(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
493562306a36Sopenharmony_ci			(S_ISLNK(inode->i_mode) &&
493662306a36Sopenharmony_ci			!ext4_inode_is_fast_symlink(inode)))) {
493762306a36Sopenharmony_ci			if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
493862306a36Sopenharmony_ci				ret = ext4_ext_check_inode(inode);
493962306a36Sopenharmony_ci			else
494062306a36Sopenharmony_ci				ret = ext4_ind_check_inode(inode);
494162306a36Sopenharmony_ci		}
494262306a36Sopenharmony_ci	}
494362306a36Sopenharmony_ci	if (ret)
494462306a36Sopenharmony_ci		goto bad_inode;
494562306a36Sopenharmony_ci
494662306a36Sopenharmony_ci	if (S_ISREG(inode->i_mode)) {
494762306a36Sopenharmony_ci		inode->i_op = &ext4_file_inode_operations;
494862306a36Sopenharmony_ci		inode->i_fop = &ext4_file_operations;
494962306a36Sopenharmony_ci		ext4_set_aops(inode);
495062306a36Sopenharmony_ci	} else if (S_ISDIR(inode->i_mode)) {
495162306a36Sopenharmony_ci		inode->i_op = &ext4_dir_inode_operations;
495262306a36Sopenharmony_ci		inode->i_fop = &ext4_dir_operations;
495362306a36Sopenharmony_ci	} else if (S_ISLNK(inode->i_mode)) {
495462306a36Sopenharmony_ci		/* VFS does not allow setting these so must be corruption */
495562306a36Sopenharmony_ci		if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
495662306a36Sopenharmony_ci			ext4_error_inode(inode, function, line, 0,
495762306a36Sopenharmony_ci					 "iget: immutable or append flags "
495862306a36Sopenharmony_ci					 "not allowed on symlinks");
495962306a36Sopenharmony_ci			ret = -EFSCORRUPTED;
496062306a36Sopenharmony_ci			goto bad_inode;
496162306a36Sopenharmony_ci		}
496262306a36Sopenharmony_ci		if (IS_ENCRYPTED(inode)) {
496362306a36Sopenharmony_ci			inode->i_op = &ext4_encrypted_symlink_inode_operations;
496462306a36Sopenharmony_ci		} else if (ext4_inode_is_fast_symlink(inode)) {
496562306a36Sopenharmony_ci			inode->i_link = (char *)ei->i_data;
496662306a36Sopenharmony_ci			inode->i_op = &ext4_fast_symlink_inode_operations;
496762306a36Sopenharmony_ci			nd_terminate_link(ei->i_data, inode->i_size,
496862306a36Sopenharmony_ci				sizeof(ei->i_data) - 1);
496962306a36Sopenharmony_ci		} else {
497062306a36Sopenharmony_ci			inode->i_op = &ext4_symlink_inode_operations;
497162306a36Sopenharmony_ci		}
497262306a36Sopenharmony_ci	} else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
497362306a36Sopenharmony_ci	      S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
497462306a36Sopenharmony_ci		inode->i_op = &ext4_special_inode_operations;
497562306a36Sopenharmony_ci		if (raw_inode->i_block[0])
497662306a36Sopenharmony_ci			init_special_inode(inode, inode->i_mode,
497762306a36Sopenharmony_ci			   old_decode_dev(le32_to_cpu(raw_inode->i_block[0])));
497862306a36Sopenharmony_ci		else
497962306a36Sopenharmony_ci			init_special_inode(inode, inode->i_mode,
498062306a36Sopenharmony_ci			   new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
498162306a36Sopenharmony_ci	} else if (ino == EXT4_BOOT_LOADER_INO) {
498262306a36Sopenharmony_ci		make_bad_inode(inode);
498362306a36Sopenharmony_ci	} else {
498462306a36Sopenharmony_ci		ret = -EFSCORRUPTED;
498562306a36Sopenharmony_ci		ext4_error_inode(inode, function, line, 0,
498662306a36Sopenharmony_ci				 "iget: bogus i_mode (%o)", inode->i_mode);
498762306a36Sopenharmony_ci		goto bad_inode;
498862306a36Sopenharmony_ci	}
498962306a36Sopenharmony_ci	if (IS_CASEFOLDED(inode) && !ext4_has_feature_casefold(inode->i_sb)) {
499062306a36Sopenharmony_ci		ext4_error_inode(inode, function, line, 0,
499162306a36Sopenharmony_ci				 "casefold flag without casefold feature");
499262306a36Sopenharmony_ci		ret = -EFSCORRUPTED;
499362306a36Sopenharmony_ci		goto bad_inode;
499462306a36Sopenharmony_ci	}
499562306a36Sopenharmony_ci	if ((err_str = check_igot_inode(inode, flags)) != NULL) {
499662306a36Sopenharmony_ci		ext4_error_inode(inode, function, line, 0, err_str);
499762306a36Sopenharmony_ci		ret = -EFSCORRUPTED;
499862306a36Sopenharmony_ci		goto bad_inode;
499962306a36Sopenharmony_ci	}
500062306a36Sopenharmony_ci
500162306a36Sopenharmony_ci	brelse(iloc.bh);
500262306a36Sopenharmony_ci	unlock_new_inode(inode);
500362306a36Sopenharmony_ci	return inode;
500462306a36Sopenharmony_ci
500562306a36Sopenharmony_cibad_inode:
500662306a36Sopenharmony_ci	brelse(iloc.bh);
500762306a36Sopenharmony_ci	iget_failed(inode);
500862306a36Sopenharmony_ci	return ERR_PTR(ret);
500962306a36Sopenharmony_ci}
501062306a36Sopenharmony_ci
501162306a36Sopenharmony_cistatic void __ext4_update_other_inode_time(struct super_block *sb,
501262306a36Sopenharmony_ci					   unsigned long orig_ino,
501362306a36Sopenharmony_ci					   unsigned long ino,
501462306a36Sopenharmony_ci					   struct ext4_inode *raw_inode)
501562306a36Sopenharmony_ci{
501662306a36Sopenharmony_ci	struct inode *inode;
501762306a36Sopenharmony_ci
501862306a36Sopenharmony_ci	inode = find_inode_by_ino_rcu(sb, ino);
501962306a36Sopenharmony_ci	if (!inode)
502062306a36Sopenharmony_ci		return;
502162306a36Sopenharmony_ci
502262306a36Sopenharmony_ci	if (!inode_is_dirtytime_only(inode))
502362306a36Sopenharmony_ci		return;
502462306a36Sopenharmony_ci
502562306a36Sopenharmony_ci	spin_lock(&inode->i_lock);
502662306a36Sopenharmony_ci	if (inode_is_dirtytime_only(inode)) {
502762306a36Sopenharmony_ci		struct ext4_inode_info	*ei = EXT4_I(inode);
502862306a36Sopenharmony_ci
502962306a36Sopenharmony_ci		inode->i_state &= ~I_DIRTY_TIME;
503062306a36Sopenharmony_ci		spin_unlock(&inode->i_lock);
503162306a36Sopenharmony_ci
503262306a36Sopenharmony_ci		spin_lock(&ei->i_raw_lock);
503362306a36Sopenharmony_ci		EXT4_INODE_SET_CTIME(inode, raw_inode);
503462306a36Sopenharmony_ci		EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
503562306a36Sopenharmony_ci		EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
503662306a36Sopenharmony_ci		ext4_inode_csum_set(inode, raw_inode, ei);
503762306a36Sopenharmony_ci		spin_unlock(&ei->i_raw_lock);
503862306a36Sopenharmony_ci		trace_ext4_other_inode_update_time(inode, orig_ino);
503962306a36Sopenharmony_ci		return;
504062306a36Sopenharmony_ci	}
504162306a36Sopenharmony_ci	spin_unlock(&inode->i_lock);
504262306a36Sopenharmony_ci}
504362306a36Sopenharmony_ci
504462306a36Sopenharmony_ci/*
504562306a36Sopenharmony_ci * Opportunistically update the other time fields for other inodes in
504662306a36Sopenharmony_ci * the same inode table block.
504762306a36Sopenharmony_ci */
504862306a36Sopenharmony_cistatic void ext4_update_other_inodes_time(struct super_block *sb,
504962306a36Sopenharmony_ci					  unsigned long orig_ino, char *buf)
505062306a36Sopenharmony_ci{
505162306a36Sopenharmony_ci	unsigned long ino;
505262306a36Sopenharmony_ci	int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
505362306a36Sopenharmony_ci	int inode_size = EXT4_INODE_SIZE(sb);
505462306a36Sopenharmony_ci
505562306a36Sopenharmony_ci	/*
505662306a36Sopenharmony_ci	 * Calculate the first inode in the inode table block.  Inode
505762306a36Sopenharmony_ci	 * numbers are one-based.  That is, the first inode in a block
505862306a36Sopenharmony_ci	 * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1).
505962306a36Sopenharmony_ci	 */
506062306a36Sopenharmony_ci	ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
506162306a36Sopenharmony_ci	rcu_read_lock();
506262306a36Sopenharmony_ci	for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
506362306a36Sopenharmony_ci		if (ino == orig_ino)
506462306a36Sopenharmony_ci			continue;
506562306a36Sopenharmony_ci		__ext4_update_other_inode_time(sb, orig_ino, ino,
506662306a36Sopenharmony_ci					       (struct ext4_inode *)buf);
506762306a36Sopenharmony_ci	}
506862306a36Sopenharmony_ci	rcu_read_unlock();
506962306a36Sopenharmony_ci}
507062306a36Sopenharmony_ci
507162306a36Sopenharmony_ci/*
507262306a36Sopenharmony_ci * Post the struct inode info into an on-disk inode location in the
507362306a36Sopenharmony_ci * buffer-cache.  This gobbles the caller's reference to the
507462306a36Sopenharmony_ci * buffer_head in the inode location struct.
507562306a36Sopenharmony_ci *
507662306a36Sopenharmony_ci * The caller must have write access to iloc->bh.
507762306a36Sopenharmony_ci */
507862306a36Sopenharmony_cistatic int ext4_do_update_inode(handle_t *handle,
507962306a36Sopenharmony_ci				struct inode *inode,
508062306a36Sopenharmony_ci				struct ext4_iloc *iloc)
508162306a36Sopenharmony_ci{
508262306a36Sopenharmony_ci	struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
508362306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
508462306a36Sopenharmony_ci	struct buffer_head *bh = iloc->bh;
508562306a36Sopenharmony_ci	struct super_block *sb = inode->i_sb;
508662306a36Sopenharmony_ci	int err;
508762306a36Sopenharmony_ci	int need_datasync = 0, set_large_file = 0;
508862306a36Sopenharmony_ci
508962306a36Sopenharmony_ci	spin_lock(&ei->i_raw_lock);
509062306a36Sopenharmony_ci
509162306a36Sopenharmony_ci	/*
509262306a36Sopenharmony_ci	 * For fields not tracked in the in-memory inode, initialise them
509362306a36Sopenharmony_ci	 * to zero for new inodes.
509462306a36Sopenharmony_ci	 */
509562306a36Sopenharmony_ci	if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
509662306a36Sopenharmony_ci		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
509762306a36Sopenharmony_ci
509862306a36Sopenharmony_ci	if (READ_ONCE(ei->i_disksize) != ext4_isize(inode->i_sb, raw_inode))
509962306a36Sopenharmony_ci		need_datasync = 1;
510062306a36Sopenharmony_ci	if (ei->i_disksize > 0x7fffffffULL) {
510162306a36Sopenharmony_ci		if (!ext4_has_feature_large_file(sb) ||
510262306a36Sopenharmony_ci		    EXT4_SB(sb)->s_es->s_rev_level == cpu_to_le32(EXT4_GOOD_OLD_REV))
510362306a36Sopenharmony_ci			set_large_file = 1;
510462306a36Sopenharmony_ci	}
510562306a36Sopenharmony_ci
510662306a36Sopenharmony_ci	err = ext4_fill_raw_inode(inode, raw_inode);
510762306a36Sopenharmony_ci	spin_unlock(&ei->i_raw_lock);
510862306a36Sopenharmony_ci	if (err) {
510962306a36Sopenharmony_ci		EXT4_ERROR_INODE(inode, "corrupted inode contents");
511062306a36Sopenharmony_ci		goto out_brelse;
511162306a36Sopenharmony_ci	}
511262306a36Sopenharmony_ci
511362306a36Sopenharmony_ci	if (inode->i_sb->s_flags & SB_LAZYTIME)
511462306a36Sopenharmony_ci		ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
511562306a36Sopenharmony_ci					      bh->b_data);
511662306a36Sopenharmony_ci
511762306a36Sopenharmony_ci	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
511862306a36Sopenharmony_ci	err = ext4_handle_dirty_metadata(handle, NULL, bh);
511962306a36Sopenharmony_ci	if (err)
512062306a36Sopenharmony_ci		goto out_error;
512162306a36Sopenharmony_ci	ext4_clear_inode_state(inode, EXT4_STATE_NEW);
512262306a36Sopenharmony_ci	if (set_large_file) {
512362306a36Sopenharmony_ci		BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
512462306a36Sopenharmony_ci		err = ext4_journal_get_write_access(handle, sb,
512562306a36Sopenharmony_ci						    EXT4_SB(sb)->s_sbh,
512662306a36Sopenharmony_ci						    EXT4_JTR_NONE);
512762306a36Sopenharmony_ci		if (err)
512862306a36Sopenharmony_ci			goto out_error;
512962306a36Sopenharmony_ci		lock_buffer(EXT4_SB(sb)->s_sbh);
513062306a36Sopenharmony_ci		ext4_set_feature_large_file(sb);
513162306a36Sopenharmony_ci		ext4_superblock_csum_set(sb);
513262306a36Sopenharmony_ci		unlock_buffer(EXT4_SB(sb)->s_sbh);
513362306a36Sopenharmony_ci		ext4_handle_sync(handle);
513462306a36Sopenharmony_ci		err = ext4_handle_dirty_metadata(handle, NULL,
513562306a36Sopenharmony_ci						 EXT4_SB(sb)->s_sbh);
513662306a36Sopenharmony_ci	}
513762306a36Sopenharmony_ci	ext4_update_inode_fsync_trans(handle, inode, need_datasync);
513862306a36Sopenharmony_ciout_error:
513962306a36Sopenharmony_ci	ext4_std_error(inode->i_sb, err);
514062306a36Sopenharmony_ciout_brelse:
514162306a36Sopenharmony_ci	brelse(bh);
514262306a36Sopenharmony_ci	return err;
514362306a36Sopenharmony_ci}
514462306a36Sopenharmony_ci
514562306a36Sopenharmony_ci/*
514662306a36Sopenharmony_ci * ext4_write_inode()
514762306a36Sopenharmony_ci *
514862306a36Sopenharmony_ci * We are called from a few places:
514962306a36Sopenharmony_ci *
515062306a36Sopenharmony_ci * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
515162306a36Sopenharmony_ci *   Here, there will be no transaction running. We wait for any running
515262306a36Sopenharmony_ci *   transaction to commit.
515362306a36Sopenharmony_ci *
515462306a36Sopenharmony_ci * - Within flush work (sys_sync(), kupdate and such).
515562306a36Sopenharmony_ci *   We wait on commit, if told to.
515662306a36Sopenharmony_ci *
515762306a36Sopenharmony_ci * - Within iput_final() -> write_inode_now()
515862306a36Sopenharmony_ci *   We wait on commit, if told to.
515962306a36Sopenharmony_ci *
516062306a36Sopenharmony_ci * In all cases it is actually safe for us to return without doing anything,
516162306a36Sopenharmony_ci * because the inode has been copied into a raw inode buffer in
516262306a36Sopenharmony_ci * ext4_mark_inode_dirty().  This is a correctness thing for WB_SYNC_ALL
516362306a36Sopenharmony_ci * writeback.
516462306a36Sopenharmony_ci *
516562306a36Sopenharmony_ci * Note that we are absolutely dependent upon all inode dirtiers doing the
516662306a36Sopenharmony_ci * right thing: they *must* call mark_inode_dirty() after dirtying info in
516762306a36Sopenharmony_ci * which we are interested.
516862306a36Sopenharmony_ci *
516962306a36Sopenharmony_ci * It would be a bug for them to not do this.  The code:
517062306a36Sopenharmony_ci *
517162306a36Sopenharmony_ci *	mark_inode_dirty(inode)
517262306a36Sopenharmony_ci *	stuff();
517362306a36Sopenharmony_ci *	inode->i_size = expr;
517462306a36Sopenharmony_ci *
517562306a36Sopenharmony_ci * is in error because write_inode() could occur while `stuff()' is running,
517662306a36Sopenharmony_ci * and the new i_size will be lost.  Plus the inode will no longer be on the
517762306a36Sopenharmony_ci * superblock's dirty inode list.
517862306a36Sopenharmony_ci */
517962306a36Sopenharmony_ciint ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
518062306a36Sopenharmony_ci{
518162306a36Sopenharmony_ci	int err;
518262306a36Sopenharmony_ci
518362306a36Sopenharmony_ci	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
518462306a36Sopenharmony_ci		return 0;
518562306a36Sopenharmony_ci
518662306a36Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
518762306a36Sopenharmony_ci		return -EIO;
518862306a36Sopenharmony_ci
518962306a36Sopenharmony_ci	if (EXT4_SB(inode->i_sb)->s_journal) {
519062306a36Sopenharmony_ci		if (ext4_journal_current_handle()) {
519162306a36Sopenharmony_ci			ext4_debug("called recursively, non-PF_MEMALLOC!\n");
519262306a36Sopenharmony_ci			dump_stack();
519362306a36Sopenharmony_ci			return -EIO;
519462306a36Sopenharmony_ci		}
519562306a36Sopenharmony_ci
519662306a36Sopenharmony_ci		/*
519762306a36Sopenharmony_ci		 * No need to force transaction in WB_SYNC_NONE mode. Also
519862306a36Sopenharmony_ci		 * ext4_sync_fs() will force the commit after everything is
519962306a36Sopenharmony_ci		 * written.
520062306a36Sopenharmony_ci		 */
520162306a36Sopenharmony_ci		if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
520262306a36Sopenharmony_ci			return 0;
520362306a36Sopenharmony_ci
520462306a36Sopenharmony_ci		err = ext4_fc_commit(EXT4_SB(inode->i_sb)->s_journal,
520562306a36Sopenharmony_ci						EXT4_I(inode)->i_sync_tid);
520662306a36Sopenharmony_ci	} else {
520762306a36Sopenharmony_ci		struct ext4_iloc iloc;
520862306a36Sopenharmony_ci
520962306a36Sopenharmony_ci		err = __ext4_get_inode_loc_noinmem(inode, &iloc);
521062306a36Sopenharmony_ci		if (err)
521162306a36Sopenharmony_ci			return err;
521262306a36Sopenharmony_ci		/*
521362306a36Sopenharmony_ci		 * sync(2) will flush the whole buffer cache. No need to do
521462306a36Sopenharmony_ci		 * it here separately for each inode.
521562306a36Sopenharmony_ci		 */
521662306a36Sopenharmony_ci		if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
521762306a36Sopenharmony_ci			sync_dirty_buffer(iloc.bh);
521862306a36Sopenharmony_ci		if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) {
521962306a36Sopenharmony_ci			ext4_error_inode_block(inode, iloc.bh->b_blocknr, EIO,
522062306a36Sopenharmony_ci					       "IO error syncing inode");
522162306a36Sopenharmony_ci			err = -EIO;
522262306a36Sopenharmony_ci		}
522362306a36Sopenharmony_ci		brelse(iloc.bh);
522462306a36Sopenharmony_ci	}
522562306a36Sopenharmony_ci	return err;
522662306a36Sopenharmony_ci}
522762306a36Sopenharmony_ci
522862306a36Sopenharmony_ci/*
522962306a36Sopenharmony_ci * In data=journal mode ext4_journalled_invalidate_folio() may fail to invalidate
523062306a36Sopenharmony_ci * buffers that are attached to a folio straddling i_size and are undergoing
523162306a36Sopenharmony_ci * commit. In that case we have to wait for commit to finish and try again.
523262306a36Sopenharmony_ci */
523362306a36Sopenharmony_cistatic void ext4_wait_for_tail_page_commit(struct inode *inode)
523462306a36Sopenharmony_ci{
523562306a36Sopenharmony_ci	unsigned offset;
523662306a36Sopenharmony_ci	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
523762306a36Sopenharmony_ci	tid_t commit_tid = 0;
523862306a36Sopenharmony_ci	int ret;
523962306a36Sopenharmony_ci
524062306a36Sopenharmony_ci	offset = inode->i_size & (PAGE_SIZE - 1);
524162306a36Sopenharmony_ci	/*
524262306a36Sopenharmony_ci	 * If the folio is fully truncated, we don't need to wait for any commit
524362306a36Sopenharmony_ci	 * (and we even should not as __ext4_journalled_invalidate_folio() may
524462306a36Sopenharmony_ci	 * strip all buffers from the folio but keep the folio dirty which can then
524562306a36Sopenharmony_ci	 * confuse e.g. concurrent ext4_writepages() seeing dirty folio without
524662306a36Sopenharmony_ci	 * buffers). Also we don't need to wait for any commit if all buffers in
524762306a36Sopenharmony_ci	 * the folio remain valid. This is most beneficial for the common case of
524862306a36Sopenharmony_ci	 * blocksize == PAGESIZE.
524962306a36Sopenharmony_ci	 */
525062306a36Sopenharmony_ci	if (!offset || offset > (PAGE_SIZE - i_blocksize(inode)))
525162306a36Sopenharmony_ci		return;
525262306a36Sopenharmony_ci	while (1) {
525362306a36Sopenharmony_ci		struct folio *folio = filemap_lock_folio(inode->i_mapping,
525462306a36Sopenharmony_ci				      inode->i_size >> PAGE_SHIFT);
525562306a36Sopenharmony_ci		if (IS_ERR(folio))
525662306a36Sopenharmony_ci			return;
525762306a36Sopenharmony_ci		ret = __ext4_journalled_invalidate_folio(folio, offset,
525862306a36Sopenharmony_ci						folio_size(folio) - offset);
525962306a36Sopenharmony_ci		folio_unlock(folio);
526062306a36Sopenharmony_ci		folio_put(folio);
526162306a36Sopenharmony_ci		if (ret != -EBUSY)
526262306a36Sopenharmony_ci			return;
526362306a36Sopenharmony_ci		commit_tid = 0;
526462306a36Sopenharmony_ci		read_lock(&journal->j_state_lock);
526562306a36Sopenharmony_ci		if (journal->j_committing_transaction)
526662306a36Sopenharmony_ci			commit_tid = journal->j_committing_transaction->t_tid;
526762306a36Sopenharmony_ci		read_unlock(&journal->j_state_lock);
526862306a36Sopenharmony_ci		if (commit_tid)
526962306a36Sopenharmony_ci			jbd2_log_wait_commit(journal, commit_tid);
527062306a36Sopenharmony_ci	}
527162306a36Sopenharmony_ci}
527262306a36Sopenharmony_ci
/*
 * ext4_setattr()
 *
 * Called from notify_change.
 *
 * Applies the changes described by @attr to the inode behind @dentry:
 * an owner/group change (including the quota transfer), a size change
 * (truncate up or down), and finally the remaining attributes through
 * setattr_copy().  Returns 0 on success or a negative errno.
 *
 * We want to trap VFS attempts to truncate the file as soon as
 * possible.  In particular, we want to make sure that when the VFS
 * shrinks i_size, we put the inode on the orphan list and modify
 * i_disksize immediately, so that during the subsequent flushing of
 * dirty pages and freeing of disk blocks, we can guarantee that any
 * commit will leave the blocks being flushed in an unused state on
 * disk.  (On recovery, the inode will get truncated and the blocks will
 * be freed, so we have a strong guarantee that no future commit will
 * leave these blocks visible to the user.)
 *
 * Another thing we have to assure is that if we are in ordered mode
 * and inode is still attached to the committing transaction, we must
 * start writeout of all the dirty pages which are being truncated.
 * This way we are sure that all the data written in the previous
 * transaction are already on disk (truncate waits for pages under
 * writeback).
 *
 * Called with inode->i_rwsem down.
 */
int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		 struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	int error, rc = 0;
	int orphan = 0;
	const unsigned int ia_valid = attr->ia_valid;
	bool inc_ivers = true;

	/*
	 * Early refusals: filesystem shut down, immutable inode, or an
	 * attempt to change mode/owner/group/explicit times on an
	 * append-only inode.
	 */
	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
		return -EIO;

	if (unlikely(IS_IMMUTABLE(inode)))
		return -EPERM;

	if (unlikely(IS_APPEND(inode) &&
		     (ia_valid & (ATTR_MODE | ATTR_UID |
				  ATTR_GID | ATTR_TIMES_SET))))
		return -EPERM;

	/* Generic, fscrypt and fsverity checks; any failure aborts here. */
	error = setattr_prepare(idmap, dentry, attr);
	if (error)
		return error;

	error = fscrypt_prepare_setattr(dentry, attr);
	if (error)
		return error;

	error = fsverity_prepare_setattr(dentry, attr);
	if (error)
		return error;

	if (is_quota_modification(idmap, inode, attr)) {
		error = dquot_initialize(inode);
		if (error)
			return error;
	}

	/* Owner and/or group change: move quota usage in one transaction. */
	if (i_uid_needs_update(idmap, attr, inode) ||
	    i_gid_needs_update(idmap, attr, inode)) {
		handle_t *handle;

		/* (user+group)*(old+new) structure, inode write (sb,
		 * inode block, ? - but truncate inode update has it) */
		handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
			(EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb) +
			 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)) + 3);
		if (IS_ERR(handle)) {
			error = PTR_ERR(handle);
			goto err_out;
		}

		/* dquot_transfer() calls back ext4_get_inode_usage() which
		 * counts xattr inode references.
		 */
		down_read(&EXT4_I(inode)->xattr_sem);
		error = dquot_transfer(idmap, inode, attr);
		up_read(&EXT4_I(inode)->xattr_sem);

		/*
		 * Note: this and the following failure return directly and
		 * so bypass the ext4_std_error() call at err_out.
		 */
		if (error) {
			ext4_journal_stop(handle);
			return error;
		}
		/* Update corresponding info in inode so that everything is in
		 * one transaction */
		i_uid_update(idmap, attr, inode);
		i_gid_update(idmap, attr, inode);
		error = ext4_mark_inode_dirty(handle, inode);
		ext4_journal_stop(handle);
		if (unlikely(error)) {
			return error;
		}
	}

	if (attr->ia_valid & ATTR_SIZE) {
		handle_t *handle;
		/* i_size as it was on entry to the size-change path. */
		loff_t oldsize = inode->i_size;
		loff_t old_disksize;
		int shrink = (attr->ia_size < inode->i_size);

		/* Files without the extents flag are capped at s_bitmap_maxbytes. */
		if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
			struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

			if (attr->ia_size > sbi->s_bitmap_maxbytes) {
				return -EFBIG;
			}
		}
		/* Only regular files may be resized. */
		if (!S_ISREG(inode->i_mode)) {
			return -EINVAL;
		}

		/* Size unchanged: do not bump i_version at the end. */
		if (attr->ia_size == inode->i_size)
			inc_ivers = false;

		if (shrink) {
			if (ext4_should_order_data(inode)) {
				error = ext4_begin_ordered_truncate(inode,
							    attr->ia_size);
				if (error)
					goto err_out;
			}
			/*
			 * Blocks are going to be removed from the inode. Wait
			 * for dio in flight.
			 */
			inode_dio_wait(inode);
		}

		/* Held until out_mmap_sem (or the early unlock just below). */
		filemap_invalidate_lock(inode->i_mapping);

		/*
		 * 'error' is still 0 on every path reaching this point, so on
		 * failure err_out ends up returning rc.
		 */
		rc = ext4_break_layouts(inode);
		if (rc) {
			filemap_invalidate_unlock(inode->i_mapping);
			goto err_out;
		}

		if (attr->ia_size != inode->i_size) {
			handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
			if (IS_ERR(handle)) {
				error = PTR_ERR(handle);
				goto out_mmap_sem;
			}
			/*
			 * 'orphan' is set even if ext4_orphan_add() failed;
			 * its error is kept in 'error' and checked after the
			 * i_disksize update below.
			 */
			if (ext4_handle_valid(handle) && shrink) {
				error = ext4_orphan_add(handle, inode);
				orphan = 1;
			}
			/*
			 * Update c/mtime on truncate up, ext4_truncate() will
			 * update c/mtime in shrink case below
			 */
			if (!shrink)
				inode->i_mtime = inode_set_ctime_current(inode);

			/*
			 * Record the affected logical block range:
			 * shrink: from the last block of the new size up to
			 *         EXT_MAX_BLOCKS - 1;
			 * grow:   from the last block of the old size up to
			 *         the last block of the new size.
			 */
			if (shrink)
				ext4_fc_track_range(handle, inode,
					(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
					inode->i_sb->s_blocksize_bits,
					EXT_MAX_BLOCKS - 1);
			else
				ext4_fc_track_range(
					handle, inode,
					(oldsize > 0 ? oldsize - 1 : oldsize) >>
					inode->i_sb->s_blocksize_bits,
					(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
					inode->i_sb->s_blocksize_bits);

			down_write(&EXT4_I(inode)->i_data_sem);
			/* Saved so i_disksize can be rolled back on failure. */
			old_disksize = EXT4_I(inode)->i_disksize;
			EXT4_I(inode)->i_disksize = attr->ia_size;
			rc = ext4_mark_inode_dirty(handle, inode);
			/* Keep an earlier (orphan add) error if there was one. */
			if (!error)
				error = rc;
			/*
			 * We have to update i_size under i_data_sem together
			 * with i_disksize to avoid races with writeback code
			 * running ext4_wb_update_i_disksize().
			 */
			if (!error)
				i_size_write(inode, attr->ia_size);
			else
				EXT4_I(inode)->i_disksize = old_disksize;
			up_write(&EXT4_I(inode)->i_data_sem);
			ext4_journal_stop(handle);
			if (error)
				goto out_mmap_sem;
			if (!shrink) {
				pagecache_isize_extended(inode, oldsize,
							 inode->i_size);
			} else if (ext4_should_journal_data(inode)) {
				ext4_wait_for_tail_page_commit(inode);
			}
		}

		/*
		 * Truncate pagecache after we've waited for commit
		 * in data=journal mode to make pages freeable.
		 */
		truncate_pagecache(inode, inode->i_size);
		/*
		 * Call ext4_truncate() even if i_size didn't change to
		 * truncate possible preallocated blocks.
		 */
		if (attr->ia_size <= oldsize) {
			rc = ext4_truncate(inode);
			if (rc)
				error = rc;
		}
out_mmap_sem:
		filemap_invalidate_unlock(inode->i_mapping);
	}

	/* Everything succeeded so far: apply the remaining attributes. */
	if (!error) {
		if (inc_ivers)
			inode_inc_iversion(inode);
		setattr_copy(idmap, inode, attr);
		mark_inode_dirty(inode);
	}

	/*
	 * If the call to ext4_truncate failed to get a transaction handle at
	 * all, we need to clean up the in-core orphan list manually.
	 */
	if (orphan && inode->i_nlink)
		ext4_orphan_del(NULL, inode);

	if (!error && (ia_valid & ATTR_MODE))
		rc = posix_acl_chmod(idmap, dentry, inode->i_mode);

err_out:
	/* 'error' takes precedence; rc is only returned when error is 0. */
	if  (error)
		ext4_std_error(inode->i_sb, error);
	if (!error)
		error = rc;
	return error;
}
551262306a36Sopenharmony_ci
551362306a36Sopenharmony_ciu32 ext4_dio_alignment(struct inode *inode)
551462306a36Sopenharmony_ci{
551562306a36Sopenharmony_ci	if (fsverity_active(inode))
551662306a36Sopenharmony_ci		return 0;
551762306a36Sopenharmony_ci	if (ext4_should_journal_data(inode))
551862306a36Sopenharmony_ci		return 0;
551962306a36Sopenharmony_ci	if (ext4_has_inline_data(inode))
552062306a36Sopenharmony_ci		return 0;
552162306a36Sopenharmony_ci	if (IS_ENCRYPTED(inode)) {
552262306a36Sopenharmony_ci		if (!fscrypt_dio_supported(inode))
552362306a36Sopenharmony_ci			return 0;
552462306a36Sopenharmony_ci		return i_blocksize(inode);
552562306a36Sopenharmony_ci	}
552662306a36Sopenharmony_ci	return 1; /* use the iomap defaults */
552762306a36Sopenharmony_ci}
552862306a36Sopenharmony_ci
552962306a36Sopenharmony_ciint ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
553062306a36Sopenharmony_ci		 struct kstat *stat, u32 request_mask, unsigned int query_flags)
553162306a36Sopenharmony_ci{
553262306a36Sopenharmony_ci	struct inode *inode = d_inode(path->dentry);
553362306a36Sopenharmony_ci	struct ext4_inode *raw_inode;
553462306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
553562306a36Sopenharmony_ci	unsigned int flags;
553662306a36Sopenharmony_ci
553762306a36Sopenharmony_ci	if ((request_mask & STATX_BTIME) &&
553862306a36Sopenharmony_ci	    EXT4_FITS_IN_INODE(raw_inode, ei, i_crtime)) {
553962306a36Sopenharmony_ci		stat->result_mask |= STATX_BTIME;
554062306a36Sopenharmony_ci		stat->btime.tv_sec = ei->i_crtime.tv_sec;
554162306a36Sopenharmony_ci		stat->btime.tv_nsec = ei->i_crtime.tv_nsec;
554262306a36Sopenharmony_ci	}
554362306a36Sopenharmony_ci
554462306a36Sopenharmony_ci	/*
554562306a36Sopenharmony_ci	 * Return the DIO alignment restrictions if requested.  We only return
554662306a36Sopenharmony_ci	 * this information when requested, since on encrypted files it might
554762306a36Sopenharmony_ci	 * take a fair bit of work to get if the file wasn't opened recently.
554862306a36Sopenharmony_ci	 */
554962306a36Sopenharmony_ci	if ((request_mask & STATX_DIOALIGN) && S_ISREG(inode->i_mode)) {
555062306a36Sopenharmony_ci		u32 dio_align = ext4_dio_alignment(inode);
555162306a36Sopenharmony_ci
555262306a36Sopenharmony_ci		stat->result_mask |= STATX_DIOALIGN;
555362306a36Sopenharmony_ci		if (dio_align == 1) {
555462306a36Sopenharmony_ci			struct block_device *bdev = inode->i_sb->s_bdev;
555562306a36Sopenharmony_ci
555662306a36Sopenharmony_ci			/* iomap defaults */
555762306a36Sopenharmony_ci			stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
555862306a36Sopenharmony_ci			stat->dio_offset_align = bdev_logical_block_size(bdev);
555962306a36Sopenharmony_ci		} else {
556062306a36Sopenharmony_ci			stat->dio_mem_align = dio_align;
556162306a36Sopenharmony_ci			stat->dio_offset_align = dio_align;
556262306a36Sopenharmony_ci		}
556362306a36Sopenharmony_ci	}
556462306a36Sopenharmony_ci
556562306a36Sopenharmony_ci	flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
556662306a36Sopenharmony_ci	if (flags & EXT4_APPEND_FL)
556762306a36Sopenharmony_ci		stat->attributes |= STATX_ATTR_APPEND;
556862306a36Sopenharmony_ci	if (flags & EXT4_COMPR_FL)
556962306a36Sopenharmony_ci		stat->attributes |= STATX_ATTR_COMPRESSED;
557062306a36Sopenharmony_ci	if (flags & EXT4_ENCRYPT_FL)
557162306a36Sopenharmony_ci		stat->attributes |= STATX_ATTR_ENCRYPTED;
557262306a36Sopenharmony_ci	if (flags & EXT4_IMMUTABLE_FL)
557362306a36Sopenharmony_ci		stat->attributes |= STATX_ATTR_IMMUTABLE;
557462306a36Sopenharmony_ci	if (flags & EXT4_NODUMP_FL)
557562306a36Sopenharmony_ci		stat->attributes |= STATX_ATTR_NODUMP;
557662306a36Sopenharmony_ci	if (flags & EXT4_VERITY_FL)
557762306a36Sopenharmony_ci		stat->attributes |= STATX_ATTR_VERITY;
557862306a36Sopenharmony_ci
557962306a36Sopenharmony_ci	stat->attributes_mask |= (STATX_ATTR_APPEND |
558062306a36Sopenharmony_ci				  STATX_ATTR_COMPRESSED |
558162306a36Sopenharmony_ci				  STATX_ATTR_ENCRYPTED |
558262306a36Sopenharmony_ci				  STATX_ATTR_IMMUTABLE |
558362306a36Sopenharmony_ci				  STATX_ATTR_NODUMP |
558462306a36Sopenharmony_ci				  STATX_ATTR_VERITY);
558562306a36Sopenharmony_ci
558662306a36Sopenharmony_ci	generic_fillattr(idmap, request_mask, inode, stat);
558762306a36Sopenharmony_ci	return 0;
558862306a36Sopenharmony_ci}
558962306a36Sopenharmony_ci
559062306a36Sopenharmony_ciint ext4_file_getattr(struct mnt_idmap *idmap,
559162306a36Sopenharmony_ci		      const struct path *path, struct kstat *stat,
559262306a36Sopenharmony_ci		      u32 request_mask, unsigned int query_flags)
559362306a36Sopenharmony_ci{
559462306a36Sopenharmony_ci	struct inode *inode = d_inode(path->dentry);
559562306a36Sopenharmony_ci	u64 delalloc_blocks;
559662306a36Sopenharmony_ci
559762306a36Sopenharmony_ci	ext4_getattr(idmap, path, stat, request_mask, query_flags);
559862306a36Sopenharmony_ci
559962306a36Sopenharmony_ci	/*
560062306a36Sopenharmony_ci	 * If there is inline data in the inode, the inode will normally not
560162306a36Sopenharmony_ci	 * have data blocks allocated (it may have an external xattr block).
560262306a36Sopenharmony_ci	 * Report at least one sector for such files, so tools like tar, rsync,
560362306a36Sopenharmony_ci	 * others don't incorrectly think the file is completely sparse.
560462306a36Sopenharmony_ci	 */
560562306a36Sopenharmony_ci	if (unlikely(ext4_has_inline_data(inode)))
560662306a36Sopenharmony_ci		stat->blocks += (stat->size + 511) >> 9;
560762306a36Sopenharmony_ci
560862306a36Sopenharmony_ci	/*
560962306a36Sopenharmony_ci	 * We can't update i_blocks if the block allocation is delayed
561062306a36Sopenharmony_ci	 * otherwise in the case of system crash before the real block
561162306a36Sopenharmony_ci	 * allocation is done, we will have i_blocks inconsistent with
561262306a36Sopenharmony_ci	 * on-disk file blocks.
561362306a36Sopenharmony_ci	 * We always keep i_blocks updated together with real
561462306a36Sopenharmony_ci	 * allocation. But to not confuse with user, stat
561562306a36Sopenharmony_ci	 * will return the blocks that include the delayed allocation
561662306a36Sopenharmony_ci	 * blocks for this file.
561762306a36Sopenharmony_ci	 */
561862306a36Sopenharmony_ci	delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
561962306a36Sopenharmony_ci				   EXT4_I(inode)->i_reserved_data_blocks);
562062306a36Sopenharmony_ci	stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits - 9);
562162306a36Sopenharmony_ci	return 0;
562262306a36Sopenharmony_ci}
562362306a36Sopenharmony_ci
562462306a36Sopenharmony_cistatic int ext4_index_trans_blocks(struct inode *inode, int lblocks,
562562306a36Sopenharmony_ci				   int pextents)
562662306a36Sopenharmony_ci{
562762306a36Sopenharmony_ci	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
562862306a36Sopenharmony_ci		return ext4_ind_trans_blocks(inode, lblocks);
562962306a36Sopenharmony_ci	return ext4_ext_index_trans_blocks(inode, pextents);
563062306a36Sopenharmony_ci}
563162306a36Sopenharmony_ci
563262306a36Sopenharmony_ci/*
563362306a36Sopenharmony_ci * Account for index blocks, block groups bitmaps and block group
563462306a36Sopenharmony_ci * descriptor blocks if modify datablocks and index blocks
563562306a36Sopenharmony_ci * worse case, the indexs blocks spread over different block groups
563662306a36Sopenharmony_ci *
563762306a36Sopenharmony_ci * If datablocks are discontiguous, they are possible to spread over
563862306a36Sopenharmony_ci * different block groups too. If they are contiguous, with flexbg,
563962306a36Sopenharmony_ci * they could still across block group boundary.
564062306a36Sopenharmony_ci *
564162306a36Sopenharmony_ci * Also account for superblock, inode, quota and xattr blocks
564262306a36Sopenharmony_ci */
564362306a36Sopenharmony_cistatic int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
564462306a36Sopenharmony_ci				  int pextents)
564562306a36Sopenharmony_ci{
564662306a36Sopenharmony_ci	ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
564762306a36Sopenharmony_ci	int gdpblocks;
564862306a36Sopenharmony_ci	int idxblocks;
564962306a36Sopenharmony_ci	int ret;
565062306a36Sopenharmony_ci
565162306a36Sopenharmony_ci	/*
565262306a36Sopenharmony_ci	 * How many index blocks need to touch to map @lblocks logical blocks
565362306a36Sopenharmony_ci	 * to @pextents physical extents?
565462306a36Sopenharmony_ci	 */
565562306a36Sopenharmony_ci	idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents);
565662306a36Sopenharmony_ci
565762306a36Sopenharmony_ci	ret = idxblocks;
565862306a36Sopenharmony_ci
565962306a36Sopenharmony_ci	/*
566062306a36Sopenharmony_ci	 * Now let's see how many group bitmaps and group descriptors need
566162306a36Sopenharmony_ci	 * to account
566262306a36Sopenharmony_ci	 */
566362306a36Sopenharmony_ci	groups = idxblocks + pextents;
566462306a36Sopenharmony_ci	gdpblocks = groups;
566562306a36Sopenharmony_ci	if (groups > ngroups)
566662306a36Sopenharmony_ci		groups = ngroups;
566762306a36Sopenharmony_ci	if (groups > EXT4_SB(inode->i_sb)->s_gdb_count)
566862306a36Sopenharmony_ci		gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count;
566962306a36Sopenharmony_ci
567062306a36Sopenharmony_ci	/* bitmaps and block group descriptor blocks */
567162306a36Sopenharmony_ci	ret += groups + gdpblocks;
567262306a36Sopenharmony_ci
567362306a36Sopenharmony_ci	/* Blocks for super block, inode, quota and xattr blocks */
567462306a36Sopenharmony_ci	ret += EXT4_META_TRANS_BLOCKS(inode->i_sb);
567562306a36Sopenharmony_ci
567662306a36Sopenharmony_ci	return ret;
567762306a36Sopenharmony_ci}
567862306a36Sopenharmony_ci
/*
 * Calculate the total number of credits to reserve so that the
 * modification of a single page fits into a single transaction, which
 * may include multiple chunks of block allocations.
 *
 * This could be called via ext4_write_begin().
 *
 * The worst case is assumed: one new block per extent.
 */
int ext4_writepage_trans_blocks(struct inode *inode)
{
	int bpp = ext4_journal_blocks_per_page(inode);
	int credits = ext4_meta_trans_blocks(inode, bpp, bpp);

	/* In journalled-data mode the data blocks need credits as well. */
	return ext4_should_journal_data(inode) ? credits + bpp : credits;
}
570162306a36Sopenharmony_ci
/*
 * Journal credits needed to modify one chunk of data.
 *
 * Used by DIO, fallocate and any other caller that goes through
 * ext4_map_blocks() to map or allocate a contiguous run of disk blocks.
 *
 * Credits for the data blocks themselves are not part of the result:
 * DIO and fallocate do not need to journal data buffers.
 *
 * @inode:    inode being modified
 * @nrblocks: number of logical blocks in the chunk
 *
 * Returns the number of credits to reserve.
 */
int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks)
{
	/* A contiguous chunk is accounted as exactly one physical extent. */
	const int nr_extents = 1;

	return ext4_meta_trans_blocks(inode, nrblocks, nr_extents);
}
571562306a36Sopenharmony_ci
571662306a36Sopenharmony_ci/*
571762306a36Sopenharmony_ci * The caller must have previously called ext4_reserve_inode_write().
571862306a36Sopenharmony_ci * Give this, we know that the caller already has write access to iloc->bh.
571962306a36Sopenharmony_ci */
572062306a36Sopenharmony_ciint ext4_mark_iloc_dirty(handle_t *handle,
572162306a36Sopenharmony_ci			 struct inode *inode, struct ext4_iloc *iloc)
572262306a36Sopenharmony_ci{
572362306a36Sopenharmony_ci	int err = 0;
572462306a36Sopenharmony_ci
572562306a36Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(inode->i_sb))) {
572662306a36Sopenharmony_ci		put_bh(iloc->bh);
572762306a36Sopenharmony_ci		return -EIO;
572862306a36Sopenharmony_ci	}
572962306a36Sopenharmony_ci	ext4_fc_track_inode(handle, inode);
573062306a36Sopenharmony_ci
573162306a36Sopenharmony_ci	/* the do_update_inode consumes one bh->b_count */
573262306a36Sopenharmony_ci	get_bh(iloc->bh);
573362306a36Sopenharmony_ci
573462306a36Sopenharmony_ci	/* ext4_do_update_inode() does jbd2_journal_dirty_metadata */
573562306a36Sopenharmony_ci	err = ext4_do_update_inode(handle, inode, iloc);
573662306a36Sopenharmony_ci	put_bh(iloc->bh);
573762306a36Sopenharmony_ci	return err;
573862306a36Sopenharmony_ci}
573962306a36Sopenharmony_ci
574062306a36Sopenharmony_ci/*
574162306a36Sopenharmony_ci * On success, We end up with an outstanding reference count against
574262306a36Sopenharmony_ci * iloc->bh.  This _must_ be cleaned up later.
574362306a36Sopenharmony_ci */
574462306a36Sopenharmony_ci
574562306a36Sopenharmony_ciint
574662306a36Sopenharmony_ciext4_reserve_inode_write(handle_t *handle, struct inode *inode,
574762306a36Sopenharmony_ci			 struct ext4_iloc *iloc)
574862306a36Sopenharmony_ci{
574962306a36Sopenharmony_ci	int err;
575062306a36Sopenharmony_ci
575162306a36Sopenharmony_ci	if (unlikely(ext4_forced_shutdown(inode->i_sb)))
575262306a36Sopenharmony_ci		return -EIO;
575362306a36Sopenharmony_ci
575462306a36Sopenharmony_ci	err = ext4_get_inode_loc(inode, iloc);
575562306a36Sopenharmony_ci	if (!err) {
575662306a36Sopenharmony_ci		BUFFER_TRACE(iloc->bh, "get_write_access");
575762306a36Sopenharmony_ci		err = ext4_journal_get_write_access(handle, inode->i_sb,
575862306a36Sopenharmony_ci						    iloc->bh, EXT4_JTR_NONE);
575962306a36Sopenharmony_ci		if (err) {
576062306a36Sopenharmony_ci			brelse(iloc->bh);
576162306a36Sopenharmony_ci			iloc->bh = NULL;
576262306a36Sopenharmony_ci		}
576362306a36Sopenharmony_ci	}
576462306a36Sopenharmony_ci	ext4_std_error(inode->i_sb, err);
576562306a36Sopenharmony_ci	return err;
576662306a36Sopenharmony_ci}
576762306a36Sopenharmony_ci
576862306a36Sopenharmony_cistatic int __ext4_expand_extra_isize(struct inode *inode,
576962306a36Sopenharmony_ci				     unsigned int new_extra_isize,
577062306a36Sopenharmony_ci				     struct ext4_iloc *iloc,
577162306a36Sopenharmony_ci				     handle_t *handle, int *no_expand)
577262306a36Sopenharmony_ci{
577362306a36Sopenharmony_ci	struct ext4_inode *raw_inode;
577462306a36Sopenharmony_ci	struct ext4_xattr_ibody_header *header;
577562306a36Sopenharmony_ci	unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb);
577662306a36Sopenharmony_ci	struct ext4_inode_info *ei = EXT4_I(inode);
577762306a36Sopenharmony_ci	int error;
577862306a36Sopenharmony_ci
577962306a36Sopenharmony_ci	/* this was checked at iget time, but double check for good measure */
578062306a36Sopenharmony_ci	if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) ||
578162306a36Sopenharmony_ci	    (ei->i_extra_isize & 3)) {
578262306a36Sopenharmony_ci		EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)",
578362306a36Sopenharmony_ci				 ei->i_extra_isize,
578462306a36Sopenharmony_ci				 EXT4_INODE_SIZE(inode->i_sb));
578562306a36Sopenharmony_ci		return -EFSCORRUPTED;
578662306a36Sopenharmony_ci	}
578762306a36Sopenharmony_ci	if ((new_extra_isize < ei->i_extra_isize) ||
578862306a36Sopenharmony_ci	    (new_extra_isize < 4) ||
578962306a36Sopenharmony_ci	    (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE))
579062306a36Sopenharmony_ci		return -EINVAL;	/* Should never happen */
579162306a36Sopenharmony_ci
579262306a36Sopenharmony_ci	raw_inode = ext4_raw_inode(iloc);
579362306a36Sopenharmony_ci
579462306a36Sopenharmony_ci	header = IHDR(inode, raw_inode);
579562306a36Sopenharmony_ci
579662306a36Sopenharmony_ci	/* No extended attributes present */
579762306a36Sopenharmony_ci	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
579862306a36Sopenharmony_ci	    header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
579962306a36Sopenharmony_ci		memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
580062306a36Sopenharmony_ci		       EXT4_I(inode)->i_extra_isize, 0,
580162306a36Sopenharmony_ci		       new_extra_isize - EXT4_I(inode)->i_extra_isize);
580262306a36Sopenharmony_ci		EXT4_I(inode)->i_extra_isize = new_extra_isize;
580362306a36Sopenharmony_ci		return 0;
580462306a36Sopenharmony_ci	}
580562306a36Sopenharmony_ci
580662306a36Sopenharmony_ci	/*
580762306a36Sopenharmony_ci	 * We may need to allocate external xattr block so we need quotas
580862306a36Sopenharmony_ci	 * initialized. Here we can be called with various locks held so we
580962306a36Sopenharmony_ci	 * cannot affort to initialize quotas ourselves. So just bail.
581062306a36Sopenharmony_ci	 */
581162306a36Sopenharmony_ci	if (dquot_initialize_needed(inode))
581262306a36Sopenharmony_ci		return -EAGAIN;
581362306a36Sopenharmony_ci
581462306a36Sopenharmony_ci	/* try to expand with EAs present */
581562306a36Sopenharmony_ci	error = ext4_expand_extra_isize_ea(inode, new_extra_isize,
581662306a36Sopenharmony_ci					   raw_inode, handle);
581762306a36Sopenharmony_ci	if (error) {
581862306a36Sopenharmony_ci		/*
581962306a36Sopenharmony_ci		 * Inode size expansion failed; don't try again
582062306a36Sopenharmony_ci		 */
582162306a36Sopenharmony_ci		*no_expand = 1;
582262306a36Sopenharmony_ci	}
582362306a36Sopenharmony_ci
582462306a36Sopenharmony_ci	return error;
582562306a36Sopenharmony_ci}
582662306a36Sopenharmony_ci
582762306a36Sopenharmony_ci/*
582862306a36Sopenharmony_ci * Expand an inode by new_extra_isize bytes.
582962306a36Sopenharmony_ci * Returns 0 on success or negative error number on failure.
583062306a36Sopenharmony_ci */
583162306a36Sopenharmony_cistatic int ext4_try_to_expand_extra_isize(struct inode *inode,
583262306a36Sopenharmony_ci					  unsigned int new_extra_isize,
583362306a36Sopenharmony_ci					  struct ext4_iloc iloc,
583462306a36Sopenharmony_ci					  handle_t *handle)
583562306a36Sopenharmony_ci{
583662306a36Sopenharmony_ci	int no_expand;
583762306a36Sopenharmony_ci	int error;
583862306a36Sopenharmony_ci
583962306a36Sopenharmony_ci	if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND))
584062306a36Sopenharmony_ci		return -EOVERFLOW;
584162306a36Sopenharmony_ci
584262306a36Sopenharmony_ci	/*
584362306a36Sopenharmony_ci	 * In nojournal mode, we can immediately attempt to expand
584462306a36Sopenharmony_ci	 * the inode.  When journaled, we first need to obtain extra
584562306a36Sopenharmony_ci	 * buffer credits since we may write into the EA block
584662306a36Sopenharmony_ci	 * with this same handle. If journal_extend fails, then it will
584762306a36Sopenharmony_ci	 * only result in a minor loss of functionality for that inode.
584862306a36Sopenharmony_ci	 * If this is felt to be critical, then e2fsck should be run to
584962306a36Sopenharmony_ci	 * force a large enough s_min_extra_isize.
585062306a36Sopenharmony_ci	 */
585162306a36Sopenharmony_ci	if (ext4_journal_extend(handle,
585262306a36Sopenharmony_ci				EXT4_DATA_TRANS_BLOCKS(inode->i_sb), 0) != 0)
585362306a36Sopenharmony_ci		return -ENOSPC;
585462306a36Sopenharmony_ci
585562306a36Sopenharmony_ci	if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
585662306a36Sopenharmony_ci		return -EBUSY;
585762306a36Sopenharmony_ci
585862306a36Sopenharmony_ci	error = __ext4_expand_extra_isize(inode, new_extra_isize, &iloc,
585962306a36Sopenharmony_ci					  handle, &no_expand);
586062306a36Sopenharmony_ci	ext4_write_unlock_xattr(inode, &no_expand);
586162306a36Sopenharmony_ci
586262306a36Sopenharmony_ci	return error;
586362306a36Sopenharmony_ci}
586462306a36Sopenharmony_ci
586562306a36Sopenharmony_ciint ext4_expand_extra_isize(struct inode *inode,
586662306a36Sopenharmony_ci			    unsigned int new_extra_isize,
586762306a36Sopenharmony_ci			    struct ext4_iloc *iloc)
586862306a36Sopenharmony_ci{
586962306a36Sopenharmony_ci	handle_t *handle;
587062306a36Sopenharmony_ci	int no_expand;
587162306a36Sopenharmony_ci	int error, rc;
587262306a36Sopenharmony_ci
587362306a36Sopenharmony_ci	if (ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
587462306a36Sopenharmony_ci		brelse(iloc->bh);
587562306a36Sopenharmony_ci		return -EOVERFLOW;
587662306a36Sopenharmony_ci	}
587762306a36Sopenharmony_ci
587862306a36Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_INODE,
587962306a36Sopenharmony_ci				    EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
588062306a36Sopenharmony_ci	if (IS_ERR(handle)) {
588162306a36Sopenharmony_ci		error = PTR_ERR(handle);
588262306a36Sopenharmony_ci		brelse(iloc->bh);
588362306a36Sopenharmony_ci		return error;
588462306a36Sopenharmony_ci	}
588562306a36Sopenharmony_ci
588662306a36Sopenharmony_ci	ext4_write_lock_xattr(inode, &no_expand);
588762306a36Sopenharmony_ci
588862306a36Sopenharmony_ci	BUFFER_TRACE(iloc->bh, "get_write_access");
588962306a36Sopenharmony_ci	error = ext4_journal_get_write_access(handle, inode->i_sb, iloc->bh,
589062306a36Sopenharmony_ci					      EXT4_JTR_NONE);
589162306a36Sopenharmony_ci	if (error) {
589262306a36Sopenharmony_ci		brelse(iloc->bh);
589362306a36Sopenharmony_ci		goto out_unlock;
589462306a36Sopenharmony_ci	}
589562306a36Sopenharmony_ci
589662306a36Sopenharmony_ci	error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc,
589762306a36Sopenharmony_ci					  handle, &no_expand);
589862306a36Sopenharmony_ci
589962306a36Sopenharmony_ci	rc = ext4_mark_iloc_dirty(handle, inode, iloc);
590062306a36Sopenharmony_ci	if (!error)
590162306a36Sopenharmony_ci		error = rc;
590262306a36Sopenharmony_ci
590362306a36Sopenharmony_ciout_unlock:
590462306a36Sopenharmony_ci	ext4_write_unlock_xattr(inode, &no_expand);
590562306a36Sopenharmony_ci	ext4_journal_stop(handle);
590662306a36Sopenharmony_ci	return error;
590762306a36Sopenharmony_ci}
590862306a36Sopenharmony_ci
590962306a36Sopenharmony_ci/*
591062306a36Sopenharmony_ci * What we do here is to mark the in-core inode as clean with respect to inode
591162306a36Sopenharmony_ci * dirtiness (it may still be data-dirty).
591262306a36Sopenharmony_ci * This means that the in-core inode may be reaped by prune_icache
591362306a36Sopenharmony_ci * without having to perform any I/O.  This is a very good thing,
591462306a36Sopenharmony_ci * because *any* task may call prune_icache - even ones which
591562306a36Sopenharmony_ci * have a transaction open against a different journal.
591662306a36Sopenharmony_ci *
591762306a36Sopenharmony_ci * Is this cheating?  Not really.  Sure, we haven't written the
591862306a36Sopenharmony_ci * inode out, but prune_icache isn't a user-visible syncing function.
591962306a36Sopenharmony_ci * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
592062306a36Sopenharmony_ci * we start and wait on commits.
592162306a36Sopenharmony_ci */
592262306a36Sopenharmony_ciint __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode,
592362306a36Sopenharmony_ci				const char *func, unsigned int line)
592462306a36Sopenharmony_ci{
592562306a36Sopenharmony_ci	struct ext4_iloc iloc;
592662306a36Sopenharmony_ci	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
592762306a36Sopenharmony_ci	int err;
592862306a36Sopenharmony_ci
592962306a36Sopenharmony_ci	might_sleep();
593062306a36Sopenharmony_ci	trace_ext4_mark_inode_dirty(inode, _RET_IP_);
593162306a36Sopenharmony_ci	err = ext4_reserve_inode_write(handle, inode, &iloc);
593262306a36Sopenharmony_ci	if (err)
593362306a36Sopenharmony_ci		goto out;
593462306a36Sopenharmony_ci
593562306a36Sopenharmony_ci	if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize)
593662306a36Sopenharmony_ci		ext4_try_to_expand_extra_isize(inode, sbi->s_want_extra_isize,
593762306a36Sopenharmony_ci					       iloc, handle);
593862306a36Sopenharmony_ci
593962306a36Sopenharmony_ci	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
594062306a36Sopenharmony_ciout:
594162306a36Sopenharmony_ci	if (unlikely(err))
594262306a36Sopenharmony_ci		ext4_error_inode_err(inode, func, line, 0, err,
594362306a36Sopenharmony_ci					"mark_inode_dirty error");
594462306a36Sopenharmony_ci	return err;
594562306a36Sopenharmony_ci}
594662306a36Sopenharmony_ci
594762306a36Sopenharmony_ci/*
594862306a36Sopenharmony_ci * ext4_dirty_inode() is called from __mark_inode_dirty()
594962306a36Sopenharmony_ci *
595062306a36Sopenharmony_ci * We're really interested in the case where a file is being extended.
595162306a36Sopenharmony_ci * i_size has been changed by generic_commit_write() and we thus need
595262306a36Sopenharmony_ci * to include the updated inode in the current transaction.
595362306a36Sopenharmony_ci *
595462306a36Sopenharmony_ci * Also, dquot_alloc_block() will always dirty the inode when blocks
595562306a36Sopenharmony_ci * are allocated to the file.
595662306a36Sopenharmony_ci *
595762306a36Sopenharmony_ci * If the inode is marked synchronous, we don't honour that here - doing
595862306a36Sopenharmony_ci * so would cause a commit on atime updates, which we don't bother doing.
595962306a36Sopenharmony_ci * We handle synchronous inodes at the highest possible level.
596062306a36Sopenharmony_ci */
596162306a36Sopenharmony_civoid ext4_dirty_inode(struct inode *inode, int flags)
596262306a36Sopenharmony_ci{
596362306a36Sopenharmony_ci	handle_t *handle;
596462306a36Sopenharmony_ci
596562306a36Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
596662306a36Sopenharmony_ci	if (IS_ERR(handle))
596762306a36Sopenharmony_ci		return;
596862306a36Sopenharmony_ci	ext4_mark_inode_dirty(handle, inode);
596962306a36Sopenharmony_ci	ext4_journal_stop(handle);
597062306a36Sopenharmony_ci}
597162306a36Sopenharmony_ci
597262306a36Sopenharmony_ciint ext4_change_inode_journal_flag(struct inode *inode, int val)
597362306a36Sopenharmony_ci{
597462306a36Sopenharmony_ci	journal_t *journal;
597562306a36Sopenharmony_ci	handle_t *handle;
597662306a36Sopenharmony_ci	int err;
597762306a36Sopenharmony_ci	int alloc_ctx;
597862306a36Sopenharmony_ci
597962306a36Sopenharmony_ci	/*
598062306a36Sopenharmony_ci	 * We have to be very careful here: changing a data block's
598162306a36Sopenharmony_ci	 * journaling status dynamically is dangerous.  If we write a
598262306a36Sopenharmony_ci	 * data block to the journal, change the status and then delete
598362306a36Sopenharmony_ci	 * that block, we risk forgetting to revoke the old log record
598462306a36Sopenharmony_ci	 * from the journal and so a subsequent replay can corrupt data.
598562306a36Sopenharmony_ci	 * So, first we make sure that the journal is empty and that
598662306a36Sopenharmony_ci	 * nobody is changing anything.
598762306a36Sopenharmony_ci	 */
598862306a36Sopenharmony_ci
598962306a36Sopenharmony_ci	journal = EXT4_JOURNAL(inode);
599062306a36Sopenharmony_ci	if (!journal)
599162306a36Sopenharmony_ci		return 0;
599262306a36Sopenharmony_ci	if (is_journal_aborted(journal))
599362306a36Sopenharmony_ci		return -EROFS;
599462306a36Sopenharmony_ci
599562306a36Sopenharmony_ci	/* Wait for all existing dio workers */
599662306a36Sopenharmony_ci	inode_dio_wait(inode);
599762306a36Sopenharmony_ci
599862306a36Sopenharmony_ci	/*
599962306a36Sopenharmony_ci	 * Before flushing the journal and switching inode's aops, we have
600062306a36Sopenharmony_ci	 * to flush all dirty data the inode has. There can be outstanding
600162306a36Sopenharmony_ci	 * delayed allocations, there can be unwritten extents created by
600262306a36Sopenharmony_ci	 * fallocate or buffered writes in dioread_nolock mode covered by
600362306a36Sopenharmony_ci	 * dirty data which can be converted only after flushing the dirty
600462306a36Sopenharmony_ci	 * data (and journalled aops don't know how to handle these cases).
600562306a36Sopenharmony_ci	 */
600662306a36Sopenharmony_ci	if (val) {
600762306a36Sopenharmony_ci		filemap_invalidate_lock(inode->i_mapping);
600862306a36Sopenharmony_ci		err = filemap_write_and_wait(inode->i_mapping);
600962306a36Sopenharmony_ci		if (err < 0) {
601062306a36Sopenharmony_ci			filemap_invalidate_unlock(inode->i_mapping);
601162306a36Sopenharmony_ci			return err;
601262306a36Sopenharmony_ci		}
601362306a36Sopenharmony_ci	}
601462306a36Sopenharmony_ci
601562306a36Sopenharmony_ci	alloc_ctx = ext4_writepages_down_write(inode->i_sb);
601662306a36Sopenharmony_ci	jbd2_journal_lock_updates(journal);
601762306a36Sopenharmony_ci
601862306a36Sopenharmony_ci	/*
601962306a36Sopenharmony_ci	 * OK, there are no updates running now, and all cached data is
602062306a36Sopenharmony_ci	 * synced to disk.  We are now in a completely consistent state
602162306a36Sopenharmony_ci	 * which doesn't have anything in the journal, and we know that
602262306a36Sopenharmony_ci	 * no filesystem updates are running, so it is safe to modify
602362306a36Sopenharmony_ci	 * the inode's in-core data-journaling state flag now.
602462306a36Sopenharmony_ci	 */
602562306a36Sopenharmony_ci
602662306a36Sopenharmony_ci	if (val)
602762306a36Sopenharmony_ci		ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
602862306a36Sopenharmony_ci	else {
602962306a36Sopenharmony_ci		err = jbd2_journal_flush(journal, 0);
603062306a36Sopenharmony_ci		if (err < 0) {
603162306a36Sopenharmony_ci			jbd2_journal_unlock_updates(journal);
603262306a36Sopenharmony_ci			ext4_writepages_up_write(inode->i_sb, alloc_ctx);
603362306a36Sopenharmony_ci			return err;
603462306a36Sopenharmony_ci		}
603562306a36Sopenharmony_ci		ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
603662306a36Sopenharmony_ci	}
603762306a36Sopenharmony_ci	ext4_set_aops(inode);
603862306a36Sopenharmony_ci
603962306a36Sopenharmony_ci	jbd2_journal_unlock_updates(journal);
604062306a36Sopenharmony_ci	ext4_writepages_up_write(inode->i_sb, alloc_ctx);
604162306a36Sopenharmony_ci
604262306a36Sopenharmony_ci	if (val)
604362306a36Sopenharmony_ci		filemap_invalidate_unlock(inode->i_mapping);
604462306a36Sopenharmony_ci
604562306a36Sopenharmony_ci	/* Finally we can mark the inode as dirty. */
604662306a36Sopenharmony_ci
604762306a36Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
604862306a36Sopenharmony_ci	if (IS_ERR(handle))
604962306a36Sopenharmony_ci		return PTR_ERR(handle);
605062306a36Sopenharmony_ci
605162306a36Sopenharmony_ci	ext4_fc_mark_ineligible(inode->i_sb,
605262306a36Sopenharmony_ci		EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
605362306a36Sopenharmony_ci	err = ext4_mark_inode_dirty(handle, inode);
605462306a36Sopenharmony_ci	ext4_handle_sync(handle);
605562306a36Sopenharmony_ci	ext4_journal_stop(handle);
605662306a36Sopenharmony_ci	ext4_std_error(inode->i_sb, err);
605762306a36Sopenharmony_ci
605862306a36Sopenharmony_ci	return err;
605962306a36Sopenharmony_ci}
606062306a36Sopenharmony_ci
606162306a36Sopenharmony_cistatic int ext4_bh_unmapped(handle_t *handle, struct inode *inode,
606262306a36Sopenharmony_ci			    struct buffer_head *bh)
606362306a36Sopenharmony_ci{
606462306a36Sopenharmony_ci	return !buffer_mapped(bh);
606562306a36Sopenharmony_ci}
606662306a36Sopenharmony_ci
606762306a36Sopenharmony_civm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
606862306a36Sopenharmony_ci{
606962306a36Sopenharmony_ci	struct vm_area_struct *vma = vmf->vma;
607062306a36Sopenharmony_ci	struct folio *folio = page_folio(vmf->page);
607162306a36Sopenharmony_ci	loff_t size;
607262306a36Sopenharmony_ci	unsigned long len;
607362306a36Sopenharmony_ci	int err;
607462306a36Sopenharmony_ci	vm_fault_t ret;
607562306a36Sopenharmony_ci	struct file *file = vma->vm_file;
607662306a36Sopenharmony_ci	struct inode *inode = file_inode(file);
607762306a36Sopenharmony_ci	struct address_space *mapping = inode->i_mapping;
607862306a36Sopenharmony_ci	handle_t *handle;
607962306a36Sopenharmony_ci	get_block_t *get_block;
608062306a36Sopenharmony_ci	int retries = 0;
608162306a36Sopenharmony_ci
608262306a36Sopenharmony_ci	if (unlikely(IS_IMMUTABLE(inode)))
608362306a36Sopenharmony_ci		return VM_FAULT_SIGBUS;
608462306a36Sopenharmony_ci
608562306a36Sopenharmony_ci	sb_start_pagefault(inode->i_sb);
608662306a36Sopenharmony_ci	file_update_time(vma->vm_file);
608762306a36Sopenharmony_ci
608862306a36Sopenharmony_ci	filemap_invalidate_lock_shared(mapping);
608962306a36Sopenharmony_ci
609062306a36Sopenharmony_ci	err = ext4_convert_inline_data(inode);
609162306a36Sopenharmony_ci	if (err)
609262306a36Sopenharmony_ci		goto out_ret;
609362306a36Sopenharmony_ci
609462306a36Sopenharmony_ci	/*
609562306a36Sopenharmony_ci	 * On data journalling we skip straight to the transaction handle:
609662306a36Sopenharmony_ci	 * there's no delalloc; page truncated will be checked later; the
609762306a36Sopenharmony_ci	 * early return w/ all buffers mapped (calculates size/len) can't
609862306a36Sopenharmony_ci	 * be used; and there's no dioread_nolock, so only ext4_get_block.
609962306a36Sopenharmony_ci	 */
610062306a36Sopenharmony_ci	if (ext4_should_journal_data(inode))
610162306a36Sopenharmony_ci		goto retry_alloc;
610262306a36Sopenharmony_ci
610362306a36Sopenharmony_ci	/* Delalloc case is easy... */
610462306a36Sopenharmony_ci	if (test_opt(inode->i_sb, DELALLOC) &&
610562306a36Sopenharmony_ci	    !ext4_nonda_switch(inode->i_sb)) {
610662306a36Sopenharmony_ci		do {
610762306a36Sopenharmony_ci			err = block_page_mkwrite(vma, vmf,
610862306a36Sopenharmony_ci						   ext4_da_get_block_prep);
610962306a36Sopenharmony_ci		} while (err == -ENOSPC &&
611062306a36Sopenharmony_ci		       ext4_should_retry_alloc(inode->i_sb, &retries));
611162306a36Sopenharmony_ci		goto out_ret;
611262306a36Sopenharmony_ci	}
611362306a36Sopenharmony_ci
611462306a36Sopenharmony_ci	folio_lock(folio);
611562306a36Sopenharmony_ci	size = i_size_read(inode);
611662306a36Sopenharmony_ci	/* Page got truncated from under us? */
611762306a36Sopenharmony_ci	if (folio->mapping != mapping || folio_pos(folio) > size) {
611862306a36Sopenharmony_ci		folio_unlock(folio);
611962306a36Sopenharmony_ci		ret = VM_FAULT_NOPAGE;
612062306a36Sopenharmony_ci		goto out;
612162306a36Sopenharmony_ci	}
612262306a36Sopenharmony_ci
612362306a36Sopenharmony_ci	len = folio_size(folio);
612462306a36Sopenharmony_ci	if (folio_pos(folio) + len > size)
612562306a36Sopenharmony_ci		len = size - folio_pos(folio);
612662306a36Sopenharmony_ci	/*
612762306a36Sopenharmony_ci	 * Return if we have all the buffers mapped. This avoids the need to do
612862306a36Sopenharmony_ci	 * journal_start/journal_stop which can block and take a long time
612962306a36Sopenharmony_ci	 *
613062306a36Sopenharmony_ci	 * This cannot be done for data journalling, as we have to add the
613162306a36Sopenharmony_ci	 * inode to the transaction's list to writeprotect pages on commit.
613262306a36Sopenharmony_ci	 */
613362306a36Sopenharmony_ci	if (folio_buffers(folio)) {
613462306a36Sopenharmony_ci		if (!ext4_walk_page_buffers(NULL, inode, folio_buffers(folio),
613562306a36Sopenharmony_ci					    0, len, NULL,
613662306a36Sopenharmony_ci					    ext4_bh_unmapped)) {
613762306a36Sopenharmony_ci			/* Wait so that we don't change page under IO */
613862306a36Sopenharmony_ci			folio_wait_stable(folio);
613962306a36Sopenharmony_ci			ret = VM_FAULT_LOCKED;
614062306a36Sopenharmony_ci			goto out;
614162306a36Sopenharmony_ci		}
614262306a36Sopenharmony_ci	}
614362306a36Sopenharmony_ci	folio_unlock(folio);
614462306a36Sopenharmony_ci	/* OK, we need to fill the hole... */
614562306a36Sopenharmony_ci	if (ext4_should_dioread_nolock(inode))
614662306a36Sopenharmony_ci		get_block = ext4_get_block_unwritten;
614762306a36Sopenharmony_ci	else
614862306a36Sopenharmony_ci		get_block = ext4_get_block;
614962306a36Sopenharmony_ciretry_alloc:
615062306a36Sopenharmony_ci	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
615162306a36Sopenharmony_ci				    ext4_writepage_trans_blocks(inode));
615262306a36Sopenharmony_ci	if (IS_ERR(handle)) {
615362306a36Sopenharmony_ci		ret = VM_FAULT_SIGBUS;
615462306a36Sopenharmony_ci		goto out;
615562306a36Sopenharmony_ci	}
615662306a36Sopenharmony_ci	/*
615762306a36Sopenharmony_ci	 * Data journalling can't use block_page_mkwrite() because it
615862306a36Sopenharmony_ci	 * will set_buffer_dirty() before do_journal_get_write_access()
615962306a36Sopenharmony_ci	 * thus might hit warning messages for dirty metadata buffers.
616062306a36Sopenharmony_ci	 */
616162306a36Sopenharmony_ci	if (!ext4_should_journal_data(inode)) {
616262306a36Sopenharmony_ci		err = block_page_mkwrite(vma, vmf, get_block);
616362306a36Sopenharmony_ci	} else {
616462306a36Sopenharmony_ci		folio_lock(folio);
616562306a36Sopenharmony_ci		size = i_size_read(inode);
616662306a36Sopenharmony_ci		/* Page got truncated from under us? */
616762306a36Sopenharmony_ci		if (folio->mapping != mapping || folio_pos(folio) > size) {
616862306a36Sopenharmony_ci			ret = VM_FAULT_NOPAGE;
616962306a36Sopenharmony_ci			goto out_error;
617062306a36Sopenharmony_ci		}
617162306a36Sopenharmony_ci
617262306a36Sopenharmony_ci		len = folio_size(folio);
617362306a36Sopenharmony_ci		if (folio_pos(folio) + len > size)
617462306a36Sopenharmony_ci			len = size - folio_pos(folio);
617562306a36Sopenharmony_ci
617662306a36Sopenharmony_ci		err = __block_write_begin(&folio->page, 0, len, ext4_get_block);
617762306a36Sopenharmony_ci		if (!err) {
617862306a36Sopenharmony_ci			ret = VM_FAULT_SIGBUS;
617962306a36Sopenharmony_ci			if (ext4_journal_folio_buffers(handle, folio, len))
618062306a36Sopenharmony_ci				goto out_error;
618162306a36Sopenharmony_ci		} else {
618262306a36Sopenharmony_ci			folio_unlock(folio);
618362306a36Sopenharmony_ci		}
618462306a36Sopenharmony_ci	}
618562306a36Sopenharmony_ci	ext4_journal_stop(handle);
618662306a36Sopenharmony_ci	if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
618762306a36Sopenharmony_ci		goto retry_alloc;
618862306a36Sopenharmony_ciout_ret:
618962306a36Sopenharmony_ci	ret = vmf_fs_error(err);
619062306a36Sopenharmony_ciout:
619162306a36Sopenharmony_ci	filemap_invalidate_unlock_shared(mapping);
619262306a36Sopenharmony_ci	sb_end_pagefault(inode->i_sb);
619362306a36Sopenharmony_ci	return ret;
619462306a36Sopenharmony_ciout_error:
619562306a36Sopenharmony_ci	folio_unlock(folio);
619662306a36Sopenharmony_ci	ext4_journal_stop(handle);
619762306a36Sopenharmony_ci	goto out;
619862306a36Sopenharmony_ci}
6199