162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (c) 2000-2005 Silicon Graphics, Inc.
462306a36Sopenharmony_ci * Copyright (c) 2016-2018 Christoph Hellwig.
562306a36Sopenharmony_ci * All Rights Reserved.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci#include "xfs.h"
862306a36Sopenharmony_ci#include "xfs_shared.h"
962306a36Sopenharmony_ci#include "xfs_format.h"
1062306a36Sopenharmony_ci#include "xfs_log_format.h"
1162306a36Sopenharmony_ci#include "xfs_trans_resv.h"
1262306a36Sopenharmony_ci#include "xfs_mount.h"
1362306a36Sopenharmony_ci#include "xfs_inode.h"
1462306a36Sopenharmony_ci#include "xfs_trans.h"
1562306a36Sopenharmony_ci#include "xfs_iomap.h"
1662306a36Sopenharmony_ci#include "xfs_trace.h"
1762306a36Sopenharmony_ci#include "xfs_bmap.h"
1862306a36Sopenharmony_ci#include "xfs_bmap_util.h"
1962306a36Sopenharmony_ci#include "xfs_reflink.h"
2062306a36Sopenharmony_ci#include "xfs_errortag.h"
2162306a36Sopenharmony_ci#include "xfs_error.h"
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_cistruct xfs_writepage_ctx {
2462306a36Sopenharmony_ci	struct iomap_writepage_ctx ctx;
2562306a36Sopenharmony_ci	unsigned int		data_seq;
2662306a36Sopenharmony_ci	unsigned int		cow_seq;
2762306a36Sopenharmony_ci};
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_cistatic inline struct xfs_writepage_ctx *
3062306a36Sopenharmony_ciXFS_WPC(struct iomap_writepage_ctx *ctx)
3162306a36Sopenharmony_ci{
3262306a36Sopenharmony_ci	return container_of(ctx, struct xfs_writepage_ctx, ctx);
3362306a36Sopenharmony_ci}
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci/*
3662306a36Sopenharmony_ci * Fast and loose check if this write could update the on-disk inode size.
3762306a36Sopenharmony_ci */
3862306a36Sopenharmony_cistatic inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
3962306a36Sopenharmony_ci{
4062306a36Sopenharmony_ci	return ioend->io_offset + ioend->io_size >
4162306a36Sopenharmony_ci		XFS_I(ioend->io_inode)->i_disk_size;
4262306a36Sopenharmony_ci}
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_ci/*
4562306a36Sopenharmony_ci * Update on-disk file size now that data has been written to disk.
4662306a36Sopenharmony_ci */
4762306a36Sopenharmony_ciint
4862306a36Sopenharmony_cixfs_setfilesize(
4962306a36Sopenharmony_ci	struct xfs_inode	*ip,
5062306a36Sopenharmony_ci	xfs_off_t		offset,
5162306a36Sopenharmony_ci	size_t			size)
5262306a36Sopenharmony_ci{
5362306a36Sopenharmony_ci	struct xfs_mount	*mp = ip->i_mount;
5462306a36Sopenharmony_ci	struct xfs_trans	*tp;
5562306a36Sopenharmony_ci	xfs_fsize_t		isize;
5662306a36Sopenharmony_ci	int			error;
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
5962306a36Sopenharmony_ci	if (error)
6062306a36Sopenharmony_ci		return error;
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	xfs_ilock(ip, XFS_ILOCK_EXCL);
6362306a36Sopenharmony_ci	isize = xfs_new_eof(ip, offset + size);
6462306a36Sopenharmony_ci	if (!isize) {
6562306a36Sopenharmony_ci		xfs_iunlock(ip, XFS_ILOCK_EXCL);
6662306a36Sopenharmony_ci		xfs_trans_cancel(tp);
6762306a36Sopenharmony_ci		return 0;
6862306a36Sopenharmony_ci	}
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci	trace_xfs_setfilesize(ip, offset, size);
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci	ip->i_disk_size = isize;
7362306a36Sopenharmony_ci	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
7462306a36Sopenharmony_ci	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	return xfs_trans_commit(tp);
7762306a36Sopenharmony_ci}
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci/*
8062306a36Sopenharmony_ci * IO write completion.
8162306a36Sopenharmony_ci */
8262306a36Sopenharmony_ciSTATIC void
8362306a36Sopenharmony_cixfs_end_ioend(
8462306a36Sopenharmony_ci	struct iomap_ioend	*ioend)
8562306a36Sopenharmony_ci{
8662306a36Sopenharmony_ci	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
8762306a36Sopenharmony_ci	struct xfs_mount	*mp = ip->i_mount;
8862306a36Sopenharmony_ci	xfs_off_t		offset = ioend->io_offset;
8962306a36Sopenharmony_ci	size_t			size = ioend->io_size;
9062306a36Sopenharmony_ci	unsigned int		nofs_flag;
9162306a36Sopenharmony_ci	int			error;
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	/*
9462306a36Sopenharmony_ci	 * We can allocate memory here while doing writeback on behalf of
9562306a36Sopenharmony_ci	 * memory reclaim.  To avoid memory allocation deadlocks set the
9662306a36Sopenharmony_ci	 * task-wide nofs context for the following operations.
9762306a36Sopenharmony_ci	 */
9862306a36Sopenharmony_ci	nofs_flag = memalloc_nofs_save();
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	/*
10162306a36Sopenharmony_ci	 * Just clean up the in-memory structures if the fs has been shut down.
10262306a36Sopenharmony_ci	 */
10362306a36Sopenharmony_ci	if (xfs_is_shutdown(mp)) {
10462306a36Sopenharmony_ci		error = -EIO;
10562306a36Sopenharmony_ci		goto done;
10662306a36Sopenharmony_ci	}
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	/*
10962306a36Sopenharmony_ci	 * Clean up all COW blocks and underlying data fork delalloc blocks on
11062306a36Sopenharmony_ci	 * I/O error. The delalloc punch is required because this ioend was
11162306a36Sopenharmony_ci	 * mapped to blocks in the COW fork and the associated pages are no
11262306a36Sopenharmony_ci	 * longer dirty. If we don't remove delalloc blocks here, they become
11362306a36Sopenharmony_ci	 * stale and can corrupt free space accounting on unmount.
11462306a36Sopenharmony_ci	 */
11562306a36Sopenharmony_ci	error = blk_status_to_errno(ioend->io_bio->bi_status);
11662306a36Sopenharmony_ci	if (unlikely(error)) {
11762306a36Sopenharmony_ci		if (ioend->io_flags & IOMAP_F_SHARED) {
11862306a36Sopenharmony_ci			xfs_reflink_cancel_cow_range(ip, offset, size, true);
11962306a36Sopenharmony_ci			xfs_bmap_punch_delalloc_range(ip, offset,
12062306a36Sopenharmony_ci					offset + size);
12162306a36Sopenharmony_ci		}
12262306a36Sopenharmony_ci		goto done;
12362306a36Sopenharmony_ci	}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	/*
12662306a36Sopenharmony_ci	 * Success: commit the COW or unwritten blocks if needed.
12762306a36Sopenharmony_ci	 */
12862306a36Sopenharmony_ci	if (ioend->io_flags & IOMAP_F_SHARED)
12962306a36Sopenharmony_ci		error = xfs_reflink_end_cow(ip, offset, size);
13062306a36Sopenharmony_ci	else if (ioend->io_type == IOMAP_UNWRITTEN)
13162306a36Sopenharmony_ci		error = xfs_iomap_write_unwritten(ip, offset, size, false);
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	if (!error && xfs_ioend_is_append(ioend))
13462306a36Sopenharmony_ci		error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
13562306a36Sopenharmony_cidone:
13662306a36Sopenharmony_ci	iomap_finish_ioends(ioend, error);
13762306a36Sopenharmony_ci	memalloc_nofs_restore(nofs_flag);
13862306a36Sopenharmony_ci}
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci/*
14162306a36Sopenharmony_ci * Finish all pending IO completions that require transactional modifications.
14262306a36Sopenharmony_ci *
14362306a36Sopenharmony_ci * We try to merge physical and logically contiguous ioends before completion to
14462306a36Sopenharmony_ci * minimise the number of transactions we need to perform during IO completion.
14562306a36Sopenharmony_ci * Both unwritten extent conversion and COW remapping need to iterate and modify
14662306a36Sopenharmony_ci * one physical extent at a time, so we gain nothing by merging physically
14762306a36Sopenharmony_ci * discontiguous extents here.
14862306a36Sopenharmony_ci *
14962306a36Sopenharmony_ci * The ioend chain length that we can be processing here is largely unbound in
15062306a36Sopenharmony_ci * length and we may have to perform significant amounts of work on each ioend
15162306a36Sopenharmony_ci * to complete it. Hence we have to be careful about holding the CPU for too
15262306a36Sopenharmony_ci * long in this loop.
15362306a36Sopenharmony_ci */
15462306a36Sopenharmony_civoid
15562306a36Sopenharmony_cixfs_end_io(
15662306a36Sopenharmony_ci	struct work_struct	*work)
15762306a36Sopenharmony_ci{
15862306a36Sopenharmony_ci	struct xfs_inode	*ip =
15962306a36Sopenharmony_ci		container_of(work, struct xfs_inode, i_ioend_work);
16062306a36Sopenharmony_ci	struct iomap_ioend	*ioend;
16162306a36Sopenharmony_ci	struct list_head	tmp;
16262306a36Sopenharmony_ci	unsigned long		flags;
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci	spin_lock_irqsave(&ip->i_ioend_lock, flags);
16562306a36Sopenharmony_ci	list_replace_init(&ip->i_ioend_list, &tmp);
16662306a36Sopenharmony_ci	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	iomap_sort_ioends(&tmp);
16962306a36Sopenharmony_ci	while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
17062306a36Sopenharmony_ci			io_list))) {
17162306a36Sopenharmony_ci		list_del_init(&ioend->io_list);
17262306a36Sopenharmony_ci		iomap_ioend_try_merge(ioend, &tmp);
17362306a36Sopenharmony_ci		xfs_end_ioend(ioend);
17462306a36Sopenharmony_ci		cond_resched();
17562306a36Sopenharmony_ci	}
17662306a36Sopenharmony_ci}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ciSTATIC void
17962306a36Sopenharmony_cixfs_end_bio(
18062306a36Sopenharmony_ci	struct bio		*bio)
18162306a36Sopenharmony_ci{
18262306a36Sopenharmony_ci	struct iomap_ioend	*ioend = bio->bi_private;
18362306a36Sopenharmony_ci	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
18462306a36Sopenharmony_ci	unsigned long		flags;
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	spin_lock_irqsave(&ip->i_ioend_lock, flags);
18762306a36Sopenharmony_ci	if (list_empty(&ip->i_ioend_list))
18862306a36Sopenharmony_ci		WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
18962306a36Sopenharmony_ci					 &ip->i_ioend_work));
19062306a36Sopenharmony_ci	list_add_tail(&ioend->io_list, &ip->i_ioend_list);
19162306a36Sopenharmony_ci	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
19262306a36Sopenharmony_ci}
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci/*
19562306a36Sopenharmony_ci * Fast revalidation of the cached writeback mapping. Return true if the current
19662306a36Sopenharmony_ci * mapping is valid, false otherwise.
19762306a36Sopenharmony_ci */
19862306a36Sopenharmony_cistatic bool
19962306a36Sopenharmony_cixfs_imap_valid(
20062306a36Sopenharmony_ci	struct iomap_writepage_ctx	*wpc,
20162306a36Sopenharmony_ci	struct xfs_inode		*ip,
20262306a36Sopenharmony_ci	loff_t				offset)
20362306a36Sopenharmony_ci{
20462306a36Sopenharmony_ci	if (offset < wpc->iomap.offset ||
20562306a36Sopenharmony_ci	    offset >= wpc->iomap.offset + wpc->iomap.length)
20662306a36Sopenharmony_ci		return false;
20762306a36Sopenharmony_ci	/*
20862306a36Sopenharmony_ci	 * If this is a COW mapping, it is sufficient to check that the mapping
20962306a36Sopenharmony_ci	 * covers the offset. Be careful to check this first because the caller
21062306a36Sopenharmony_ci	 * can revalidate a COW mapping without updating the data seqno.
21162306a36Sopenharmony_ci	 */
21262306a36Sopenharmony_ci	if (wpc->iomap.flags & IOMAP_F_SHARED)
21362306a36Sopenharmony_ci		return true;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	/*
21662306a36Sopenharmony_ci	 * This is not a COW mapping. Check the sequence number of the data fork
21762306a36Sopenharmony_ci	 * because concurrent changes could have invalidated the extent. Check
21862306a36Sopenharmony_ci	 * the COW fork because concurrent changes since the last time we
21962306a36Sopenharmony_ci	 * checked (and found nothing at this offset) could have added
22062306a36Sopenharmony_ci	 * overlapping blocks.
22162306a36Sopenharmony_ci	 */
22262306a36Sopenharmony_ci	if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) {
22362306a36Sopenharmony_ci		trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap,
22462306a36Sopenharmony_ci				XFS_WPC(wpc)->data_seq, XFS_DATA_FORK);
22562306a36Sopenharmony_ci		return false;
22662306a36Sopenharmony_ci	}
22762306a36Sopenharmony_ci	if (xfs_inode_has_cow_data(ip) &&
22862306a36Sopenharmony_ci	    XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) {
22962306a36Sopenharmony_ci		trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap,
23062306a36Sopenharmony_ci				XFS_WPC(wpc)->cow_seq, XFS_COW_FORK);
23162306a36Sopenharmony_ci		return false;
23262306a36Sopenharmony_ci	}
23362306a36Sopenharmony_ci	return true;
23462306a36Sopenharmony_ci}
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci/*
23762306a36Sopenharmony_ci * Pass in a dellalloc extent and convert it to real extents, return the real
23862306a36Sopenharmony_ci * extent that maps offset_fsb in wpc->iomap.
23962306a36Sopenharmony_ci *
24062306a36Sopenharmony_ci * The current page is held locked so nothing could have removed the block
24162306a36Sopenharmony_ci * backing offset_fsb, although it could have moved from the COW to the data
24262306a36Sopenharmony_ci * fork by another thread.
24362306a36Sopenharmony_ci */
24462306a36Sopenharmony_cistatic int
24562306a36Sopenharmony_cixfs_convert_blocks(
24662306a36Sopenharmony_ci	struct iomap_writepage_ctx *wpc,
24762306a36Sopenharmony_ci	struct xfs_inode	*ip,
24862306a36Sopenharmony_ci	int			whichfork,
24962306a36Sopenharmony_ci	loff_t			offset)
25062306a36Sopenharmony_ci{
25162306a36Sopenharmony_ci	int			error;
25262306a36Sopenharmony_ci	unsigned		*seq;
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	if (whichfork == XFS_COW_FORK)
25562306a36Sopenharmony_ci		seq = &XFS_WPC(wpc)->cow_seq;
25662306a36Sopenharmony_ci	else
25762306a36Sopenharmony_ci		seq = &XFS_WPC(wpc)->data_seq;
25862306a36Sopenharmony_ci
25962306a36Sopenharmony_ci	/*
26062306a36Sopenharmony_ci	 * Attempt to allocate whatever delalloc extent currently backs offset
26162306a36Sopenharmony_ci	 * and put the result into wpc->iomap.  Allocate in a loop because it
26262306a36Sopenharmony_ci	 * may take several attempts to allocate real blocks for a contiguous
26362306a36Sopenharmony_ci	 * delalloc extent if free space is sufficiently fragmented.
26462306a36Sopenharmony_ci	 */
26562306a36Sopenharmony_ci	do {
26662306a36Sopenharmony_ci		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
26762306a36Sopenharmony_ci				&wpc->iomap, seq);
26862306a36Sopenharmony_ci		if (error)
26962306a36Sopenharmony_ci			return error;
27062306a36Sopenharmony_ci	} while (wpc->iomap.offset + wpc->iomap.length <= offset);
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	return 0;
27362306a36Sopenharmony_ci}
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_cistatic int
27662306a36Sopenharmony_cixfs_map_blocks(
27762306a36Sopenharmony_ci	struct iomap_writepage_ctx *wpc,
27862306a36Sopenharmony_ci	struct inode		*inode,
27962306a36Sopenharmony_ci	loff_t			offset)
28062306a36Sopenharmony_ci{
28162306a36Sopenharmony_ci	struct xfs_inode	*ip = XFS_I(inode);
28262306a36Sopenharmony_ci	struct xfs_mount	*mp = ip->i_mount;
28362306a36Sopenharmony_ci	ssize_t			count = i_blocksize(inode);
28462306a36Sopenharmony_ci	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
28562306a36Sopenharmony_ci	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
28662306a36Sopenharmony_ci	xfs_fileoff_t		cow_fsb;
28762306a36Sopenharmony_ci	int			whichfork;
28862306a36Sopenharmony_ci	struct xfs_bmbt_irec	imap;
28962306a36Sopenharmony_ci	struct xfs_iext_cursor	icur;
29062306a36Sopenharmony_ci	int			retries = 0;
29162306a36Sopenharmony_ci	int			error = 0;
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	if (xfs_is_shutdown(mp))
29462306a36Sopenharmony_ci		return -EIO;
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS);
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	/*
29962306a36Sopenharmony_ci	 * COW fork blocks can overlap data fork blocks even if the blocks
30062306a36Sopenharmony_ci	 * aren't shared.  COW I/O always takes precedent, so we must always
30162306a36Sopenharmony_ci	 * check for overlap on reflink inodes unless the mapping is already a
30262306a36Sopenharmony_ci	 * COW one, or the COW fork hasn't changed from the last time we looked
30362306a36Sopenharmony_ci	 * at it.
30462306a36Sopenharmony_ci	 *
30562306a36Sopenharmony_ci	 * It's safe to check the COW fork if_seq here without the ILOCK because
30662306a36Sopenharmony_ci	 * we've indirectly protected against concurrent updates: writeback has
30762306a36Sopenharmony_ci	 * the page locked, which prevents concurrent invalidations by reflink
30862306a36Sopenharmony_ci	 * and directio and prevents concurrent buffered writes to the same
30962306a36Sopenharmony_ci	 * page.  Changes to if_seq always happen under i_lock, which protects
31062306a36Sopenharmony_ci	 * against concurrent updates and provides a memory barrier on the way
31162306a36Sopenharmony_ci	 * out that ensures that we always see the current value.
31262306a36Sopenharmony_ci	 */
31362306a36Sopenharmony_ci	if (xfs_imap_valid(wpc, ip, offset))
31462306a36Sopenharmony_ci		return 0;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	/*
31762306a36Sopenharmony_ci	 * If we don't have a valid map, now it's time to get a new one for this
31862306a36Sopenharmony_ci	 * offset.  This will convert delayed allocations (including COW ones)
31962306a36Sopenharmony_ci	 * into real extents.  If we return without a valid map, it means we
32062306a36Sopenharmony_ci	 * landed in a hole and we skip the block.
32162306a36Sopenharmony_ci	 */
32262306a36Sopenharmony_ciretry:
32362306a36Sopenharmony_ci	cow_fsb = NULLFILEOFF;
32462306a36Sopenharmony_ci	whichfork = XFS_DATA_FORK;
32562306a36Sopenharmony_ci	xfs_ilock(ip, XFS_ILOCK_SHARED);
32662306a36Sopenharmony_ci	ASSERT(!xfs_need_iread_extents(&ip->i_df));
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	/*
32962306a36Sopenharmony_ci	 * Check if this is offset is covered by a COW extents, and if yes use
33062306a36Sopenharmony_ci	 * it directly instead of looking up anything in the data fork.
33162306a36Sopenharmony_ci	 */
33262306a36Sopenharmony_ci	if (xfs_inode_has_cow_data(ip) &&
33362306a36Sopenharmony_ci	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
33462306a36Sopenharmony_ci		cow_fsb = imap.br_startoff;
33562306a36Sopenharmony_ci	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
33662306a36Sopenharmony_ci		XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
33762306a36Sopenharmony_ci		xfs_iunlock(ip, XFS_ILOCK_SHARED);
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci		whichfork = XFS_COW_FORK;
34062306a36Sopenharmony_ci		goto allocate_blocks;
34162306a36Sopenharmony_ci	}
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	/*
34462306a36Sopenharmony_ci	 * No COW extent overlap. Revalidate now that we may have updated
34562306a36Sopenharmony_ci	 * ->cow_seq. If the data mapping is still valid, we're done.
34662306a36Sopenharmony_ci	 */
34762306a36Sopenharmony_ci	if (xfs_imap_valid(wpc, ip, offset)) {
34862306a36Sopenharmony_ci		xfs_iunlock(ip, XFS_ILOCK_SHARED);
34962306a36Sopenharmony_ci		return 0;
35062306a36Sopenharmony_ci	}
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_ci	/*
35362306a36Sopenharmony_ci	 * If we don't have a valid map, now it's time to get a new one for this
35462306a36Sopenharmony_ci	 * offset.  This will convert delayed allocations (including COW ones)
35562306a36Sopenharmony_ci	 * into real extents.
35662306a36Sopenharmony_ci	 */
35762306a36Sopenharmony_ci	if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
35862306a36Sopenharmony_ci		imap.br_startoff = end_fsb;	/* fake a hole past EOF */
35962306a36Sopenharmony_ci	XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
36062306a36Sopenharmony_ci	xfs_iunlock(ip, XFS_ILOCK_SHARED);
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	/* landed in a hole or beyond EOF? */
36362306a36Sopenharmony_ci	if (imap.br_startoff > offset_fsb) {
36462306a36Sopenharmony_ci		imap.br_blockcount = imap.br_startoff - offset_fsb;
36562306a36Sopenharmony_ci		imap.br_startoff = offset_fsb;
36662306a36Sopenharmony_ci		imap.br_startblock = HOLESTARTBLOCK;
36762306a36Sopenharmony_ci		imap.br_state = XFS_EXT_NORM;
36862306a36Sopenharmony_ci	}
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci	/*
37162306a36Sopenharmony_ci	 * Truncate to the next COW extent if there is one.  This is the only
37262306a36Sopenharmony_ci	 * opportunity to do this because we can skip COW fork lookups for the
37362306a36Sopenharmony_ci	 * subsequent blocks in the mapping; however, the requirement to treat
37462306a36Sopenharmony_ci	 * the COW range separately remains.
37562306a36Sopenharmony_ci	 */
37662306a36Sopenharmony_ci	if (cow_fsb != NULLFILEOFF &&
37762306a36Sopenharmony_ci	    cow_fsb < imap.br_startoff + imap.br_blockcount)
37862306a36Sopenharmony_ci		imap.br_blockcount = cow_fsb - imap.br_startoff;
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	/* got a delalloc extent? */
38162306a36Sopenharmony_ci	if (imap.br_startblock != HOLESTARTBLOCK &&
38262306a36Sopenharmony_ci	    isnullstartblock(imap.br_startblock))
38362306a36Sopenharmony_ci		goto allocate_blocks;
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq);
38662306a36Sopenharmony_ci	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
38762306a36Sopenharmony_ci	return 0;
38862306a36Sopenharmony_ciallocate_blocks:
38962306a36Sopenharmony_ci	error = xfs_convert_blocks(wpc, ip, whichfork, offset);
39062306a36Sopenharmony_ci	if (error) {
39162306a36Sopenharmony_ci		/*
39262306a36Sopenharmony_ci		 * If we failed to find the extent in the COW fork we might have
39362306a36Sopenharmony_ci		 * raced with a COW to data fork conversion or truncate.
39462306a36Sopenharmony_ci		 * Restart the lookup to catch the extent in the data fork for
39562306a36Sopenharmony_ci		 * the former case, but prevent additional retries to avoid
39662306a36Sopenharmony_ci		 * looping forever for the latter case.
39762306a36Sopenharmony_ci		 */
39862306a36Sopenharmony_ci		if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
39962306a36Sopenharmony_ci			goto retry;
40062306a36Sopenharmony_ci		ASSERT(error != -EAGAIN);
40162306a36Sopenharmony_ci		return error;
40262306a36Sopenharmony_ci	}
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	/*
40562306a36Sopenharmony_ci	 * Due to merging the return real extent might be larger than the
40662306a36Sopenharmony_ci	 * original delalloc one.  Trim the return extent to the next COW
40762306a36Sopenharmony_ci	 * boundary again to force a re-lookup.
40862306a36Sopenharmony_ci	 */
40962306a36Sopenharmony_ci	if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
41062306a36Sopenharmony_ci		loff_t		cow_offset = XFS_FSB_TO_B(mp, cow_fsb);
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci		if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
41362306a36Sopenharmony_ci			wpc->iomap.length = cow_offset - wpc->iomap.offset;
41462306a36Sopenharmony_ci	}
41562306a36Sopenharmony_ci
41662306a36Sopenharmony_ci	ASSERT(wpc->iomap.offset <= offset);
41762306a36Sopenharmony_ci	ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
41862306a36Sopenharmony_ci	trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
41962306a36Sopenharmony_ci	return 0;
42062306a36Sopenharmony_ci}
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_cistatic int
42362306a36Sopenharmony_cixfs_prepare_ioend(
42462306a36Sopenharmony_ci	struct iomap_ioend	*ioend,
42562306a36Sopenharmony_ci	int			status)
42662306a36Sopenharmony_ci{
42762306a36Sopenharmony_ci	unsigned int		nofs_flag;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	/*
43062306a36Sopenharmony_ci	 * We can allocate memory here while doing writeback on behalf of
43162306a36Sopenharmony_ci	 * memory reclaim.  To avoid memory allocation deadlocks set the
43262306a36Sopenharmony_ci	 * task-wide nofs context for the following operations.
43362306a36Sopenharmony_ci	 */
43462306a36Sopenharmony_ci	nofs_flag = memalloc_nofs_save();
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	/* Convert CoW extents to regular */
43762306a36Sopenharmony_ci	if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
43862306a36Sopenharmony_ci		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
43962306a36Sopenharmony_ci				ioend->io_offset, ioend->io_size);
44062306a36Sopenharmony_ci	}
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	memalloc_nofs_restore(nofs_flag);
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci	/* send ioends that might require a transaction to the completion wq */
44562306a36Sopenharmony_ci	if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
44662306a36Sopenharmony_ci	    (ioend->io_flags & IOMAP_F_SHARED))
44762306a36Sopenharmony_ci		ioend->io_bio->bi_end_io = xfs_end_bio;
44862306a36Sopenharmony_ci	return status;
44962306a36Sopenharmony_ci}
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci/*
45262306a36Sopenharmony_ci * If the folio has delalloc blocks on it, the caller is asking us to punch them
45362306a36Sopenharmony_ci * out. If we don't, we can leave a stale delalloc mapping covered by a clean
45462306a36Sopenharmony_ci * page that needs to be dirtied again before the delalloc mapping can be
45562306a36Sopenharmony_ci * converted. This stale delalloc mapping can trip up a later direct I/O read
45662306a36Sopenharmony_ci * operation on the same region.
45762306a36Sopenharmony_ci *
45862306a36Sopenharmony_ci * We prevent this by truncating away the delalloc regions on the folio. Because
45962306a36Sopenharmony_ci * they are delalloc, we can do this without needing a transaction. Indeed - if
46062306a36Sopenharmony_ci * we get ENOSPC errors, we have to be able to do this truncation without a
46162306a36Sopenharmony_ci * transaction as there is no space left for block reservation (typically why
46262306a36Sopenharmony_ci * we see a ENOSPC in writeback).
46362306a36Sopenharmony_ci */
46462306a36Sopenharmony_cistatic void
46562306a36Sopenharmony_cixfs_discard_folio(
46662306a36Sopenharmony_ci	struct folio		*folio,
46762306a36Sopenharmony_ci	loff_t			pos)
46862306a36Sopenharmony_ci{
46962306a36Sopenharmony_ci	struct xfs_inode	*ip = XFS_I(folio->mapping->host);
47062306a36Sopenharmony_ci	struct xfs_mount	*mp = ip->i_mount;
47162306a36Sopenharmony_ci	int			error;
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci	if (xfs_is_shutdown(mp))
47462306a36Sopenharmony_ci		return;
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci	xfs_alert_ratelimited(mp,
47762306a36Sopenharmony_ci		"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
47862306a36Sopenharmony_ci			folio, ip->i_ino, pos);
47962306a36Sopenharmony_ci
48062306a36Sopenharmony_ci	/*
48162306a36Sopenharmony_ci	 * The end of the punch range is always the offset of the first
48262306a36Sopenharmony_ci	 * byte of the next folio. Hence the end offset is only dependent on the
48362306a36Sopenharmony_ci	 * folio itself and not the start offset that is passed in.
48462306a36Sopenharmony_ci	 */
48562306a36Sopenharmony_ci	error = xfs_bmap_punch_delalloc_range(ip, pos,
48662306a36Sopenharmony_ci				folio_pos(folio) + folio_size(folio));
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	if (error && !xfs_is_shutdown(mp))
48962306a36Sopenharmony_ci		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
49062306a36Sopenharmony_ci}
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_cistatic const struct iomap_writeback_ops xfs_writeback_ops = {
49362306a36Sopenharmony_ci	.map_blocks		= xfs_map_blocks,
49462306a36Sopenharmony_ci	.prepare_ioend		= xfs_prepare_ioend,
49562306a36Sopenharmony_ci	.discard_folio		= xfs_discard_folio,
49662306a36Sopenharmony_ci};
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ciSTATIC int
49962306a36Sopenharmony_cixfs_vm_writepages(
50062306a36Sopenharmony_ci	struct address_space	*mapping,
50162306a36Sopenharmony_ci	struct writeback_control *wbc)
50262306a36Sopenharmony_ci{
50362306a36Sopenharmony_ci	struct xfs_writepage_ctx wpc = { };
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci	/*
50662306a36Sopenharmony_ci	 * Writing back data in a transaction context can result in recursive
50762306a36Sopenharmony_ci	 * transactions. This is bad, so issue a warning and get out of here.
50862306a36Sopenharmony_ci	 */
50962306a36Sopenharmony_ci	if (WARN_ON_ONCE(current->journal_info))
51062306a36Sopenharmony_ci		return 0;
51162306a36Sopenharmony_ci
51262306a36Sopenharmony_ci	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
51362306a36Sopenharmony_ci	return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
51462306a36Sopenharmony_ci}
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ciSTATIC int
51762306a36Sopenharmony_cixfs_dax_writepages(
51862306a36Sopenharmony_ci	struct address_space	*mapping,
51962306a36Sopenharmony_ci	struct writeback_control *wbc)
52062306a36Sopenharmony_ci{
52162306a36Sopenharmony_ci	struct xfs_inode	*ip = XFS_I(mapping->host);
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci	xfs_iflags_clear(ip, XFS_ITRUNCATED);
52462306a36Sopenharmony_ci	return dax_writeback_mapping_range(mapping,
52562306a36Sopenharmony_ci			xfs_inode_buftarg(ip)->bt_daxdev, wbc);
52662306a36Sopenharmony_ci}
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ciSTATIC sector_t
52962306a36Sopenharmony_cixfs_vm_bmap(
53062306a36Sopenharmony_ci	struct address_space	*mapping,
53162306a36Sopenharmony_ci	sector_t		block)
53262306a36Sopenharmony_ci{
53362306a36Sopenharmony_ci	struct xfs_inode	*ip = XFS_I(mapping->host);
53462306a36Sopenharmony_ci
53562306a36Sopenharmony_ci	trace_xfs_vm_bmap(ip);
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci	/*
53862306a36Sopenharmony_ci	 * The swap code (ab-)uses ->bmap to get a block mapping and then
53962306a36Sopenharmony_ci	 * bypasses the file system for actual I/O.  We really can't allow
54062306a36Sopenharmony_ci	 * that on reflinks inodes, so we have to skip out here.  And yes,
54162306a36Sopenharmony_ci	 * 0 is the magic code for a bmap error.
54262306a36Sopenharmony_ci	 *
54362306a36Sopenharmony_ci	 * Since we don't pass back blockdev info, we can't return bmap
54462306a36Sopenharmony_ci	 * information for rt files either.
54562306a36Sopenharmony_ci	 */
54662306a36Sopenharmony_ci	if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
54762306a36Sopenharmony_ci		return 0;
54862306a36Sopenharmony_ci	return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
54962306a36Sopenharmony_ci}
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ciSTATIC int
55262306a36Sopenharmony_cixfs_vm_read_folio(
55362306a36Sopenharmony_ci	struct file		*unused,
55462306a36Sopenharmony_ci	struct folio		*folio)
55562306a36Sopenharmony_ci{
55662306a36Sopenharmony_ci	return iomap_read_folio(folio, &xfs_read_iomap_ops);
55762306a36Sopenharmony_ci}
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ciSTATIC void
56062306a36Sopenharmony_cixfs_vm_readahead(
56162306a36Sopenharmony_ci	struct readahead_control	*rac)
56262306a36Sopenharmony_ci{
56362306a36Sopenharmony_ci	iomap_readahead(rac, &xfs_read_iomap_ops);
56462306a36Sopenharmony_ci}
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_cistatic int
56762306a36Sopenharmony_cixfs_iomap_swapfile_activate(
56862306a36Sopenharmony_ci	struct swap_info_struct		*sis,
56962306a36Sopenharmony_ci	struct file			*swap_file,
57062306a36Sopenharmony_ci	sector_t			*span)
57162306a36Sopenharmony_ci{
57262306a36Sopenharmony_ci	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
57362306a36Sopenharmony_ci	return iomap_swapfile_activate(sis, swap_file, span,
57462306a36Sopenharmony_ci			&xfs_read_iomap_ops);
57562306a36Sopenharmony_ci}
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ciconst struct address_space_operations xfs_address_space_operations = {
57862306a36Sopenharmony_ci	.read_folio		= xfs_vm_read_folio,
57962306a36Sopenharmony_ci	.readahead		= xfs_vm_readahead,
58062306a36Sopenharmony_ci	.writepages		= xfs_vm_writepages,
58162306a36Sopenharmony_ci	.dirty_folio		= iomap_dirty_folio,
58262306a36Sopenharmony_ci	.release_folio		= iomap_release_folio,
58362306a36Sopenharmony_ci	.invalidate_folio	= iomap_invalidate_folio,
58462306a36Sopenharmony_ci	.bmap			= xfs_vm_bmap,
58562306a36Sopenharmony_ci	.migrate_folio		= filemap_migrate_folio,
58662306a36Sopenharmony_ci	.is_partially_uptodate  = iomap_is_partially_uptodate,
58762306a36Sopenharmony_ci	.error_remove_page	= generic_error_remove_page,
58862306a36Sopenharmony_ci	.swap_activate		= xfs_iomap_swapfile_activate,
58962306a36Sopenharmony_ci};
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ciconst struct address_space_operations xfs_dax_aops = {
59262306a36Sopenharmony_ci	.writepages		= xfs_dax_writepages,
59362306a36Sopenharmony_ci	.dirty_folio		= noop_dirty_folio,
59462306a36Sopenharmony_ci	.swap_activate		= xfs_iomap_swapfile_activate,
59562306a36Sopenharmony_ci};
596