162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2000-2005 Silicon Graphics, Inc. 462306a36Sopenharmony_ci * Copyright (c) 2016-2018 Christoph Hellwig. 562306a36Sopenharmony_ci * All Rights Reserved. 662306a36Sopenharmony_ci */ 762306a36Sopenharmony_ci#include "xfs.h" 862306a36Sopenharmony_ci#include "xfs_shared.h" 962306a36Sopenharmony_ci#include "xfs_format.h" 1062306a36Sopenharmony_ci#include "xfs_log_format.h" 1162306a36Sopenharmony_ci#include "xfs_trans_resv.h" 1262306a36Sopenharmony_ci#include "xfs_mount.h" 1362306a36Sopenharmony_ci#include "xfs_inode.h" 1462306a36Sopenharmony_ci#include "xfs_trans.h" 1562306a36Sopenharmony_ci#include "xfs_iomap.h" 1662306a36Sopenharmony_ci#include "xfs_trace.h" 1762306a36Sopenharmony_ci#include "xfs_bmap.h" 1862306a36Sopenharmony_ci#include "xfs_bmap_util.h" 1962306a36Sopenharmony_ci#include "xfs_reflink.h" 2062306a36Sopenharmony_ci#include "xfs_errortag.h" 2162306a36Sopenharmony_ci#include "xfs_error.h" 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_cistruct xfs_writepage_ctx { 2462306a36Sopenharmony_ci struct iomap_writepage_ctx ctx; 2562306a36Sopenharmony_ci unsigned int data_seq; 2662306a36Sopenharmony_ci unsigned int cow_seq; 2762306a36Sopenharmony_ci}; 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_cistatic inline struct xfs_writepage_ctx * 3062306a36Sopenharmony_ciXFS_WPC(struct iomap_writepage_ctx *ctx) 3162306a36Sopenharmony_ci{ 3262306a36Sopenharmony_ci return container_of(ctx, struct xfs_writepage_ctx, ctx); 3362306a36Sopenharmony_ci} 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci/* 3662306a36Sopenharmony_ci * Fast and loose check if this write could update the on-disk inode size. 3762306a36Sopenharmony_ci */ 3862306a36Sopenharmony_cistatic inline bool xfs_ioend_is_append(struct iomap_ioend *ioend) 3962306a36Sopenharmony_ci{ 4062306a36Sopenharmony_ci return ioend->io_offset + ioend->io_size > 4162306a36Sopenharmony_ci XFS_I(ioend->io_inode)->i_disk_size; 4262306a36Sopenharmony_ci} 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci/* 4562306a36Sopenharmony_ci * Update on-disk file size now that data has been written to disk. 4662306a36Sopenharmony_ci */ 4762306a36Sopenharmony_ciint 4862306a36Sopenharmony_cixfs_setfilesize( 4962306a36Sopenharmony_ci struct xfs_inode *ip, 5062306a36Sopenharmony_ci xfs_off_t offset, 5162306a36Sopenharmony_ci size_t size) 5262306a36Sopenharmony_ci{ 5362306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 5462306a36Sopenharmony_ci struct xfs_trans *tp; 5562306a36Sopenharmony_ci xfs_fsize_t isize; 5662306a36Sopenharmony_ci int error; 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 5962306a36Sopenharmony_ci if (error) 6062306a36Sopenharmony_ci return error; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci xfs_ilock(ip, XFS_ILOCK_EXCL); 6362306a36Sopenharmony_ci isize = xfs_new_eof(ip, offset + size); 6462306a36Sopenharmony_ci if (!isize) { 6562306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_EXCL); 6662306a36Sopenharmony_ci xfs_trans_cancel(tp); 6762306a36Sopenharmony_ci return 0; 6862306a36Sopenharmony_ci } 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci trace_xfs_setfilesize(ip, offset, size); 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci ip->i_disk_size = isize; 7362306a36Sopenharmony_ci xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 7462306a36Sopenharmony_ci xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci return xfs_trans_commit(tp); 7762306a36Sopenharmony_ci} 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci/* 8062306a36Sopenharmony_ci * IO write completion. 8162306a36Sopenharmony_ci */ 8262306a36Sopenharmony_ciSTATIC void 8362306a36Sopenharmony_cixfs_end_ioend( 8462306a36Sopenharmony_ci struct iomap_ioend *ioend) 8562306a36Sopenharmony_ci{ 8662306a36Sopenharmony_ci struct xfs_inode *ip = XFS_I(ioend->io_inode); 8762306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 8862306a36Sopenharmony_ci xfs_off_t offset = ioend->io_offset; 8962306a36Sopenharmony_ci size_t size = ioend->io_size; 9062306a36Sopenharmony_ci unsigned int nofs_flag; 9162306a36Sopenharmony_ci int error; 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci /* 9462306a36Sopenharmony_ci * We can allocate memory here while doing writeback on behalf of 9562306a36Sopenharmony_ci * memory reclaim. To avoid memory allocation deadlocks set the 9662306a36Sopenharmony_ci * task-wide nofs context for the following operations. 9762306a36Sopenharmony_ci */ 9862306a36Sopenharmony_ci nofs_flag = memalloc_nofs_save(); 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci /* 10162306a36Sopenharmony_ci * Just clean up the in-memory structures if the fs has been shut down. 10262306a36Sopenharmony_ci */ 10362306a36Sopenharmony_ci if (xfs_is_shutdown(mp)) { 10462306a36Sopenharmony_ci error = -EIO; 10562306a36Sopenharmony_ci goto done; 10662306a36Sopenharmony_ci } 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci /* 10962306a36Sopenharmony_ci * Clean up all COW blocks and underlying data fork delalloc blocks on 11062306a36Sopenharmony_ci * I/O error. The delalloc punch is required because this ioend was 11162306a36Sopenharmony_ci * mapped to blocks in the COW fork and the associated pages are no 11262306a36Sopenharmony_ci * longer dirty. If we don't remove delalloc blocks here, they become 11362306a36Sopenharmony_ci * stale and can corrupt free space accounting on unmount. 11462306a36Sopenharmony_ci */ 11562306a36Sopenharmony_ci error = blk_status_to_errno(ioend->io_bio->bi_status); 11662306a36Sopenharmony_ci if (unlikely(error)) { 11762306a36Sopenharmony_ci if (ioend->io_flags & IOMAP_F_SHARED) { 11862306a36Sopenharmony_ci xfs_reflink_cancel_cow_range(ip, offset, size, true); 11962306a36Sopenharmony_ci xfs_bmap_punch_delalloc_range(ip, offset, 12062306a36Sopenharmony_ci offset + size); 12162306a36Sopenharmony_ci } 12262306a36Sopenharmony_ci goto done; 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci /* 12662306a36Sopenharmony_ci * Success: commit the COW or unwritten blocks if needed. 12762306a36Sopenharmony_ci */ 12862306a36Sopenharmony_ci if (ioend->io_flags & IOMAP_F_SHARED) 12962306a36Sopenharmony_ci error = xfs_reflink_end_cow(ip, offset, size); 13062306a36Sopenharmony_ci else if (ioend->io_type == IOMAP_UNWRITTEN) 13162306a36Sopenharmony_ci error = xfs_iomap_write_unwritten(ip, offset, size, false); 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci if (!error && xfs_ioend_is_append(ioend)) 13462306a36Sopenharmony_ci error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); 13562306a36Sopenharmony_cidone: 13662306a36Sopenharmony_ci iomap_finish_ioends(ioend, error); 13762306a36Sopenharmony_ci memalloc_nofs_restore(nofs_flag); 13862306a36Sopenharmony_ci} 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci/* 14162306a36Sopenharmony_ci * Finish all pending IO completions that require transactional modifications. 14262306a36Sopenharmony_ci * 14362306a36Sopenharmony_ci * We try to merge physical and logically contiguous ioends before completion to 14462306a36Sopenharmony_ci * minimise the number of transactions we need to perform during IO completion. 14562306a36Sopenharmony_ci * Both unwritten extent conversion and COW remapping need to iterate and modify 14662306a36Sopenharmony_ci * one physical extent at a time, so we gain nothing by merging physically 14762306a36Sopenharmony_ci * discontiguous extents here. 14862306a36Sopenharmony_ci * 14962306a36Sopenharmony_ci * The ioend chain length that we can be processing here is largely unbound in 15062306a36Sopenharmony_ci * length and we may have to perform significant amounts of work on each ioend 15162306a36Sopenharmony_ci * to complete it. Hence we have to be careful about holding the CPU for too 15262306a36Sopenharmony_ci * long in this loop. 15362306a36Sopenharmony_ci */ 15462306a36Sopenharmony_civoid 15562306a36Sopenharmony_cixfs_end_io( 15662306a36Sopenharmony_ci struct work_struct *work) 15762306a36Sopenharmony_ci{ 15862306a36Sopenharmony_ci struct xfs_inode *ip = 15962306a36Sopenharmony_ci container_of(work, struct xfs_inode, i_ioend_work); 16062306a36Sopenharmony_ci struct iomap_ioend *ioend; 16162306a36Sopenharmony_ci struct list_head tmp; 16262306a36Sopenharmony_ci unsigned long flags; 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci spin_lock_irqsave(&ip->i_ioend_lock, flags); 16562306a36Sopenharmony_ci list_replace_init(&ip->i_ioend_list, &tmp); 16662306a36Sopenharmony_ci spin_unlock_irqrestore(&ip->i_ioend_lock, flags); 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci iomap_sort_ioends(&tmp); 16962306a36Sopenharmony_ci while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend, 17062306a36Sopenharmony_ci io_list))) { 17162306a36Sopenharmony_ci list_del_init(&ioend->io_list); 17262306a36Sopenharmony_ci iomap_ioend_try_merge(ioend, &tmp); 17362306a36Sopenharmony_ci xfs_end_ioend(ioend); 17462306a36Sopenharmony_ci cond_resched(); 17562306a36Sopenharmony_ci } 17662306a36Sopenharmony_ci} 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ciSTATIC void 17962306a36Sopenharmony_cixfs_end_bio( 18062306a36Sopenharmony_ci struct bio *bio) 18162306a36Sopenharmony_ci{ 18262306a36Sopenharmony_ci struct iomap_ioend *ioend = bio->bi_private; 18362306a36Sopenharmony_ci struct xfs_inode *ip = XFS_I(ioend->io_inode); 18462306a36Sopenharmony_ci unsigned long flags; 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci spin_lock_irqsave(&ip->i_ioend_lock, flags); 18762306a36Sopenharmony_ci if (list_empty(&ip->i_ioend_list)) 18862306a36Sopenharmony_ci WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue, 18962306a36Sopenharmony_ci &ip->i_ioend_work)); 19062306a36Sopenharmony_ci list_add_tail(&ioend->io_list, &ip->i_ioend_list); 19162306a36Sopenharmony_ci spin_unlock_irqrestore(&ip->i_ioend_lock, flags); 19262306a36Sopenharmony_ci} 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci/* 19562306a36Sopenharmony_ci * Fast revalidation of the cached writeback mapping. Return true if the current 19662306a36Sopenharmony_ci * mapping is valid, false otherwise. 19762306a36Sopenharmony_ci */ 19862306a36Sopenharmony_cistatic bool 19962306a36Sopenharmony_cixfs_imap_valid( 20062306a36Sopenharmony_ci struct iomap_writepage_ctx *wpc, 20162306a36Sopenharmony_ci struct xfs_inode *ip, 20262306a36Sopenharmony_ci loff_t offset) 20362306a36Sopenharmony_ci{ 20462306a36Sopenharmony_ci if (offset < wpc->iomap.offset || 20562306a36Sopenharmony_ci offset >= wpc->iomap.offset + wpc->iomap.length) 20662306a36Sopenharmony_ci return false; 20762306a36Sopenharmony_ci /* 20862306a36Sopenharmony_ci * If this is a COW mapping, it is sufficient to check that the mapping 20962306a36Sopenharmony_ci * covers the offset. Be careful to check this first because the caller 21062306a36Sopenharmony_ci * can revalidate a COW mapping without updating the data seqno. 21162306a36Sopenharmony_ci */ 21262306a36Sopenharmony_ci if (wpc->iomap.flags & IOMAP_F_SHARED) 21362306a36Sopenharmony_ci return true; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci /* 21662306a36Sopenharmony_ci * This is not a COW mapping. Check the sequence number of the data fork 21762306a36Sopenharmony_ci * because concurrent changes could have invalidated the extent. Check 21862306a36Sopenharmony_ci * the COW fork because concurrent changes since the last time we 21962306a36Sopenharmony_ci * checked (and found nothing at this offset) could have added 22062306a36Sopenharmony_ci * overlapping blocks. 22162306a36Sopenharmony_ci */ 22262306a36Sopenharmony_ci if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) { 22362306a36Sopenharmony_ci trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap, 22462306a36Sopenharmony_ci XFS_WPC(wpc)->data_seq, XFS_DATA_FORK); 22562306a36Sopenharmony_ci return false; 22662306a36Sopenharmony_ci } 22762306a36Sopenharmony_ci if (xfs_inode_has_cow_data(ip) && 22862306a36Sopenharmony_ci XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) { 22962306a36Sopenharmony_ci trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap, 23062306a36Sopenharmony_ci XFS_WPC(wpc)->cow_seq, XFS_COW_FORK); 23162306a36Sopenharmony_ci return false; 23262306a36Sopenharmony_ci } 23362306a36Sopenharmony_ci return true; 23462306a36Sopenharmony_ci} 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci/* 23762306a36Sopenharmony_ci * Pass in a dellalloc extent and convert it to real extents, return the real 23862306a36Sopenharmony_ci * extent that maps offset_fsb in wpc->iomap. 23962306a36Sopenharmony_ci * 24062306a36Sopenharmony_ci * The current page is held locked so nothing could have removed the block 24162306a36Sopenharmony_ci * backing offset_fsb, although it could have moved from the COW to the data 24262306a36Sopenharmony_ci * fork by another thread. 24362306a36Sopenharmony_ci */ 24462306a36Sopenharmony_cistatic int 24562306a36Sopenharmony_cixfs_convert_blocks( 24662306a36Sopenharmony_ci struct iomap_writepage_ctx *wpc, 24762306a36Sopenharmony_ci struct xfs_inode *ip, 24862306a36Sopenharmony_ci int whichfork, 24962306a36Sopenharmony_ci loff_t offset) 25062306a36Sopenharmony_ci{ 25162306a36Sopenharmony_ci int error; 25262306a36Sopenharmony_ci unsigned *seq; 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci if (whichfork == XFS_COW_FORK) 25562306a36Sopenharmony_ci seq = &XFS_WPC(wpc)->cow_seq; 25662306a36Sopenharmony_ci else 25762306a36Sopenharmony_ci seq = &XFS_WPC(wpc)->data_seq; 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci /* 26062306a36Sopenharmony_ci * Attempt to allocate whatever delalloc extent currently backs offset 26162306a36Sopenharmony_ci * and put the result into wpc->iomap. Allocate in a loop because it 26262306a36Sopenharmony_ci * may take several attempts to allocate real blocks for a contiguous 26362306a36Sopenharmony_ci * delalloc extent if free space is sufficiently fragmented. 26462306a36Sopenharmony_ci */ 26562306a36Sopenharmony_ci do { 26662306a36Sopenharmony_ci error = xfs_bmapi_convert_delalloc(ip, whichfork, offset, 26762306a36Sopenharmony_ci &wpc->iomap, seq); 26862306a36Sopenharmony_ci if (error) 26962306a36Sopenharmony_ci return error; 27062306a36Sopenharmony_ci } while (wpc->iomap.offset + wpc->iomap.length <= offset); 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci return 0; 27362306a36Sopenharmony_ci} 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_cistatic int 27662306a36Sopenharmony_cixfs_map_blocks( 27762306a36Sopenharmony_ci struct iomap_writepage_ctx *wpc, 27862306a36Sopenharmony_ci struct inode *inode, 27962306a36Sopenharmony_ci loff_t offset) 28062306a36Sopenharmony_ci{ 28162306a36Sopenharmony_ci struct xfs_inode *ip = XFS_I(inode); 28262306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 28362306a36Sopenharmony_ci ssize_t count = i_blocksize(inode); 28462306a36Sopenharmony_ci xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 28562306a36Sopenharmony_ci xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); 28662306a36Sopenharmony_ci xfs_fileoff_t cow_fsb; 28762306a36Sopenharmony_ci int whichfork; 28862306a36Sopenharmony_ci struct xfs_bmbt_irec imap; 28962306a36Sopenharmony_ci struct xfs_iext_cursor icur; 29062306a36Sopenharmony_ci int retries = 0; 29162306a36Sopenharmony_ci int error = 0; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci if (xfs_is_shutdown(mp)) 29462306a36Sopenharmony_ci return -EIO; 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS); 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci /* 29962306a36Sopenharmony_ci * COW fork blocks can overlap data fork blocks even if the blocks 30062306a36Sopenharmony_ci * aren't shared. COW I/O always takes precedent, so we must always 30162306a36Sopenharmony_ci * check for overlap on reflink inodes unless the mapping is already a 30262306a36Sopenharmony_ci * COW one, or the COW fork hasn't changed from the last time we looked 30362306a36Sopenharmony_ci * at it. 30462306a36Sopenharmony_ci * 30562306a36Sopenharmony_ci * It's safe to check the COW fork if_seq here without the ILOCK because 30662306a36Sopenharmony_ci * we've indirectly protected against concurrent updates: writeback has 30762306a36Sopenharmony_ci * the page locked, which prevents concurrent invalidations by reflink 30862306a36Sopenharmony_ci * and directio and prevents concurrent buffered writes to the same 30962306a36Sopenharmony_ci * page. Changes to if_seq always happen under i_lock, which protects 31062306a36Sopenharmony_ci * against concurrent updates and provides a memory barrier on the way 31162306a36Sopenharmony_ci * out that ensures that we always see the current value. 31262306a36Sopenharmony_ci */ 31362306a36Sopenharmony_ci if (xfs_imap_valid(wpc, ip, offset)) 31462306a36Sopenharmony_ci return 0; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci /* 31762306a36Sopenharmony_ci * If we don't have a valid map, now it's time to get a new one for this 31862306a36Sopenharmony_ci * offset. This will convert delayed allocations (including COW ones) 31962306a36Sopenharmony_ci * into real extents. If we return without a valid map, it means we 32062306a36Sopenharmony_ci * landed in a hole and we skip the block. 32162306a36Sopenharmony_ci */ 32262306a36Sopenharmony_ciretry: 32362306a36Sopenharmony_ci cow_fsb = NULLFILEOFF; 32462306a36Sopenharmony_ci whichfork = XFS_DATA_FORK; 32562306a36Sopenharmony_ci xfs_ilock(ip, XFS_ILOCK_SHARED); 32662306a36Sopenharmony_ci ASSERT(!xfs_need_iread_extents(&ip->i_df)); 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ci /* 32962306a36Sopenharmony_ci * Check if this is offset is covered by a COW extents, and if yes use 33062306a36Sopenharmony_ci * it directly instead of looking up anything in the data fork. 33162306a36Sopenharmony_ci */ 33262306a36Sopenharmony_ci if (xfs_inode_has_cow_data(ip) && 33362306a36Sopenharmony_ci xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap)) 33462306a36Sopenharmony_ci cow_fsb = imap.br_startoff; 33562306a36Sopenharmony_ci if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) { 33662306a36Sopenharmony_ci XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq); 33762306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_SHARED); 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci whichfork = XFS_COW_FORK; 34062306a36Sopenharmony_ci goto allocate_blocks; 34162306a36Sopenharmony_ci } 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci /* 34462306a36Sopenharmony_ci * No COW extent overlap. Revalidate now that we may have updated 34562306a36Sopenharmony_ci * ->cow_seq. If the data mapping is still valid, we're done. 34662306a36Sopenharmony_ci */ 34762306a36Sopenharmony_ci if (xfs_imap_valid(wpc, ip, offset)) { 34862306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_SHARED); 34962306a36Sopenharmony_ci return 0; 35062306a36Sopenharmony_ci } 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci /* 35362306a36Sopenharmony_ci * If we don't have a valid map, now it's time to get a new one for this 35462306a36Sopenharmony_ci * offset. This will convert delayed allocations (including COW ones) 35562306a36Sopenharmony_ci * into real extents. 35662306a36Sopenharmony_ci */ 35762306a36Sopenharmony_ci if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) 35862306a36Sopenharmony_ci imap.br_startoff = end_fsb; /* fake a hole past EOF */ 35962306a36Sopenharmony_ci XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq); 36062306a36Sopenharmony_ci xfs_iunlock(ip, XFS_ILOCK_SHARED); 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci /* landed in a hole or beyond EOF? */ 36362306a36Sopenharmony_ci if (imap.br_startoff > offset_fsb) { 36462306a36Sopenharmony_ci imap.br_blockcount = imap.br_startoff - offset_fsb; 36562306a36Sopenharmony_ci imap.br_startoff = offset_fsb; 36662306a36Sopenharmony_ci imap.br_startblock = HOLESTARTBLOCK; 36762306a36Sopenharmony_ci imap.br_state = XFS_EXT_NORM; 36862306a36Sopenharmony_ci } 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci /* 37162306a36Sopenharmony_ci * Truncate to the next COW extent if there is one. This is the only 37262306a36Sopenharmony_ci * opportunity to do this because we can skip COW fork lookups for the 37362306a36Sopenharmony_ci * subsequent blocks in the mapping; however, the requirement to treat 37462306a36Sopenharmony_ci * the COW range separately remains. 37562306a36Sopenharmony_ci */ 37662306a36Sopenharmony_ci if (cow_fsb != NULLFILEOFF && 37762306a36Sopenharmony_ci cow_fsb < imap.br_startoff + imap.br_blockcount) 37862306a36Sopenharmony_ci imap.br_blockcount = cow_fsb - imap.br_startoff; 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci /* got a delalloc extent? */ 38162306a36Sopenharmony_ci if (imap.br_startblock != HOLESTARTBLOCK && 38262306a36Sopenharmony_ci isnullstartblock(imap.br_startblock)) 38362306a36Sopenharmony_ci goto allocate_blocks; 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); 38662306a36Sopenharmony_ci trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); 38762306a36Sopenharmony_ci return 0; 38862306a36Sopenharmony_ciallocate_blocks: 38962306a36Sopenharmony_ci error = xfs_convert_blocks(wpc, ip, whichfork, offset); 39062306a36Sopenharmony_ci if (error) { 39162306a36Sopenharmony_ci /* 39262306a36Sopenharmony_ci * If we failed to find the extent in the COW fork we might have 39362306a36Sopenharmony_ci * raced with a COW to data fork conversion or truncate. 39462306a36Sopenharmony_ci * Restart the lookup to catch the extent in the data fork for 39562306a36Sopenharmony_ci * the former case, but prevent additional retries to avoid 39662306a36Sopenharmony_ci * looping forever for the latter case. 39762306a36Sopenharmony_ci */ 39862306a36Sopenharmony_ci if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++) 39962306a36Sopenharmony_ci goto retry; 40062306a36Sopenharmony_ci ASSERT(error != -EAGAIN); 40162306a36Sopenharmony_ci return error; 40262306a36Sopenharmony_ci } 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci /* 40562306a36Sopenharmony_ci * Due to merging the return real extent might be larger than the 40662306a36Sopenharmony_ci * original delalloc one. Trim the return extent to the next COW 40762306a36Sopenharmony_ci * boundary again to force a re-lookup. 40862306a36Sopenharmony_ci */ 40962306a36Sopenharmony_ci if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) { 41062306a36Sopenharmony_ci loff_t cow_offset = XFS_FSB_TO_B(mp, cow_fsb); 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci if (cow_offset < wpc->iomap.offset + wpc->iomap.length) 41362306a36Sopenharmony_ci wpc->iomap.length = cow_offset - wpc->iomap.offset; 41462306a36Sopenharmony_ci } 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci ASSERT(wpc->iomap.offset <= offset); 41762306a36Sopenharmony_ci ASSERT(wpc->iomap.offset + wpc->iomap.length > offset); 41862306a36Sopenharmony_ci trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap); 41962306a36Sopenharmony_ci return 0; 42062306a36Sopenharmony_ci} 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_cistatic int 42362306a36Sopenharmony_cixfs_prepare_ioend( 42462306a36Sopenharmony_ci struct iomap_ioend *ioend, 42562306a36Sopenharmony_ci int status) 42662306a36Sopenharmony_ci{ 42762306a36Sopenharmony_ci unsigned int nofs_flag; 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci /* 43062306a36Sopenharmony_ci * We can allocate memory here while doing writeback on behalf of 43162306a36Sopenharmony_ci * memory reclaim. To avoid memory allocation deadlocks set the 43262306a36Sopenharmony_ci * task-wide nofs context for the following operations. 43362306a36Sopenharmony_ci */ 43462306a36Sopenharmony_ci nofs_flag = memalloc_nofs_save(); 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci /* Convert CoW extents to regular */ 43762306a36Sopenharmony_ci if (!status && (ioend->io_flags & IOMAP_F_SHARED)) { 43862306a36Sopenharmony_ci status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), 43962306a36Sopenharmony_ci ioend->io_offset, ioend->io_size); 44062306a36Sopenharmony_ci } 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci memalloc_nofs_restore(nofs_flag); 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci /* send ioends that might require a transaction to the completion wq */ 44562306a36Sopenharmony_ci if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN || 44662306a36Sopenharmony_ci (ioend->io_flags & IOMAP_F_SHARED)) 44762306a36Sopenharmony_ci ioend->io_bio->bi_end_io = xfs_end_bio; 44862306a36Sopenharmony_ci return status; 44962306a36Sopenharmony_ci} 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci/* 45262306a36Sopenharmony_ci * If the folio has delalloc blocks on it, the caller is asking us to punch them 45362306a36Sopenharmony_ci * out. If we don't, we can leave a stale delalloc mapping covered by a clean 45462306a36Sopenharmony_ci * page that needs to be dirtied again before the delalloc mapping can be 45562306a36Sopenharmony_ci * converted. This stale delalloc mapping can trip up a later direct I/O read 45662306a36Sopenharmony_ci * operation on the same region. 45762306a36Sopenharmony_ci * 45862306a36Sopenharmony_ci * We prevent this by truncating away the delalloc regions on the folio. Because 45962306a36Sopenharmony_ci * they are delalloc, we can do this without needing a transaction. Indeed - if 46062306a36Sopenharmony_ci * we get ENOSPC errors, we have to be able to do this truncation without a 46162306a36Sopenharmony_ci * transaction as there is no space left for block reservation (typically why 46262306a36Sopenharmony_ci * we see a ENOSPC in writeback). 46362306a36Sopenharmony_ci */ 46462306a36Sopenharmony_cistatic void 46562306a36Sopenharmony_cixfs_discard_folio( 46662306a36Sopenharmony_ci struct folio *folio, 46762306a36Sopenharmony_ci loff_t pos) 46862306a36Sopenharmony_ci{ 46962306a36Sopenharmony_ci struct xfs_inode *ip = XFS_I(folio->mapping->host); 47062306a36Sopenharmony_ci struct xfs_mount *mp = ip->i_mount; 47162306a36Sopenharmony_ci int error; 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci if (xfs_is_shutdown(mp)) 47462306a36Sopenharmony_ci return; 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci xfs_alert_ratelimited(mp, 47762306a36Sopenharmony_ci "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", 47862306a36Sopenharmony_ci folio, ip->i_ino, pos); 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ci /* 48162306a36Sopenharmony_ci * The end of the punch range is always the offset of the first 48262306a36Sopenharmony_ci * byte of the next folio. Hence the end offset is only dependent on the 48362306a36Sopenharmony_ci * folio itself and not the start offset that is passed in. 48462306a36Sopenharmony_ci */ 48562306a36Sopenharmony_ci error = xfs_bmap_punch_delalloc_range(ip, pos, 48662306a36Sopenharmony_ci folio_pos(folio) + folio_size(folio)); 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci if (error && !xfs_is_shutdown(mp)) 48962306a36Sopenharmony_ci xfs_alert(mp, "page discard unable to remove delalloc mapping."); 49062306a36Sopenharmony_ci} 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_cistatic const struct iomap_writeback_ops xfs_writeback_ops = { 49362306a36Sopenharmony_ci .map_blocks = xfs_map_blocks, 49462306a36Sopenharmony_ci .prepare_ioend = xfs_prepare_ioend, 49562306a36Sopenharmony_ci .discard_folio = xfs_discard_folio, 49662306a36Sopenharmony_ci}; 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ciSTATIC int 49962306a36Sopenharmony_cixfs_vm_writepages( 50062306a36Sopenharmony_ci struct address_space *mapping, 50162306a36Sopenharmony_ci struct writeback_control *wbc) 50262306a36Sopenharmony_ci{ 50362306a36Sopenharmony_ci struct xfs_writepage_ctx wpc = { }; 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci /* 50662306a36Sopenharmony_ci * Writing back data in a transaction context can result in recursive 50762306a36Sopenharmony_ci * transactions. This is bad, so issue a warning and get out of here. 50862306a36Sopenharmony_ci */ 50962306a36Sopenharmony_ci if (WARN_ON_ONCE(current->journal_info)) 51062306a36Sopenharmony_ci return 0; 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 51362306a36Sopenharmony_ci return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); 51462306a36Sopenharmony_ci} 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ciSTATIC int 51762306a36Sopenharmony_cixfs_dax_writepages( 51862306a36Sopenharmony_ci struct address_space *mapping, 51962306a36Sopenharmony_ci struct writeback_control *wbc) 52062306a36Sopenharmony_ci{ 52162306a36Sopenharmony_ci struct xfs_inode *ip = XFS_I(mapping->host); 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci xfs_iflags_clear(ip, XFS_ITRUNCATED); 52462306a36Sopenharmony_ci return dax_writeback_mapping_range(mapping, 52562306a36Sopenharmony_ci xfs_inode_buftarg(ip)->bt_daxdev, wbc); 52662306a36Sopenharmony_ci} 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ciSTATIC sector_t 52962306a36Sopenharmony_cixfs_vm_bmap( 53062306a36Sopenharmony_ci struct address_space *mapping, 53162306a36Sopenharmony_ci sector_t block) 53262306a36Sopenharmony_ci{ 53362306a36Sopenharmony_ci struct xfs_inode *ip = XFS_I(mapping->host); 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci trace_xfs_vm_bmap(ip); 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci /* 53862306a36Sopenharmony_ci * The swap code (ab-)uses ->bmap to get a block mapping and then 53962306a36Sopenharmony_ci * bypasses the file system for actual I/O. We really can't allow 54062306a36Sopenharmony_ci * that on reflinks inodes, so we have to skip out here. And yes, 54162306a36Sopenharmony_ci * 0 is the magic code for a bmap error. 54262306a36Sopenharmony_ci * 54362306a36Sopenharmony_ci * Since we don't pass back blockdev info, we can't return bmap 54462306a36Sopenharmony_ci * information for rt files either. 54562306a36Sopenharmony_ci */ 54662306a36Sopenharmony_ci if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip)) 54762306a36Sopenharmony_ci return 0; 54862306a36Sopenharmony_ci return iomap_bmap(mapping, block, &xfs_read_iomap_ops); 54962306a36Sopenharmony_ci} 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ciSTATIC int 55262306a36Sopenharmony_cixfs_vm_read_folio( 55362306a36Sopenharmony_ci struct file *unused, 55462306a36Sopenharmony_ci struct folio *folio) 55562306a36Sopenharmony_ci{ 55662306a36Sopenharmony_ci return iomap_read_folio(folio, &xfs_read_iomap_ops); 55762306a36Sopenharmony_ci} 55862306a36Sopenharmony_ci 55962306a36Sopenharmony_ciSTATIC void 56062306a36Sopenharmony_cixfs_vm_readahead( 56162306a36Sopenharmony_ci struct readahead_control *rac) 56262306a36Sopenharmony_ci{ 56362306a36Sopenharmony_ci iomap_readahead(rac, &xfs_read_iomap_ops); 56462306a36Sopenharmony_ci} 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_cistatic int 56762306a36Sopenharmony_cixfs_iomap_swapfile_activate( 56862306a36Sopenharmony_ci struct swap_info_struct *sis, 56962306a36Sopenharmony_ci struct file *swap_file, 57062306a36Sopenharmony_ci sector_t *span) 57162306a36Sopenharmony_ci{ 57262306a36Sopenharmony_ci sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; 57362306a36Sopenharmony_ci return iomap_swapfile_activate(sis, swap_file, span, 57462306a36Sopenharmony_ci &xfs_read_iomap_ops); 57562306a36Sopenharmony_ci} 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ciconst struct address_space_operations xfs_address_space_operations = { 57862306a36Sopenharmony_ci .read_folio = xfs_vm_read_folio, 57962306a36Sopenharmony_ci .readahead = xfs_vm_readahead, 58062306a36Sopenharmony_ci .writepages = xfs_vm_writepages, 58162306a36Sopenharmony_ci .dirty_folio = iomap_dirty_folio, 58262306a36Sopenharmony_ci .release_folio = iomap_release_folio, 58362306a36Sopenharmony_ci .invalidate_folio = iomap_invalidate_folio, 58462306a36Sopenharmony_ci .bmap = xfs_vm_bmap, 58562306a36Sopenharmony_ci .migrate_folio = filemap_migrate_folio, 58662306a36Sopenharmony_ci .is_partially_uptodate = iomap_is_partially_uptodate, 58762306a36Sopenharmony_ci .error_remove_page = generic_error_remove_page, 58862306a36Sopenharmony_ci .swap_activate = xfs_iomap_swapfile_activate, 58962306a36Sopenharmony_ci}; 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ciconst struct address_space_operations xfs_dax_aops = { 59262306a36Sopenharmony_ci .writepages = xfs_dax_writepages, 59362306a36Sopenharmony_ci .dirty_folio = noop_dirty_folio, 59462306a36Sopenharmony_ci .swap_activate = xfs_iomap_swapfile_activate, 59562306a36Sopenharmony_ci}; 596