162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2000-2005 Silicon Graphics, Inc. 462306a36Sopenharmony_ci * All Rights Reserved. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#include "xfs.h" 762306a36Sopenharmony_ci#include "xfs_fs.h" 862306a36Sopenharmony_ci#include "xfs_shared.h" 962306a36Sopenharmony_ci#include "xfs_format.h" 1062306a36Sopenharmony_ci#include "xfs_log_format.h" 1162306a36Sopenharmony_ci#include "xfs_trans_resv.h" 1262306a36Sopenharmony_ci#include "xfs_bit.h" 1362306a36Sopenharmony_ci#include "xfs_mount.h" 1462306a36Sopenharmony_ci#include "xfs_trans.h" 1562306a36Sopenharmony_ci#include "xfs_trans_priv.h" 1662306a36Sopenharmony_ci#include "xfs_buf_item.h" 1762306a36Sopenharmony_ci#include "xfs_inode.h" 1862306a36Sopenharmony_ci#include "xfs_inode_item.h" 1962306a36Sopenharmony_ci#include "xfs_quota.h" 2062306a36Sopenharmony_ci#include "xfs_dquot_item.h" 2162306a36Sopenharmony_ci#include "xfs_dquot.h" 2262306a36Sopenharmony_ci#include "xfs_trace.h" 2362306a36Sopenharmony_ci#include "xfs_log.h" 2462306a36Sopenharmony_ci#include "xfs_log_priv.h" 2562306a36Sopenharmony_ci 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_cistruct kmem_cache *xfs_buf_item_cache; 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_cistatic inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip) 3062306a36Sopenharmony_ci{ 3162306a36Sopenharmony_ci return container_of(lip, struct xfs_buf_log_item, bli_item); 3262306a36Sopenharmony_ci} 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci/* Is this log iovec plausibly large enough to contain the buffer log format? */ 3562306a36Sopenharmony_cibool 3662306a36Sopenharmony_cixfs_buf_log_check_iovec( 3762306a36Sopenharmony_ci struct xfs_log_iovec *iovec) 3862306a36Sopenharmony_ci{ 3962306a36Sopenharmony_ci struct xfs_buf_log_format *blfp = iovec->i_addr; 4062306a36Sopenharmony_ci char *bmp_end; 4162306a36Sopenharmony_ci char *item_end; 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->i_len) 4462306a36Sopenharmony_ci return false; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci item_end = (char *)iovec->i_addr + iovec->i_len; 4762306a36Sopenharmony_ci bmp_end = (char *)&blfp->blf_data_map[blfp->blf_map_size]; 4862306a36Sopenharmony_ci return bmp_end <= item_end; 4962306a36Sopenharmony_ci} 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_cistatic inline int 5262306a36Sopenharmony_cixfs_buf_log_format_size( 5362306a36Sopenharmony_ci struct xfs_buf_log_format *blfp) 5462306a36Sopenharmony_ci{ 5562306a36Sopenharmony_ci return offsetof(struct xfs_buf_log_format, blf_data_map) + 5662306a36Sopenharmony_ci (blfp->blf_map_size * sizeof(blfp->blf_data_map[0])); 5762306a36Sopenharmony_ci} 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_cistatic inline bool 6062306a36Sopenharmony_cixfs_buf_item_straddle( 6162306a36Sopenharmony_ci struct xfs_buf *bp, 6262306a36Sopenharmony_ci uint offset, 6362306a36Sopenharmony_ci int first_bit, 6462306a36Sopenharmony_ci int nbits) 6562306a36Sopenharmony_ci{ 6662306a36Sopenharmony_ci void *first, *last; 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci first = xfs_buf_offset(bp, offset + (first_bit << XFS_BLF_SHIFT)); 6962306a36Sopenharmony_ci last = xfs_buf_offset(bp, 7062306a36Sopenharmony_ci offset + ((first_bit + nbits) << XFS_BLF_SHIFT)); 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci if (last - first != nbits * XFS_BLF_CHUNK) 7362306a36Sopenharmony_ci return true; 7462306a36Sopenharmony_ci return false; 7562306a36Sopenharmony_ci} 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci/* 7862306a36Sopenharmony_ci * Return the number of log iovecs and space needed to log the given buf log 7962306a36Sopenharmony_ci * item segment. 8062306a36Sopenharmony_ci * 8162306a36Sopenharmony_ci * It calculates this as 1 iovec for the buf log format structure and 1 for each 8262306a36Sopenharmony_ci * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged 8362306a36Sopenharmony_ci * in a single iovec. 8462306a36Sopenharmony_ci */ 8562306a36Sopenharmony_ciSTATIC void 8662306a36Sopenharmony_cixfs_buf_item_size_segment( 8762306a36Sopenharmony_ci struct xfs_buf_log_item *bip, 8862306a36Sopenharmony_ci struct xfs_buf_log_format *blfp, 8962306a36Sopenharmony_ci uint offset, 9062306a36Sopenharmony_ci int *nvecs, 9162306a36Sopenharmony_ci int *nbytes) 9262306a36Sopenharmony_ci{ 9362306a36Sopenharmony_ci struct xfs_buf *bp = bip->bli_buf; 9462306a36Sopenharmony_ci int first_bit; 9562306a36Sopenharmony_ci int nbits; 9662306a36Sopenharmony_ci int next_bit; 9762306a36Sopenharmony_ci int last_bit; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); 10062306a36Sopenharmony_ci if (first_bit == -1) 10162306a36Sopenharmony_ci return; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci (*nvecs)++; 10462306a36Sopenharmony_ci *nbytes += xfs_buf_log_format_size(blfp); 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci do { 10762306a36Sopenharmony_ci nbits = xfs_contig_bits(blfp->blf_data_map, 10862306a36Sopenharmony_ci blfp->blf_map_size, first_bit); 10962306a36Sopenharmony_ci ASSERT(nbits > 0); 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci /* 11262306a36Sopenharmony_ci * Straddling a page is rare because we don't log contiguous 11362306a36Sopenharmony_ci * chunks of unmapped buffers anywhere. 11462306a36Sopenharmony_ci */ 11562306a36Sopenharmony_ci if (nbits > 1 && 11662306a36Sopenharmony_ci xfs_buf_item_straddle(bp, offset, first_bit, nbits)) 11762306a36Sopenharmony_ci goto slow_scan; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci (*nvecs)++; 12062306a36Sopenharmony_ci *nbytes += nbits * XFS_BLF_CHUNK; 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci /* 12362306a36Sopenharmony_ci * This takes the bit number to start looking from and 12462306a36Sopenharmony_ci * returns the next set bit from there. It returns -1 12562306a36Sopenharmony_ci * if there are no more bits set or the start bit is 12662306a36Sopenharmony_ci * beyond the end of the bitmap. 12762306a36Sopenharmony_ci */ 12862306a36Sopenharmony_ci first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 12962306a36Sopenharmony_ci (uint)first_bit + nbits + 1); 13062306a36Sopenharmony_ci } while (first_bit != -1); 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci return; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_cislow_scan: 13562306a36Sopenharmony_ci /* Count the first bit we jumped out of the above loop from */ 13662306a36Sopenharmony_ci (*nvecs)++; 13762306a36Sopenharmony_ci *nbytes += XFS_BLF_CHUNK; 13862306a36Sopenharmony_ci last_bit = first_bit; 13962306a36Sopenharmony_ci while (last_bit != -1) { 14062306a36Sopenharmony_ci /* 14162306a36Sopenharmony_ci * This takes the bit number to start looking from and 14262306a36Sopenharmony_ci * returns the next set bit from there. It returns -1 14362306a36Sopenharmony_ci * if there are no more bits set or the start bit is 14462306a36Sopenharmony_ci * beyond the end of the bitmap. 14562306a36Sopenharmony_ci */ 14662306a36Sopenharmony_ci next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 14762306a36Sopenharmony_ci last_bit + 1); 14862306a36Sopenharmony_ci /* 14962306a36Sopenharmony_ci * If we run out of bits, leave the loop, 15062306a36Sopenharmony_ci * else if we find a new set of bits bump the number of vecs, 15162306a36Sopenharmony_ci * else keep scanning the current set of bits. 15262306a36Sopenharmony_ci */ 15362306a36Sopenharmony_ci if (next_bit == -1) { 15462306a36Sopenharmony_ci break; 15562306a36Sopenharmony_ci } else if (next_bit != last_bit + 1 || 15662306a36Sopenharmony_ci xfs_buf_item_straddle(bp, offset, first_bit, nbits)) { 15762306a36Sopenharmony_ci last_bit = next_bit; 15862306a36Sopenharmony_ci first_bit = next_bit; 15962306a36Sopenharmony_ci (*nvecs)++; 16062306a36Sopenharmony_ci nbits = 1; 16162306a36Sopenharmony_ci } else { 16262306a36Sopenharmony_ci last_bit++; 16362306a36Sopenharmony_ci nbits++; 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci *nbytes += XFS_BLF_CHUNK; 16662306a36Sopenharmony_ci } 16762306a36Sopenharmony_ci} 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci/* 17062306a36Sopenharmony_ci * Return the number of log iovecs and space needed to log the given buf log 17162306a36Sopenharmony_ci * item. 17262306a36Sopenharmony_ci * 17362306a36Sopenharmony_ci * Discontiguous buffers need a format structure per region that is being 17462306a36Sopenharmony_ci * logged. This makes the changes in the buffer appear to log recovery as though 17562306a36Sopenharmony_ci * they came from separate buffers, just like would occur if multiple buffers 17662306a36Sopenharmony_ci * were used instead of a single discontiguous buffer. This enables 17762306a36Sopenharmony_ci * discontiguous buffers to be in-memory constructs, completely transparent to 17862306a36Sopenharmony_ci * what ends up on disk. 17962306a36Sopenharmony_ci * 18062306a36Sopenharmony_ci * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log 18162306a36Sopenharmony_ci * format structures. If the item has previously been logged and has dirty 18262306a36Sopenharmony_ci * regions, we do not relog them in stale buffers. This has the effect of 18362306a36Sopenharmony_ci * reducing the size of the relogged item by the amount of dirty data tracked 18462306a36Sopenharmony_ci * by the log item. This can result in the committing transaction reducing the 18562306a36Sopenharmony_ci * amount of space being consumed by the CIL. 18662306a36Sopenharmony_ci */ 18762306a36Sopenharmony_ciSTATIC void 18862306a36Sopenharmony_cixfs_buf_item_size( 18962306a36Sopenharmony_ci struct xfs_log_item *lip, 19062306a36Sopenharmony_ci int *nvecs, 19162306a36Sopenharmony_ci int *nbytes) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci struct xfs_buf_log_item *bip = BUF_ITEM(lip); 19462306a36Sopenharmony_ci struct xfs_buf *bp = bip->bli_buf; 19562306a36Sopenharmony_ci int i; 19662306a36Sopenharmony_ci int bytes; 19762306a36Sopenharmony_ci uint offset = 0; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci ASSERT(atomic_read(&bip->bli_refcount) > 0); 20062306a36Sopenharmony_ci if (bip->bli_flags & XFS_BLI_STALE) { 20162306a36Sopenharmony_ci /* 20262306a36Sopenharmony_ci * The buffer is stale, so all we need to log is the buf log 20362306a36Sopenharmony_ci * format structure with the cancel flag in it as we are never 20462306a36Sopenharmony_ci * going to replay the changes tracked in the log item. 20562306a36Sopenharmony_ci */ 20662306a36Sopenharmony_ci trace_xfs_buf_item_size_stale(bip); 20762306a36Sopenharmony_ci ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); 20862306a36Sopenharmony_ci *nvecs += bip->bli_format_count; 20962306a36Sopenharmony_ci for (i = 0; i < bip->bli_format_count; i++) { 21062306a36Sopenharmony_ci *nbytes += xfs_buf_log_format_size(&bip->bli_formats[i]); 21162306a36Sopenharmony_ci } 21262306a36Sopenharmony_ci return; 21362306a36Sopenharmony_ci } 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci ASSERT(bip->bli_flags & XFS_BLI_LOGGED); 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci if (bip->bli_flags & XFS_BLI_ORDERED) { 21862306a36Sopenharmony_ci /* 21962306a36Sopenharmony_ci * The buffer has been logged just to order it. It is not being 22062306a36Sopenharmony_ci * included in the transaction commit, so no vectors are used at 22162306a36Sopenharmony_ci * all. 22262306a36Sopenharmony_ci */ 22362306a36Sopenharmony_ci trace_xfs_buf_item_size_ordered(bip); 22462306a36Sopenharmony_ci *nvecs = XFS_LOG_VEC_ORDERED; 22562306a36Sopenharmony_ci return; 22662306a36Sopenharmony_ci } 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci /* 22962306a36Sopenharmony_ci * The vector count is based on the number of buffer vectors we have 23062306a36Sopenharmony_ci * dirty bits in. This will only be greater than one when we have a 23162306a36Sopenharmony_ci * compound buffer with more than one segment dirty. Hence for compound 23262306a36Sopenharmony_ci * buffers we need to track which segment the dirty bits correspond to, 23362306a36Sopenharmony_ci * and when we move from one segment to the next increment the vector 23462306a36Sopenharmony_ci * count for the extra buf log format structure that will need to be 23562306a36Sopenharmony_ci * written. 23662306a36Sopenharmony_ci */ 23762306a36Sopenharmony_ci bytes = 0; 23862306a36Sopenharmony_ci for (i = 0; i < bip->bli_format_count; i++) { 23962306a36Sopenharmony_ci xfs_buf_item_size_segment(bip, &bip->bli_formats[i], offset, 24062306a36Sopenharmony_ci nvecs, &bytes); 24162306a36Sopenharmony_ci offset += BBTOB(bp->b_maps[i].bm_len); 24262306a36Sopenharmony_ci } 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci /* 24562306a36Sopenharmony_ci * Round up the buffer size required to minimise the number of memory 24662306a36Sopenharmony_ci * allocations that need to be done as this item grows when relogged by 24762306a36Sopenharmony_ci * repeated modifications. 24862306a36Sopenharmony_ci */ 24962306a36Sopenharmony_ci *nbytes = round_up(bytes, 512); 25062306a36Sopenharmony_ci trace_xfs_buf_item_size(bip); 25162306a36Sopenharmony_ci} 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_cistatic inline void 25462306a36Sopenharmony_cixfs_buf_item_copy_iovec( 25562306a36Sopenharmony_ci struct xfs_log_vec *lv, 25662306a36Sopenharmony_ci struct xfs_log_iovec **vecp, 25762306a36Sopenharmony_ci struct xfs_buf *bp, 25862306a36Sopenharmony_ci uint offset, 25962306a36Sopenharmony_ci int first_bit, 26062306a36Sopenharmony_ci uint nbits) 26162306a36Sopenharmony_ci{ 26262306a36Sopenharmony_ci offset += first_bit * XFS_BLF_CHUNK; 26362306a36Sopenharmony_ci xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK, 26462306a36Sopenharmony_ci xfs_buf_offset(bp, offset), 26562306a36Sopenharmony_ci nbits * XFS_BLF_CHUNK); 26662306a36Sopenharmony_ci} 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_cistatic void 26962306a36Sopenharmony_cixfs_buf_item_format_segment( 27062306a36Sopenharmony_ci struct xfs_buf_log_item *bip, 27162306a36Sopenharmony_ci struct xfs_log_vec *lv, 27262306a36Sopenharmony_ci struct xfs_log_iovec **vecp, 27362306a36Sopenharmony_ci uint offset, 27462306a36Sopenharmony_ci struct xfs_buf_log_format *blfp) 27562306a36Sopenharmony_ci{ 27662306a36Sopenharmony_ci struct xfs_buf *bp = bip->bli_buf; 27762306a36Sopenharmony_ci uint base_size; 27862306a36Sopenharmony_ci int first_bit; 27962306a36Sopenharmony_ci int last_bit; 28062306a36Sopenharmony_ci int next_bit; 28162306a36Sopenharmony_ci uint nbits; 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci /* copy the flags across from the base format item */ 28462306a36Sopenharmony_ci blfp->blf_flags = bip->__bli_format.blf_flags; 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci /* 28762306a36Sopenharmony_ci * Base size is the actual size of the ondisk structure - it reflects 28862306a36Sopenharmony_ci * the actual size of the dirty bitmap rather than the size of the in 28962306a36Sopenharmony_ci * memory structure. 29062306a36Sopenharmony_ci */ 29162306a36Sopenharmony_ci base_size = xfs_buf_log_format_size(blfp); 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0); 29462306a36Sopenharmony_ci if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) { 29562306a36Sopenharmony_ci /* 29662306a36Sopenharmony_ci * If the map is not be dirty in the transaction, mark 29762306a36Sopenharmony_ci * the size as zero and do not advance the vector pointer. 29862306a36Sopenharmony_ci */ 29962306a36Sopenharmony_ci return; 30062306a36Sopenharmony_ci } 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size); 30362306a36Sopenharmony_ci blfp->blf_size = 1; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci if (bip->bli_flags & XFS_BLI_STALE) { 30662306a36Sopenharmony_ci /* 30762306a36Sopenharmony_ci * The buffer is stale, so all we need to log 30862306a36Sopenharmony_ci * is the buf log format structure with the 30962306a36Sopenharmony_ci * cancel flag in it. 31062306a36Sopenharmony_ci */ 31162306a36Sopenharmony_ci trace_xfs_buf_item_format_stale(bip); 31262306a36Sopenharmony_ci ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); 31362306a36Sopenharmony_ci return; 31462306a36Sopenharmony_ci } 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci /* 31862306a36Sopenharmony_ci * Fill in an iovec for each set of contiguous chunks. 31962306a36Sopenharmony_ci */ 32062306a36Sopenharmony_ci do { 32162306a36Sopenharmony_ci ASSERT(first_bit >= 0); 32262306a36Sopenharmony_ci nbits = xfs_contig_bits(blfp->blf_data_map, 32362306a36Sopenharmony_ci blfp->blf_map_size, first_bit); 32462306a36Sopenharmony_ci ASSERT(nbits > 0); 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci /* 32762306a36Sopenharmony_ci * Straddling a page is rare because we don't log contiguous 32862306a36Sopenharmony_ci * chunks of unmapped buffers anywhere. 32962306a36Sopenharmony_ci */ 33062306a36Sopenharmony_ci if (nbits > 1 && 33162306a36Sopenharmony_ci xfs_buf_item_straddle(bp, offset, first_bit, nbits)) 33262306a36Sopenharmony_ci goto slow_scan; 33362306a36Sopenharmony_ci 33462306a36Sopenharmony_ci xfs_buf_item_copy_iovec(lv, vecp, bp, offset, 33562306a36Sopenharmony_ci first_bit, nbits); 33662306a36Sopenharmony_ci blfp->blf_size++; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci /* 33962306a36Sopenharmony_ci * This takes the bit number to start looking from and 34062306a36Sopenharmony_ci * returns the next set bit from there. It returns -1 34162306a36Sopenharmony_ci * if there are no more bits set or the start bit is 34262306a36Sopenharmony_ci * beyond the end of the bitmap. 34362306a36Sopenharmony_ci */ 34462306a36Sopenharmony_ci first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 34562306a36Sopenharmony_ci (uint)first_bit + nbits + 1); 34662306a36Sopenharmony_ci } while (first_bit != -1); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci return; 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_cislow_scan: 35162306a36Sopenharmony_ci ASSERT(bp->b_addr == NULL); 35262306a36Sopenharmony_ci last_bit = first_bit; 35362306a36Sopenharmony_ci nbits = 1; 35462306a36Sopenharmony_ci for (;;) { 35562306a36Sopenharmony_ci /* 35662306a36Sopenharmony_ci * This takes the bit number to start looking from and 35762306a36Sopenharmony_ci * returns the next set bit from there. It returns -1 35862306a36Sopenharmony_ci * if there are no more bits set or the start bit is 35962306a36Sopenharmony_ci * beyond the end of the bitmap. 36062306a36Sopenharmony_ci */ 36162306a36Sopenharmony_ci next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 36262306a36Sopenharmony_ci (uint)last_bit + 1); 36362306a36Sopenharmony_ci /* 36462306a36Sopenharmony_ci * If we run out of bits fill in the last iovec and get out of 36562306a36Sopenharmony_ci * the loop. Else if we start a new set of bits then fill in 36662306a36Sopenharmony_ci * the iovec for the series we were looking at and start 36762306a36Sopenharmony_ci * counting the bits in the new one. Else we're still in the 36862306a36Sopenharmony_ci * same set of bits so just keep counting and scanning. 36962306a36Sopenharmony_ci */ 37062306a36Sopenharmony_ci if (next_bit == -1) { 37162306a36Sopenharmony_ci xfs_buf_item_copy_iovec(lv, vecp, bp, offset, 37262306a36Sopenharmony_ci first_bit, nbits); 37362306a36Sopenharmony_ci blfp->blf_size++; 37462306a36Sopenharmony_ci break; 37562306a36Sopenharmony_ci } else if (next_bit != last_bit + 1 || 37662306a36Sopenharmony_ci xfs_buf_item_straddle(bp, offset, first_bit, nbits)) { 37762306a36Sopenharmony_ci xfs_buf_item_copy_iovec(lv, vecp, bp, offset, 37862306a36Sopenharmony_ci first_bit, nbits); 37962306a36Sopenharmony_ci blfp->blf_size++; 38062306a36Sopenharmony_ci first_bit = next_bit; 38162306a36Sopenharmony_ci last_bit = next_bit; 38262306a36Sopenharmony_ci nbits = 1; 38362306a36Sopenharmony_ci } else { 38462306a36Sopenharmony_ci last_bit++; 38562306a36Sopenharmony_ci nbits++; 38662306a36Sopenharmony_ci } 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci} 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci/* 39162306a36Sopenharmony_ci * This is called to fill in the vector of log iovecs for the 39262306a36Sopenharmony_ci * given log buf item. It fills the first entry with a buf log 39362306a36Sopenharmony_ci * format structure, and the rest point to contiguous chunks 39462306a36Sopenharmony_ci * within the buffer. 39562306a36Sopenharmony_ci */ 39662306a36Sopenharmony_ciSTATIC void 39762306a36Sopenharmony_cixfs_buf_item_format( 39862306a36Sopenharmony_ci struct xfs_log_item *lip, 39962306a36Sopenharmony_ci struct xfs_log_vec *lv) 40062306a36Sopenharmony_ci{ 40162306a36Sopenharmony_ci struct xfs_buf_log_item *bip = BUF_ITEM(lip); 40262306a36Sopenharmony_ci struct xfs_buf *bp = bip->bli_buf; 40362306a36Sopenharmony_ci struct xfs_log_iovec *vecp = NULL; 40462306a36Sopenharmony_ci uint offset = 0; 40562306a36Sopenharmony_ci int i; 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci ASSERT(atomic_read(&bip->bli_refcount) > 0); 40862306a36Sopenharmony_ci ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 40962306a36Sopenharmony_ci (bip->bli_flags & XFS_BLI_STALE)); 41062306a36Sopenharmony_ci ASSERT((bip->bli_flags & XFS_BLI_STALE) || 41162306a36Sopenharmony_ci (xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF 41262306a36Sopenharmony_ci && xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF)); 41362306a36Sopenharmony_ci ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED) || 41462306a36Sopenharmony_ci (bip->bli_flags & XFS_BLI_STALE)); 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci /* 41862306a36Sopenharmony_ci * If it is an inode buffer, transfer the in-memory state to the 41962306a36Sopenharmony_ci * format flags and clear the in-memory state. 42062306a36Sopenharmony_ci * 42162306a36Sopenharmony_ci * For buffer based inode allocation, we do not transfer 42262306a36Sopenharmony_ci * this state if the inode buffer allocation has not yet been committed 42362306a36Sopenharmony_ci * to the log as setting the XFS_BLI_INODE_BUF flag will prevent 42462306a36Sopenharmony_ci * correct replay of the inode allocation. 42562306a36Sopenharmony_ci * 42662306a36Sopenharmony_ci * For icreate item based inode allocation, the buffers aren't written 42762306a36Sopenharmony_ci * to the journal during allocation, and hence we should always tag the 42862306a36Sopenharmony_ci * buffer as an inode buffer so that the correct unlinked list replay 42962306a36Sopenharmony_ci * occurs during recovery. 43062306a36Sopenharmony_ci */ 43162306a36Sopenharmony_ci if (bip->bli_flags & XFS_BLI_INODE_BUF) { 43262306a36Sopenharmony_ci if (xfs_has_v3inodes(lip->li_log->l_mp) || 43362306a36Sopenharmony_ci !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && 43462306a36Sopenharmony_ci xfs_log_item_in_current_chkpt(lip))) 43562306a36Sopenharmony_ci bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF; 43662306a36Sopenharmony_ci bip->bli_flags &= ~XFS_BLI_INODE_BUF; 43762306a36Sopenharmony_ci } 43862306a36Sopenharmony_ci 43962306a36Sopenharmony_ci for (i = 0; i < bip->bli_format_count; i++) { 44062306a36Sopenharmony_ci xfs_buf_item_format_segment(bip, lv, &vecp, offset, 44162306a36Sopenharmony_ci &bip->bli_formats[i]); 44262306a36Sopenharmony_ci offset += BBTOB(bp->b_maps[i].bm_len); 44362306a36Sopenharmony_ci } 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci /* 44662306a36Sopenharmony_ci * Check to make sure everything is consistent. 44762306a36Sopenharmony_ci */ 44862306a36Sopenharmony_ci trace_xfs_buf_item_format(bip); 44962306a36Sopenharmony_ci} 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ci/* 45262306a36Sopenharmony_ci * This is called to pin the buffer associated with the buf log item in memory 45362306a36Sopenharmony_ci * so it cannot be written out. 45462306a36Sopenharmony_ci * 45562306a36Sopenharmony_ci * We take a reference to the buffer log item here so that the BLI life cycle 45662306a36Sopenharmony_ci * extends at least until the buffer is unpinned via xfs_buf_item_unpin() and 45762306a36Sopenharmony_ci * inserted into the AIL. 45862306a36Sopenharmony_ci * 45962306a36Sopenharmony_ci * We also need to take a reference to the buffer itself as the BLI unpin 46062306a36Sopenharmony_ci * processing requires accessing the buffer after the BLI has dropped the final 46162306a36Sopenharmony_ci * BLI reference. See xfs_buf_item_unpin() for an explanation. 46262306a36Sopenharmony_ci * If unpins race to drop the final BLI reference and only the 46362306a36Sopenharmony_ci * BLI owns a reference to the buffer, then the loser of the race can have the 46462306a36Sopenharmony_ci * buffer fgreed from under it (e.g. on shutdown). Taking a buffer reference per 46562306a36Sopenharmony_ci * pin count ensures the life cycle of the buffer extends for as 46662306a36Sopenharmony_ci * long as we hold the buffer pin reference in xfs_buf_item_unpin(). 46762306a36Sopenharmony_ci */ 46862306a36Sopenharmony_ciSTATIC void 46962306a36Sopenharmony_cixfs_buf_item_pin( 47062306a36Sopenharmony_ci struct xfs_log_item *lip) 47162306a36Sopenharmony_ci{ 47262306a36Sopenharmony_ci struct xfs_buf_log_item *bip = BUF_ITEM(lip); 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci ASSERT(atomic_read(&bip->bli_refcount) > 0); 47562306a36Sopenharmony_ci ASSERT((bip->bli_flags & XFS_BLI_LOGGED) || 47662306a36Sopenharmony_ci (bip->bli_flags & XFS_BLI_ORDERED) || 47762306a36Sopenharmony_ci (bip->bli_flags & XFS_BLI_STALE)); 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ci trace_xfs_buf_item_pin(bip); 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci xfs_buf_hold(bip->bli_buf); 48262306a36Sopenharmony_ci atomic_inc(&bip->bli_refcount); 48362306a36Sopenharmony_ci atomic_inc(&bip->bli_buf->b_pin_count); 48462306a36Sopenharmony_ci} 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci/* 48762306a36Sopenharmony_ci * This is called to unpin the buffer associated with the buf log item which was 48862306a36Sopenharmony_ci * previously pinned with a call to xfs_buf_item_pin(). We enter this function 48962306a36Sopenharmony_ci * with a buffer pin count, a buffer reference and a BLI reference. 49062306a36Sopenharmony_ci * 49162306a36Sopenharmony_ci * We must drop the BLI reference before we unpin the buffer because the AIL 49262306a36Sopenharmony_ci * doesn't acquire a BLI reference whenever it accesses it. Therefore if the 49362306a36Sopenharmony_ci * refcount drops to zero, the bli could still be AIL resident and the buffer 49462306a36Sopenharmony_ci * submitted for I/O at any point before we return. This can result in IO 49562306a36Sopenharmony_ci * completion freeing the buffer while we are still trying to access it here. 49662306a36Sopenharmony_ci * This race condition can also occur in shutdown situations where we abort and 49762306a36Sopenharmony_ci * unpin buffers from contexts other that journal IO completion. 49862306a36Sopenharmony_ci * 49962306a36Sopenharmony_ci * Hence we have to hold a buffer reference per pin count to ensure that the 50062306a36Sopenharmony_ci * buffer cannot be freed until we have finished processing the unpin operation. 50162306a36Sopenharmony_ci * The reference is taken in xfs_buf_item_pin(), and we must hold it until we 50262306a36Sopenharmony_ci * are done processing the buffer state. In the case of an abort (remove = 50362306a36Sopenharmony_ci * true) then we re-use the current pin reference as the IO reference we hand 50462306a36Sopenharmony_ci * off to IO failure handling. 50562306a36Sopenharmony_ci */ 50662306a36Sopenharmony_ciSTATIC void 50762306a36Sopenharmony_cixfs_buf_item_unpin( 50862306a36Sopenharmony_ci struct xfs_log_item *lip, 50962306a36Sopenharmony_ci int remove) 51062306a36Sopenharmony_ci{ 51162306a36Sopenharmony_ci struct xfs_buf_log_item *bip = BUF_ITEM(lip); 51262306a36Sopenharmony_ci struct xfs_buf *bp = bip->bli_buf; 51362306a36Sopenharmony_ci int stale = bip->bli_flags & XFS_BLI_STALE; 51462306a36Sopenharmony_ci int freed; 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci ASSERT(bp->b_log_item == bip); 51762306a36Sopenharmony_ci ASSERT(atomic_read(&bip->bli_refcount) > 0); 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci trace_xfs_buf_item_unpin(bip); 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci freed = atomic_dec_and_test(&bip->bli_refcount); 52262306a36Sopenharmony_ci if (atomic_dec_and_test(&bp->b_pin_count)) 52362306a36Sopenharmony_ci wake_up_all(&bp->b_waiters); 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci /* 52662306a36Sopenharmony_ci * Nothing to do but drop the buffer pin reference if the BLI is 52762306a36Sopenharmony_ci * still active. 52862306a36Sopenharmony_ci */ 52962306a36Sopenharmony_ci if (!freed) { 53062306a36Sopenharmony_ci xfs_buf_rele(bp); 53162306a36Sopenharmony_ci return; 53262306a36Sopenharmony_ci } 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci if (stale) { 53562306a36Sopenharmony_ci ASSERT(bip->bli_flags & XFS_BLI_STALE); 53662306a36Sopenharmony_ci ASSERT(xfs_buf_islocked(bp)); 53762306a36Sopenharmony_ci ASSERT(bp->b_flags & XBF_STALE); 53862306a36Sopenharmony_ci ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL); 53962306a36Sopenharmony_ci ASSERT(list_empty(&lip->li_trans)); 54062306a36Sopenharmony_ci ASSERT(!bp->b_transp); 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci trace_xfs_buf_item_unpin_stale(bip); 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci /* 54562306a36Sopenharmony_ci * The buffer has been locked and referenced since it was marked 54662306a36Sopenharmony_ci * stale so we own both lock and reference exclusively here. We 54762306a36Sopenharmony_ci * do not need the pin reference any more, so drop it now so 54862306a36Sopenharmony_ci * that we only have one reference to drop once item completion 54962306a36Sopenharmony_ci * processing is complete. 55062306a36Sopenharmony_ci */ 55162306a36Sopenharmony_ci xfs_buf_rele(bp); 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci /* 55462306a36Sopenharmony_ci * If we get called here because of an IO error, we may or may 55562306a36Sopenharmony_ci * not have the item on the AIL. xfs_trans_ail_delete() will 55662306a36Sopenharmony_ci * take care of that situation. xfs_trans_ail_delete() drops 55762306a36Sopenharmony_ci * the AIL lock. 55862306a36Sopenharmony_ci */ 55962306a36Sopenharmony_ci if (bip->bli_flags & XFS_BLI_STALE_INODE) { 56062306a36Sopenharmony_ci xfs_buf_item_done(bp); 56162306a36Sopenharmony_ci xfs_buf_inode_iodone(bp); 56262306a36Sopenharmony_ci ASSERT(list_empty(&bp->b_li_list)); 56362306a36Sopenharmony_ci } else { 56462306a36Sopenharmony_ci xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR); 56562306a36Sopenharmony_ci xfs_buf_item_relse(bp); 56662306a36Sopenharmony_ci ASSERT(bp->b_log_item == NULL); 56762306a36Sopenharmony_ci } 56862306a36Sopenharmony_ci xfs_buf_relse(bp); 56962306a36Sopenharmony_ci return; 57062306a36Sopenharmony_ci } 57162306a36Sopenharmony_ci 57262306a36Sopenharmony_ci if (remove) { 57362306a36Sopenharmony_ci /* 57462306a36Sopenharmony_ci * We need to simulate an async IO failures here to ensure that 57562306a36Sopenharmony_ci * the correct error completion is run on this buffer. This 57662306a36Sopenharmony_ci * requires a reference to the buffer and for the buffer to be 57762306a36Sopenharmony_ci * locked. We can safely pass ownership of the pin reference to 57862306a36Sopenharmony_ci * the IO to ensure that nothing can free the buffer while we 57962306a36Sopenharmony_ci * wait for the lock and then run the IO failure completion. 58062306a36Sopenharmony_ci */ 58162306a36Sopenharmony_ci xfs_buf_lock(bp); 58262306a36Sopenharmony_ci bp->b_flags |= XBF_ASYNC; 58362306a36Sopenharmony_ci xfs_buf_ioend_fail(bp); 58462306a36Sopenharmony_ci return; 58562306a36Sopenharmony_ci } 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci /* 58862306a36Sopenharmony_ci * BLI has no more active references - it will be moved to the AIL to 58962306a36Sopenharmony_ci * manage the remaining BLI/buffer life cycle. There is nothing left for 59062306a36Sopenharmony_ci * us to do here so drop the pin reference to the buffer. 59162306a36Sopenharmony_ci */ 59262306a36Sopenharmony_ci xfs_buf_rele(bp); 59362306a36Sopenharmony_ci} 59462306a36Sopenharmony_ci 59562306a36Sopenharmony_ciSTATIC uint 59662306a36Sopenharmony_cixfs_buf_item_push( 59762306a36Sopenharmony_ci struct xfs_log_item *lip, 59862306a36Sopenharmony_ci struct list_head *buffer_list) 59962306a36Sopenharmony_ci{ 60062306a36Sopenharmony_ci struct xfs_buf_log_item *bip = BUF_ITEM(lip); 60162306a36Sopenharmony_ci struct xfs_buf *bp = bip->bli_buf; 60262306a36Sopenharmony_ci uint rval = XFS_ITEM_SUCCESS; 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci if (xfs_buf_ispinned(bp)) 60562306a36Sopenharmony_ci return XFS_ITEM_PINNED; 60662306a36Sopenharmony_ci if (!xfs_buf_trylock(bp)) { 60762306a36Sopenharmony_ci /* 60862306a36Sopenharmony_ci * If we have just raced with a buffer being pinned and it has 60962306a36Sopenharmony_ci * been marked stale, we could end up stalling until someone else 61062306a36Sopenharmony_ci * issues a log force to unpin the stale buffer. Check for the 61162306a36Sopenharmony_ci * race condition here so xfsaild recognizes the buffer is pinned 61262306a36Sopenharmony_ci * and queues a log force to move it along. 61362306a36Sopenharmony_ci */ 61462306a36Sopenharmony_ci if (xfs_buf_ispinned(bp)) 61562306a36Sopenharmony_ci return XFS_ITEM_PINNED; 61662306a36Sopenharmony_ci return XFS_ITEM_LOCKED; 61762306a36Sopenharmony_ci } 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci trace_xfs_buf_item_push(bip); 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci /* has a previous flush failed due to IO errors? */ 62462306a36Sopenharmony_ci if (bp->b_flags & XBF_WRITE_FAIL) { 62562306a36Sopenharmony_ci xfs_buf_alert_ratelimited(bp, "XFS: Failing async write", 62662306a36Sopenharmony_ci "Failing async write on buffer block 0x%llx. Retrying async write.", 62762306a36Sopenharmony_ci (long long)xfs_buf_daddr(bp)); 62862306a36Sopenharmony_ci } 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci if (!xfs_buf_delwri_queue(bp, buffer_list)) 63162306a36Sopenharmony_ci rval = XFS_ITEM_FLUSHING; 63262306a36Sopenharmony_ci xfs_buf_unlock(bp); 63362306a36Sopenharmony_ci return rval; 63462306a36Sopenharmony_ci} 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci/* 63762306a36Sopenharmony_ci * Drop the buffer log item refcount and take appropriate action. This helper 63862306a36Sopenharmony_ci * determines whether the bli must be freed or not, since a decrement to zero 63962306a36Sopenharmony_ci * does not necessarily mean the bli is unused. 64062306a36Sopenharmony_ci * 64162306a36Sopenharmony_ci * Return true if the bli is freed, false otherwise. 64262306a36Sopenharmony_ci */ 64362306a36Sopenharmony_cibool 64462306a36Sopenharmony_cixfs_buf_item_put( 64562306a36Sopenharmony_ci struct xfs_buf_log_item *bip) 64662306a36Sopenharmony_ci{ 64762306a36Sopenharmony_ci struct xfs_log_item *lip = &bip->bli_item; 64862306a36Sopenharmony_ci bool aborted; 64962306a36Sopenharmony_ci bool dirty; 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci /* drop the bli ref and return if it wasn't the last one */ 65262306a36Sopenharmony_ci if (!atomic_dec_and_test(&bip->bli_refcount)) 65362306a36Sopenharmony_ci return false; 65462306a36Sopenharmony_ci 65562306a36Sopenharmony_ci /* 65662306a36Sopenharmony_ci * We dropped the last ref and must free the item if clean or aborted. 65762306a36Sopenharmony_ci * If the bli is dirty and non-aborted, the buffer was clean in the 65862306a36Sopenharmony_ci * transaction but still awaiting writeback from previous changes. In 65962306a36Sopenharmony_ci * that case, the bli is freed on buffer writeback completion. 66062306a36Sopenharmony_ci */ 66162306a36Sopenharmony_ci aborted = test_bit(XFS_LI_ABORTED, &lip->li_flags) || 66262306a36Sopenharmony_ci xlog_is_shutdown(lip->li_log); 66362306a36Sopenharmony_ci dirty = bip->bli_flags & XFS_BLI_DIRTY; 66462306a36Sopenharmony_ci if (dirty && !aborted) 66562306a36Sopenharmony_ci return false; 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci /* 66862306a36Sopenharmony_ci * The bli is aborted or clean. An aborted item may be in the AIL 66962306a36Sopenharmony_ci * regardless of dirty state. For example, consider an aborted 67062306a36Sopenharmony_ci * transaction that invalidated a dirty bli and cleared the dirty 67162306a36Sopenharmony_ci * state. 67262306a36Sopenharmony_ci */ 67362306a36Sopenharmony_ci if (aborted) 67462306a36Sopenharmony_ci xfs_trans_ail_delete(lip, 0); 67562306a36Sopenharmony_ci xfs_buf_item_relse(bip->bli_buf); 67662306a36Sopenharmony_ci return true; 67762306a36Sopenharmony_ci} 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_ci/* 68062306a36Sopenharmony_ci * Release the buffer associated with the buf log item. If there is no dirty 68162306a36Sopenharmony_ci * logged data associated with the buffer recorded in the buf log item, then 68262306a36Sopenharmony_ci * free the buf log item and remove the reference to it in the buffer. 68362306a36Sopenharmony_ci * 68462306a36Sopenharmony_ci * This call ignores the recursion count. It is only called when the buffer 68562306a36Sopenharmony_ci * should REALLY be unlocked, regardless of the recursion count. 68662306a36Sopenharmony_ci * 68762306a36Sopenharmony_ci * We unconditionally drop the transaction's reference to the log item. If the 68862306a36Sopenharmony_ci * item was logged, then another reference was taken when it was pinned, so we 68962306a36Sopenharmony_ci * can safely drop the transaction reference now. This also allows us to avoid 69062306a36Sopenharmony_ci * potential races with the unpin code freeing the bli by not referencing the 69162306a36Sopenharmony_ci * bli after we've dropped the reference count. 69262306a36Sopenharmony_ci * 69362306a36Sopenharmony_ci * If the XFS_BLI_HOLD flag is set in the buf log item, then free the log item 69462306a36Sopenharmony_ci * if necessary but do not unlock the buffer. This is for support of 69562306a36Sopenharmony_ci * xfs_trans_bhold(). Make sure the XFS_BLI_HOLD field is cleared if we don't 69662306a36Sopenharmony_ci * free the item. 69762306a36Sopenharmony_ci */ 69862306a36Sopenharmony_ciSTATIC void 69962306a36Sopenharmony_cixfs_buf_item_release( 70062306a36Sopenharmony_ci struct xfs_log_item *lip) 70162306a36Sopenharmony_ci{ 70262306a36Sopenharmony_ci struct xfs_buf_log_item *bip = BUF_ITEM(lip); 70362306a36Sopenharmony_ci struct xfs_buf *bp = bip->bli_buf; 70462306a36Sopenharmony_ci bool released; 70562306a36Sopenharmony_ci bool hold = bip->bli_flags & XFS_BLI_HOLD; 70662306a36Sopenharmony_ci bool stale = bip->bli_flags & XFS_BLI_STALE; 70762306a36Sopenharmony_ci#if defined(DEBUG) || defined(XFS_WARN) 70862306a36Sopenharmony_ci bool ordered = bip->bli_flags & XFS_BLI_ORDERED; 70962306a36Sopenharmony_ci bool dirty = bip->bli_flags & XFS_BLI_DIRTY; 71062306a36Sopenharmony_ci bool aborted = test_bit(XFS_LI_ABORTED, 71162306a36Sopenharmony_ci &lip->li_flags); 71262306a36Sopenharmony_ci#endif 71362306a36Sopenharmony_ci 71462306a36Sopenharmony_ci trace_xfs_buf_item_release(bip); 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci /* 71762306a36Sopenharmony_ci * The bli dirty state should match whether the blf has logged segments 71862306a36Sopenharmony_ci * except for ordered buffers, where only the bli should be dirty. 71962306a36Sopenharmony_ci */ 72062306a36Sopenharmony_ci ASSERT((!ordered && dirty == xfs_buf_item_dirty_format(bip)) || 72162306a36Sopenharmony_ci (ordered && dirty && !xfs_buf_item_dirty_format(bip))); 72262306a36Sopenharmony_ci ASSERT(!stale || (bip->__bli_format.blf_flags & XFS_BLF_CANCEL)); 72362306a36Sopenharmony_ci 72462306a36Sopenharmony_ci /* 72562306a36Sopenharmony_ci * Clear the buffer's association with this transaction and 72662306a36Sopenharmony_ci * per-transaction state from the bli, which has been copied above. 72762306a36Sopenharmony_ci */ 72862306a36Sopenharmony_ci bp->b_transp = NULL; 72962306a36Sopenharmony_ci bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED); 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci /* 73262306a36Sopenharmony_ci * Unref the item and unlock the buffer unless held or stale. Stale 73362306a36Sopenharmony_ci * buffers remain locked until final unpin unless the bli is freed by 73462306a36Sopenharmony_ci * the unref call. The latter implies shutdown because buffer 73562306a36Sopenharmony_ci * invalidation dirties the bli and transaction. 73662306a36Sopenharmony_ci */ 73762306a36Sopenharmony_ci released = xfs_buf_item_put(bip); 73862306a36Sopenharmony_ci if (hold || (stale && !released)) 73962306a36Sopenharmony_ci return; 74062306a36Sopenharmony_ci ASSERT(!stale || aborted); 74162306a36Sopenharmony_ci xfs_buf_relse(bp); 74262306a36Sopenharmony_ci} 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ciSTATIC void 74562306a36Sopenharmony_cixfs_buf_item_committing( 74662306a36Sopenharmony_ci struct xfs_log_item *lip, 74762306a36Sopenharmony_ci xfs_csn_t seq) 74862306a36Sopenharmony_ci{ 74962306a36Sopenharmony_ci return xfs_buf_item_release(lip); 75062306a36Sopenharmony_ci} 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci/* 75362306a36Sopenharmony_ci * This is called to find out where the oldest active copy of the 75462306a36Sopenharmony_ci * buf log item in the on disk log resides now that the last log 75562306a36Sopenharmony_ci * write of it completed at the given lsn. 75662306a36Sopenharmony_ci * We always re-log all the dirty data in a buffer, so usually the 75762306a36Sopenharmony_ci * latest copy in the on disk log is the only one that matters. For 75862306a36Sopenharmony_ci * those cases we simply return the given lsn. 75962306a36Sopenharmony_ci * 76062306a36Sopenharmony_ci * The one exception to this is for buffers full of newly allocated 76162306a36Sopenharmony_ci * inodes. These buffers are only relogged with the XFS_BLI_INODE_BUF 76262306a36Sopenharmony_ci * flag set, indicating that only the di_next_unlinked fields from the 76362306a36Sopenharmony_ci * inodes in the buffers will be replayed during recovery. If the 76462306a36Sopenharmony_ci * original newly allocated inode images have not yet been flushed 76562306a36Sopenharmony_ci * when the buffer is so relogged, then we need to make sure that we 76662306a36Sopenharmony_ci * keep the old images in the 'active' portion of the log. We do this 76762306a36Sopenharmony_ci * by returning the original lsn of that transaction here rather than 76862306a36Sopenharmony_ci * the current one. 76962306a36Sopenharmony_ci */ 77062306a36Sopenharmony_ciSTATIC xfs_lsn_t 77162306a36Sopenharmony_cixfs_buf_item_committed( 77262306a36Sopenharmony_ci struct xfs_log_item *lip, 77362306a36Sopenharmony_ci xfs_lsn_t lsn) 77462306a36Sopenharmony_ci{ 77562306a36Sopenharmony_ci struct xfs_buf_log_item *bip = BUF_ITEM(lip); 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci trace_xfs_buf_item_committed(bip); 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci if ((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) && lip->li_lsn != 0) 78062306a36Sopenharmony_ci return lip->li_lsn; 78162306a36Sopenharmony_ci return lsn; 78262306a36Sopenharmony_ci} 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_cistatic const struct xfs_item_ops xfs_buf_item_ops = { 78562306a36Sopenharmony_ci .iop_size = xfs_buf_item_size, 78662306a36Sopenharmony_ci .iop_format = xfs_buf_item_format, 78762306a36Sopenharmony_ci .iop_pin = xfs_buf_item_pin, 78862306a36Sopenharmony_ci .iop_unpin = xfs_buf_item_unpin, 78962306a36Sopenharmony_ci .iop_release = xfs_buf_item_release, 79062306a36Sopenharmony_ci .iop_committing = xfs_buf_item_committing, 79162306a36Sopenharmony_ci .iop_committed = xfs_buf_item_committed, 79262306a36Sopenharmony_ci .iop_push = xfs_buf_item_push, 79362306a36Sopenharmony_ci}; 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ciSTATIC void 79662306a36Sopenharmony_cixfs_buf_item_get_format( 79762306a36Sopenharmony_ci struct xfs_buf_log_item *bip, 79862306a36Sopenharmony_ci int count) 79962306a36Sopenharmony_ci{ 80062306a36Sopenharmony_ci ASSERT(bip->bli_formats == NULL); 80162306a36Sopenharmony_ci bip->bli_format_count = count; 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci if (count == 1) { 80462306a36Sopenharmony_ci bip->bli_formats = &bip->__bli_format; 80562306a36Sopenharmony_ci return; 80662306a36Sopenharmony_ci } 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format), 80962306a36Sopenharmony_ci 0); 81062306a36Sopenharmony_ci} 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_ciSTATIC void 81362306a36Sopenharmony_cixfs_buf_item_free_format( 81462306a36Sopenharmony_ci struct xfs_buf_log_item *bip) 81562306a36Sopenharmony_ci{ 81662306a36Sopenharmony_ci if (bip->bli_formats != &bip->__bli_format) { 81762306a36Sopenharmony_ci kmem_free(bip->bli_formats); 81862306a36Sopenharmony_ci bip->bli_formats = NULL; 81962306a36Sopenharmony_ci } 82062306a36Sopenharmony_ci} 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci/* 82362306a36Sopenharmony_ci * Allocate a new buf log item to go with the given buffer. 82462306a36Sopenharmony_ci * Set the buffer's b_log_item field to point to the new 82562306a36Sopenharmony_ci * buf log item. 82662306a36Sopenharmony_ci */ 82762306a36Sopenharmony_ciint 82862306a36Sopenharmony_cixfs_buf_item_init( 82962306a36Sopenharmony_ci struct xfs_buf *bp, 83062306a36Sopenharmony_ci struct xfs_mount *mp) 83162306a36Sopenharmony_ci{ 83262306a36Sopenharmony_ci struct xfs_buf_log_item *bip = bp->b_log_item; 83362306a36Sopenharmony_ci int chunks; 83462306a36Sopenharmony_ci int map_size; 83562306a36Sopenharmony_ci int i; 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci /* 83862306a36Sopenharmony_ci * Check to see if there is already a buf log item for 83962306a36Sopenharmony_ci * this buffer. If we do already have one, there is 84062306a36Sopenharmony_ci * nothing to do here so return. 84162306a36Sopenharmony_ci */ 84262306a36Sopenharmony_ci ASSERT(bp->b_mount == mp); 84362306a36Sopenharmony_ci if (bip) { 84462306a36Sopenharmony_ci ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 84562306a36Sopenharmony_ci ASSERT(!bp->b_transp); 84662306a36Sopenharmony_ci ASSERT(bip->bli_buf == bp); 84762306a36Sopenharmony_ci return 0; 84862306a36Sopenharmony_ci } 84962306a36Sopenharmony_ci 85062306a36Sopenharmony_ci bip = kmem_cache_zalloc(xfs_buf_item_cache, GFP_KERNEL | __GFP_NOFAIL); 85162306a36Sopenharmony_ci xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops); 85262306a36Sopenharmony_ci bip->bli_buf = bp; 85362306a36Sopenharmony_ci 85462306a36Sopenharmony_ci /* 85562306a36Sopenharmony_ci * chunks is the number of XFS_BLF_CHUNK size pieces the buffer 85662306a36Sopenharmony_ci * can be divided into. Make sure not to truncate any pieces. 85762306a36Sopenharmony_ci * map_size is the size of the bitmap needed to describe the 85862306a36Sopenharmony_ci * chunks of the buffer. 85962306a36Sopenharmony_ci * 86062306a36Sopenharmony_ci * Discontiguous buffer support follows the layout of the underlying 86162306a36Sopenharmony_ci * buffer. This makes the implementation as simple as possible. 86262306a36Sopenharmony_ci */ 86362306a36Sopenharmony_ci xfs_buf_item_get_format(bip, bp->b_map_count); 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci for (i = 0; i < bip->bli_format_count; i++) { 86662306a36Sopenharmony_ci chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len), 86762306a36Sopenharmony_ci XFS_BLF_CHUNK); 86862306a36Sopenharmony_ci map_size = DIV_ROUND_UP(chunks, NBWORD); 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_ci if (map_size > XFS_BLF_DATAMAP_SIZE) { 87162306a36Sopenharmony_ci kmem_cache_free(xfs_buf_item_cache, bip); 87262306a36Sopenharmony_ci xfs_err(mp, 87362306a36Sopenharmony_ci "buffer item dirty bitmap (%u uints) too small to reflect %u bytes!", 87462306a36Sopenharmony_ci map_size, 87562306a36Sopenharmony_ci BBTOB(bp->b_maps[i].bm_len)); 87662306a36Sopenharmony_ci return -EFSCORRUPTED; 87762306a36Sopenharmony_ci } 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci bip->bli_formats[i].blf_type = XFS_LI_BUF; 88062306a36Sopenharmony_ci bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn; 88162306a36Sopenharmony_ci bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len; 88262306a36Sopenharmony_ci bip->bli_formats[i].blf_map_size = map_size; 88362306a36Sopenharmony_ci } 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci bp->b_log_item = bip; 88662306a36Sopenharmony_ci xfs_buf_hold(bp); 88762306a36Sopenharmony_ci return 0; 88862306a36Sopenharmony_ci} 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci/* 89262306a36Sopenharmony_ci * Mark bytes first through last inclusive as dirty in the buf 89362306a36Sopenharmony_ci * item's bitmap. 89462306a36Sopenharmony_ci */ 89562306a36Sopenharmony_cistatic void 89662306a36Sopenharmony_cixfs_buf_item_log_segment( 89762306a36Sopenharmony_ci uint first, 89862306a36Sopenharmony_ci uint last, 89962306a36Sopenharmony_ci uint *map) 90062306a36Sopenharmony_ci{ 90162306a36Sopenharmony_ci uint first_bit; 90262306a36Sopenharmony_ci uint last_bit; 90362306a36Sopenharmony_ci uint bits_to_set; 90462306a36Sopenharmony_ci uint bits_set; 90562306a36Sopenharmony_ci uint word_num; 90662306a36Sopenharmony_ci uint *wordp; 90762306a36Sopenharmony_ci uint bit; 90862306a36Sopenharmony_ci uint end_bit; 90962306a36Sopenharmony_ci uint mask; 91062306a36Sopenharmony_ci 91162306a36Sopenharmony_ci ASSERT(first < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD); 91262306a36Sopenharmony_ci ASSERT(last < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD); 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_ci /* 91562306a36Sopenharmony_ci * Convert byte offsets to bit numbers. 91662306a36Sopenharmony_ci */ 91762306a36Sopenharmony_ci first_bit = first >> XFS_BLF_SHIFT; 91862306a36Sopenharmony_ci last_bit = last >> XFS_BLF_SHIFT; 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_ci /* 92162306a36Sopenharmony_ci * Calculate the total number of bits to be set. 92262306a36Sopenharmony_ci */ 92362306a36Sopenharmony_ci bits_to_set = last_bit - first_bit + 1; 92462306a36Sopenharmony_ci 92562306a36Sopenharmony_ci /* 92662306a36Sopenharmony_ci * Get a pointer to the first word in the bitmap 92762306a36Sopenharmony_ci * to set a bit in. 92862306a36Sopenharmony_ci */ 92962306a36Sopenharmony_ci word_num = first_bit >> BIT_TO_WORD_SHIFT; 93062306a36Sopenharmony_ci wordp = &map[word_num]; 93162306a36Sopenharmony_ci 93262306a36Sopenharmony_ci /* 93362306a36Sopenharmony_ci * Calculate the starting bit in the first word. 93462306a36Sopenharmony_ci */ 93562306a36Sopenharmony_ci bit = first_bit & (uint)(NBWORD - 1); 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci /* 93862306a36Sopenharmony_ci * First set any bits in the first word of our range. 93962306a36Sopenharmony_ci * If it starts at bit 0 of the word, it will be 94062306a36Sopenharmony_ci * set below rather than here. That is what the variable 94162306a36Sopenharmony_ci * bit tells us. The variable bits_set tracks the number 94262306a36Sopenharmony_ci * of bits that have been set so far. End_bit is the number 94362306a36Sopenharmony_ci * of the last bit to be set in this word plus one. 94462306a36Sopenharmony_ci */ 94562306a36Sopenharmony_ci if (bit) { 94662306a36Sopenharmony_ci end_bit = min(bit + bits_to_set, (uint)NBWORD); 94762306a36Sopenharmony_ci mask = ((1U << (end_bit - bit)) - 1) << bit; 94862306a36Sopenharmony_ci *wordp |= mask; 94962306a36Sopenharmony_ci wordp++; 95062306a36Sopenharmony_ci bits_set = end_bit - bit; 95162306a36Sopenharmony_ci } else { 95262306a36Sopenharmony_ci bits_set = 0; 95362306a36Sopenharmony_ci } 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci /* 95662306a36Sopenharmony_ci * Now set bits a whole word at a time that are between 95762306a36Sopenharmony_ci * first_bit and last_bit. 95862306a36Sopenharmony_ci */ 95962306a36Sopenharmony_ci while ((bits_to_set - bits_set) >= NBWORD) { 96062306a36Sopenharmony_ci *wordp = 0xffffffff; 96162306a36Sopenharmony_ci bits_set += NBWORD; 96262306a36Sopenharmony_ci wordp++; 96362306a36Sopenharmony_ci } 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_ci /* 96662306a36Sopenharmony_ci * Finally, set any bits left to be set in one last partial word. 96762306a36Sopenharmony_ci */ 96862306a36Sopenharmony_ci end_bit = bits_to_set - bits_set; 96962306a36Sopenharmony_ci if (end_bit) { 97062306a36Sopenharmony_ci mask = (1U << end_bit) - 1; 97162306a36Sopenharmony_ci *wordp |= mask; 97262306a36Sopenharmony_ci } 97362306a36Sopenharmony_ci} 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci/* 97662306a36Sopenharmony_ci * Mark bytes first through last inclusive as dirty in the buf 97762306a36Sopenharmony_ci * item's bitmap. 97862306a36Sopenharmony_ci */ 97962306a36Sopenharmony_civoid 98062306a36Sopenharmony_cixfs_buf_item_log( 98162306a36Sopenharmony_ci struct xfs_buf_log_item *bip, 98262306a36Sopenharmony_ci uint first, 98362306a36Sopenharmony_ci uint last) 98462306a36Sopenharmony_ci{ 98562306a36Sopenharmony_ci int i; 98662306a36Sopenharmony_ci uint start; 98762306a36Sopenharmony_ci uint end; 98862306a36Sopenharmony_ci struct xfs_buf *bp = bip->bli_buf; 98962306a36Sopenharmony_ci 99062306a36Sopenharmony_ci /* 99162306a36Sopenharmony_ci * walk each buffer segment and mark them dirty appropriately. 99262306a36Sopenharmony_ci */ 99362306a36Sopenharmony_ci start = 0; 99462306a36Sopenharmony_ci for (i = 0; i < bip->bli_format_count; i++) { 99562306a36Sopenharmony_ci if (start > last) 99662306a36Sopenharmony_ci break; 99762306a36Sopenharmony_ci end = start + BBTOB(bp->b_maps[i].bm_len) - 1; 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci /* skip to the map that includes the first byte to log */ 100062306a36Sopenharmony_ci if (first > end) { 100162306a36Sopenharmony_ci start += BBTOB(bp->b_maps[i].bm_len); 100262306a36Sopenharmony_ci continue; 100362306a36Sopenharmony_ci } 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci /* 100662306a36Sopenharmony_ci * Trim the range to this segment and mark it in the bitmap. 100762306a36Sopenharmony_ci * Note that we must convert buffer offsets to segment relative 100862306a36Sopenharmony_ci * offsets (e.g., the first byte of each segment is byte 0 of 100962306a36Sopenharmony_ci * that segment). 101062306a36Sopenharmony_ci */ 101162306a36Sopenharmony_ci if (first < start) 101262306a36Sopenharmony_ci first = start; 101362306a36Sopenharmony_ci if (end > last) 101462306a36Sopenharmony_ci end = last; 101562306a36Sopenharmony_ci xfs_buf_item_log_segment(first - start, end - start, 101662306a36Sopenharmony_ci &bip->bli_formats[i].blf_data_map[0]); 101762306a36Sopenharmony_ci 101862306a36Sopenharmony_ci start += BBTOB(bp->b_maps[i].bm_len); 101962306a36Sopenharmony_ci } 102062306a36Sopenharmony_ci} 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci 102362306a36Sopenharmony_ci/* 102462306a36Sopenharmony_ci * Return true if the buffer has any ranges logged/dirtied by a transaction, 102562306a36Sopenharmony_ci * false otherwise. 102662306a36Sopenharmony_ci */ 102762306a36Sopenharmony_cibool 102862306a36Sopenharmony_cixfs_buf_item_dirty_format( 102962306a36Sopenharmony_ci struct xfs_buf_log_item *bip) 103062306a36Sopenharmony_ci{ 103162306a36Sopenharmony_ci int i; 103262306a36Sopenharmony_ci 103362306a36Sopenharmony_ci for (i = 0; i < bip->bli_format_count; i++) { 103462306a36Sopenharmony_ci if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map, 103562306a36Sopenharmony_ci bip->bli_formats[i].blf_map_size)) 103662306a36Sopenharmony_ci return true; 103762306a36Sopenharmony_ci } 103862306a36Sopenharmony_ci 103962306a36Sopenharmony_ci return false; 104062306a36Sopenharmony_ci} 104162306a36Sopenharmony_ci 104262306a36Sopenharmony_ciSTATIC void 104362306a36Sopenharmony_cixfs_buf_item_free( 104462306a36Sopenharmony_ci struct xfs_buf_log_item *bip) 104562306a36Sopenharmony_ci{ 104662306a36Sopenharmony_ci xfs_buf_item_free_format(bip); 104762306a36Sopenharmony_ci kmem_free(bip->bli_item.li_lv_shadow); 104862306a36Sopenharmony_ci kmem_cache_free(xfs_buf_item_cache, bip); 104962306a36Sopenharmony_ci} 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci/* 105262306a36Sopenharmony_ci * xfs_buf_item_relse() is called when the buf log item is no longer needed. 105362306a36Sopenharmony_ci */ 105462306a36Sopenharmony_civoid 105562306a36Sopenharmony_cixfs_buf_item_relse( 105662306a36Sopenharmony_ci struct xfs_buf *bp) 105762306a36Sopenharmony_ci{ 105862306a36Sopenharmony_ci struct xfs_buf_log_item *bip = bp->b_log_item; 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci trace_xfs_buf_item_relse(bp, _RET_IP_); 106162306a36Sopenharmony_ci ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci if (atomic_read(&bip->bli_refcount)) 106462306a36Sopenharmony_ci return; 106562306a36Sopenharmony_ci bp->b_log_item = NULL; 106662306a36Sopenharmony_ci xfs_buf_rele(bp); 106762306a36Sopenharmony_ci xfs_buf_item_free(bip); 106862306a36Sopenharmony_ci} 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_civoid 107162306a36Sopenharmony_cixfs_buf_item_done( 107262306a36Sopenharmony_ci struct xfs_buf *bp) 107362306a36Sopenharmony_ci{ 107462306a36Sopenharmony_ci /* 107562306a36Sopenharmony_ci * If we are forcibly shutting down, this may well be off the AIL 107662306a36Sopenharmony_ci * already. That's because we simulate the log-committed callbacks to 107762306a36Sopenharmony_ci * unpin these buffers. Or we may never have put this item on AIL 107862306a36Sopenharmony_ci * because of the transaction was aborted forcibly. 107962306a36Sopenharmony_ci * xfs_trans_ail_delete() takes care of these. 108062306a36Sopenharmony_ci * 108162306a36Sopenharmony_ci * Either way, AIL is useless if we're forcing a shutdown. 108262306a36Sopenharmony_ci * 108362306a36Sopenharmony_ci * Note that log recovery writes might have buffer items that are not on 108462306a36Sopenharmony_ci * the AIL even when the file system is not shut down. 108562306a36Sopenharmony_ci */ 108662306a36Sopenharmony_ci xfs_trans_ail_delete(&bp->b_log_item->bli_item, 108762306a36Sopenharmony_ci (bp->b_flags & _XBF_LOGRECOVERY) ? 0 : 108862306a36Sopenharmony_ci SHUTDOWN_CORRUPT_INCORE); 108962306a36Sopenharmony_ci xfs_buf_item_relse(bp); 109062306a36Sopenharmony_ci} 1091