162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2000-2006 Silicon Graphics, Inc. 462306a36Sopenharmony_ci * All Rights Reserved. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#include "xfs.h" 762306a36Sopenharmony_ci#include "xfs_fs.h" 862306a36Sopenharmony_ci#include "xfs_shared.h" 962306a36Sopenharmony_ci#include "xfs_format.h" 1062306a36Sopenharmony_ci#include "xfs_log_format.h" 1162306a36Sopenharmony_ci#include "xfs_trans_resv.h" 1262306a36Sopenharmony_ci#include "xfs_bit.h" 1362306a36Sopenharmony_ci#include "xfs_sb.h" 1462306a36Sopenharmony_ci#include "xfs_mount.h" 1562306a36Sopenharmony_ci#include "xfs_defer.h" 1662306a36Sopenharmony_ci#include "xfs_inode.h" 1762306a36Sopenharmony_ci#include "xfs_trans.h" 1862306a36Sopenharmony_ci#include "xfs_log.h" 1962306a36Sopenharmony_ci#include "xfs_log_priv.h" 2062306a36Sopenharmony_ci#include "xfs_log_recover.h" 2162306a36Sopenharmony_ci#include "xfs_trans_priv.h" 2262306a36Sopenharmony_ci#include "xfs_alloc.h" 2362306a36Sopenharmony_ci#include "xfs_ialloc.h" 2462306a36Sopenharmony_ci#include "xfs_trace.h" 2562306a36Sopenharmony_ci#include "xfs_icache.h" 2662306a36Sopenharmony_ci#include "xfs_error.h" 2762306a36Sopenharmony_ci#include "xfs_buf_item.h" 2862306a36Sopenharmony_ci#include "xfs_ag.h" 2962306a36Sopenharmony_ci#include "xfs_quota.h" 3062306a36Sopenharmony_ci#include "xfs_reflink.h" 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ciSTATIC int 3562306a36Sopenharmony_cixlog_find_zeroed( 3662306a36Sopenharmony_ci struct xlog *, 3762306a36Sopenharmony_ci xfs_daddr_t *); 3862306a36Sopenharmony_ciSTATIC int 3962306a36Sopenharmony_cixlog_clear_stale_blocks( 4062306a36Sopenharmony_ci struct xlog *, 4162306a36Sopenharmony_ci xfs_lsn_t); 4262306a36Sopenharmony_ciSTATIC int 4362306a36Sopenharmony_cixlog_do_recovery_pass( 4462306a36Sopenharmony_ci struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *); 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci/* 4762306a36Sopenharmony_ci * Sector aligned buffer routines for buffer create/read/write/access 4862306a36Sopenharmony_ci */ 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci/* 5162306a36Sopenharmony_ci * Verify the log-relative block number and length in basic blocks are valid for 5262306a36Sopenharmony_ci * an operation involving the given XFS log buffer. Returns true if the fields 5362306a36Sopenharmony_ci * are valid, false otherwise. 5462306a36Sopenharmony_ci */ 5562306a36Sopenharmony_cistatic inline bool 5662306a36Sopenharmony_cixlog_verify_bno( 5762306a36Sopenharmony_ci struct xlog *log, 5862306a36Sopenharmony_ci xfs_daddr_t blk_no, 5962306a36Sopenharmony_ci int bbcount) 6062306a36Sopenharmony_ci{ 6162306a36Sopenharmony_ci if (blk_no < 0 || blk_no >= log->l_logBBsize) 6262306a36Sopenharmony_ci return false; 6362306a36Sopenharmony_ci if (bbcount <= 0 || (blk_no + bbcount) > log->l_logBBsize) 6462306a36Sopenharmony_ci return false; 6562306a36Sopenharmony_ci return true; 6662306a36Sopenharmony_ci} 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci/* 6962306a36Sopenharmony_ci * Allocate a buffer to hold log data. The buffer needs to be able to map to 7062306a36Sopenharmony_ci * a range of nbblks basic blocks at any valid offset within the log. 7162306a36Sopenharmony_ci */ 7262306a36Sopenharmony_cistatic char * 7362306a36Sopenharmony_cixlog_alloc_buffer( 7462306a36Sopenharmony_ci struct xlog *log, 7562306a36Sopenharmony_ci int nbblks) 7662306a36Sopenharmony_ci{ 7762306a36Sopenharmony_ci /* 7862306a36Sopenharmony_ci * Pass log block 0 since we don't have an addr yet, buffer will be 7962306a36Sopenharmony_ci * verified on read. 8062306a36Sopenharmony_ci */ 8162306a36Sopenharmony_ci if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, 0, nbblks))) { 8262306a36Sopenharmony_ci xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 8362306a36Sopenharmony_ci nbblks); 8462306a36Sopenharmony_ci return NULL; 8562306a36Sopenharmony_ci } 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci /* 8862306a36Sopenharmony_ci * We do log I/O in units of log sectors (a power-of-2 multiple of the 8962306a36Sopenharmony_ci * basic block size), so we round up the requested size to accommodate 9062306a36Sopenharmony_ci * the basic blocks required for complete log sectors. 9162306a36Sopenharmony_ci * 9262306a36Sopenharmony_ci * In addition, the buffer may be used for a non-sector-aligned block 9362306a36Sopenharmony_ci * offset, in which case an I/O of the requested size could extend 9462306a36Sopenharmony_ci * beyond the end of the buffer. If the requested size is only 1 basic 9562306a36Sopenharmony_ci * block it will never straddle a sector boundary, so this won't be an 9662306a36Sopenharmony_ci * issue. Nor will this be a problem if the log I/O is done in basic 9762306a36Sopenharmony_ci * blocks (sector size 1). But otherwise we extend the buffer by one 9862306a36Sopenharmony_ci * extra log sector to ensure there's space to accommodate this 9962306a36Sopenharmony_ci * possibility. 10062306a36Sopenharmony_ci */ 10162306a36Sopenharmony_ci if (nbblks > 1 && log->l_sectBBsize > 1) 10262306a36Sopenharmony_ci nbblks += log->l_sectBBsize; 10362306a36Sopenharmony_ci nbblks = round_up(nbblks, log->l_sectBBsize); 10462306a36Sopenharmony_ci return kvzalloc(BBTOB(nbblks), GFP_KERNEL | __GFP_RETRY_MAYFAIL); 10562306a36Sopenharmony_ci} 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci/* 10862306a36Sopenharmony_ci * Return the address of the start of the given block number's data 10962306a36Sopenharmony_ci * in a log buffer. The buffer covers a log sector-aligned region. 11062306a36Sopenharmony_ci */ 11162306a36Sopenharmony_cistatic inline unsigned int 11262306a36Sopenharmony_cixlog_align( 11362306a36Sopenharmony_ci struct xlog *log, 11462306a36Sopenharmony_ci xfs_daddr_t blk_no) 11562306a36Sopenharmony_ci{ 11662306a36Sopenharmony_ci return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1)); 11762306a36Sopenharmony_ci} 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_cistatic int 12062306a36Sopenharmony_cixlog_do_io( 12162306a36Sopenharmony_ci struct xlog *log, 12262306a36Sopenharmony_ci xfs_daddr_t blk_no, 12362306a36Sopenharmony_ci unsigned int nbblks, 12462306a36Sopenharmony_ci char *data, 12562306a36Sopenharmony_ci enum req_op op) 12662306a36Sopenharmony_ci{ 12762306a36Sopenharmony_ci int error; 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, blk_no, nbblks))) { 13062306a36Sopenharmony_ci xfs_warn(log->l_mp, 13162306a36Sopenharmony_ci "Invalid log block/length (0x%llx, 0x%x) for buffer", 13262306a36Sopenharmony_ci blk_no, nbblks); 13362306a36Sopenharmony_ci return -EFSCORRUPTED; 13462306a36Sopenharmony_ci } 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci blk_no = round_down(blk_no, log->l_sectBBsize); 13762306a36Sopenharmony_ci nbblks = round_up(nbblks, log->l_sectBBsize); 13862306a36Sopenharmony_ci ASSERT(nbblks > 0); 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, 14162306a36Sopenharmony_ci BBTOB(nbblks), data, op); 14262306a36Sopenharmony_ci if (error && !xlog_is_shutdown(log)) { 14362306a36Sopenharmony_ci xfs_alert(log->l_mp, 14462306a36Sopenharmony_ci "log recovery %s I/O error at daddr 0x%llx len %d error %d", 14562306a36Sopenharmony_ci op == REQ_OP_WRITE ? "write" : "read", 14662306a36Sopenharmony_ci blk_no, nbblks, error); 14762306a36Sopenharmony_ci } 14862306a36Sopenharmony_ci return error; 14962306a36Sopenharmony_ci} 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ciSTATIC int 15262306a36Sopenharmony_cixlog_bread_noalign( 15362306a36Sopenharmony_ci struct xlog *log, 15462306a36Sopenharmony_ci xfs_daddr_t blk_no, 15562306a36Sopenharmony_ci int nbblks, 15662306a36Sopenharmony_ci char *data) 15762306a36Sopenharmony_ci{ 15862306a36Sopenharmony_ci return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); 15962306a36Sopenharmony_ci} 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ciSTATIC int 16262306a36Sopenharmony_cixlog_bread( 16362306a36Sopenharmony_ci struct xlog *log, 16462306a36Sopenharmony_ci xfs_daddr_t blk_no, 16562306a36Sopenharmony_ci int nbblks, 16662306a36Sopenharmony_ci char *data, 16762306a36Sopenharmony_ci char **offset) 16862306a36Sopenharmony_ci{ 16962306a36Sopenharmony_ci int error; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); 17262306a36Sopenharmony_ci if (!error) 17362306a36Sopenharmony_ci *offset = data + xlog_align(log, blk_no); 17462306a36Sopenharmony_ci return error; 17562306a36Sopenharmony_ci} 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ciSTATIC int 17862306a36Sopenharmony_cixlog_bwrite( 17962306a36Sopenharmony_ci struct xlog *log, 18062306a36Sopenharmony_ci xfs_daddr_t blk_no, 18162306a36Sopenharmony_ci int nbblks, 18262306a36Sopenharmony_ci char *data) 18362306a36Sopenharmony_ci{ 18462306a36Sopenharmony_ci return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE); 18562306a36Sopenharmony_ci} 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci#ifdef DEBUG 18862306a36Sopenharmony_ci/* 18962306a36Sopenharmony_ci * dump debug superblock and log record information 19062306a36Sopenharmony_ci */ 19162306a36Sopenharmony_ciSTATIC void 19262306a36Sopenharmony_cixlog_header_check_dump( 19362306a36Sopenharmony_ci xfs_mount_t *mp, 19462306a36Sopenharmony_ci xlog_rec_header_t *head) 19562306a36Sopenharmony_ci{ 19662306a36Sopenharmony_ci xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d", 19762306a36Sopenharmony_ci __func__, &mp->m_sb.sb_uuid, XLOG_FMT); 19862306a36Sopenharmony_ci xfs_debug(mp, " log : uuid = %pU, fmt = %d", 19962306a36Sopenharmony_ci &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); 20062306a36Sopenharmony_ci} 20162306a36Sopenharmony_ci#else 20262306a36Sopenharmony_ci#define xlog_header_check_dump(mp, head) 20362306a36Sopenharmony_ci#endif 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci/* 20662306a36Sopenharmony_ci * check log record header for recovery 20762306a36Sopenharmony_ci */ 20862306a36Sopenharmony_ciSTATIC int 20962306a36Sopenharmony_cixlog_header_check_recover( 21062306a36Sopenharmony_ci xfs_mount_t *mp, 21162306a36Sopenharmony_ci xlog_rec_header_t *head) 21262306a36Sopenharmony_ci{ 21362306a36Sopenharmony_ci ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci /* 21662306a36Sopenharmony_ci * IRIX doesn't write the h_fmt field and leaves it zeroed 21762306a36Sopenharmony_ci * (XLOG_FMT_UNKNOWN). This stops us from trying to recover 21862306a36Sopenharmony_ci * a dirty log created in IRIX. 21962306a36Sopenharmony_ci */ 22062306a36Sopenharmony_ci if (XFS_IS_CORRUPT(mp, head->h_fmt != cpu_to_be32(XLOG_FMT))) { 22162306a36Sopenharmony_ci xfs_warn(mp, 22262306a36Sopenharmony_ci "dirty log written in incompatible format - can't recover"); 22362306a36Sopenharmony_ci xlog_header_check_dump(mp, head); 22462306a36Sopenharmony_ci return -EFSCORRUPTED; 22562306a36Sopenharmony_ci } 22662306a36Sopenharmony_ci if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid, 22762306a36Sopenharmony_ci &head->h_fs_uuid))) { 22862306a36Sopenharmony_ci xfs_warn(mp, 22962306a36Sopenharmony_ci "dirty log entry has mismatched uuid - can't recover"); 23062306a36Sopenharmony_ci xlog_header_check_dump(mp, head); 23162306a36Sopenharmony_ci return -EFSCORRUPTED; 23262306a36Sopenharmony_ci } 23362306a36Sopenharmony_ci return 0; 23462306a36Sopenharmony_ci} 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci/* 23762306a36Sopenharmony_ci * read the head block of the log and check the header 23862306a36Sopenharmony_ci */ 23962306a36Sopenharmony_ciSTATIC int 24062306a36Sopenharmony_cixlog_header_check_mount( 24162306a36Sopenharmony_ci xfs_mount_t *mp, 24262306a36Sopenharmony_ci xlog_rec_header_t *head) 24362306a36Sopenharmony_ci{ 24462306a36Sopenharmony_ci ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci if (uuid_is_null(&head->h_fs_uuid)) { 24762306a36Sopenharmony_ci /* 24862306a36Sopenharmony_ci * IRIX doesn't write the h_fs_uuid or h_fmt fields. If 24962306a36Sopenharmony_ci * h_fs_uuid is null, we assume this log was last mounted 25062306a36Sopenharmony_ci * by IRIX and continue. 25162306a36Sopenharmony_ci */ 25262306a36Sopenharmony_ci xfs_warn(mp, "null uuid in log - IRIX style log"); 25362306a36Sopenharmony_ci } else if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid, 25462306a36Sopenharmony_ci &head->h_fs_uuid))) { 25562306a36Sopenharmony_ci xfs_warn(mp, "log has mismatched uuid - can't recover"); 25662306a36Sopenharmony_ci xlog_header_check_dump(mp, head); 25762306a36Sopenharmony_ci return -EFSCORRUPTED; 25862306a36Sopenharmony_ci } 25962306a36Sopenharmony_ci return 0; 26062306a36Sopenharmony_ci} 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci/* 26362306a36Sopenharmony_ci * This routine finds (to an approximation) the first block in the physical 26462306a36Sopenharmony_ci * log which contains the given cycle. It uses a binary search algorithm. 26562306a36Sopenharmony_ci * Note that the algorithm can not be perfect because the disk will not 26662306a36Sopenharmony_ci * necessarily be perfect. 26762306a36Sopenharmony_ci */ 26862306a36Sopenharmony_ciSTATIC int 26962306a36Sopenharmony_cixlog_find_cycle_start( 27062306a36Sopenharmony_ci struct xlog *log, 27162306a36Sopenharmony_ci char *buffer, 27262306a36Sopenharmony_ci xfs_daddr_t first_blk, 27362306a36Sopenharmony_ci xfs_daddr_t *last_blk, 27462306a36Sopenharmony_ci uint cycle) 27562306a36Sopenharmony_ci{ 27662306a36Sopenharmony_ci char *offset; 27762306a36Sopenharmony_ci xfs_daddr_t mid_blk; 27862306a36Sopenharmony_ci xfs_daddr_t end_blk; 27962306a36Sopenharmony_ci uint mid_cycle; 28062306a36Sopenharmony_ci int error; 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci end_blk = *last_blk; 28362306a36Sopenharmony_ci mid_blk = BLK_AVG(first_blk, end_blk); 28462306a36Sopenharmony_ci while (mid_blk != first_blk && mid_blk != end_blk) { 28562306a36Sopenharmony_ci error = xlog_bread(log, mid_blk, 1, buffer, &offset); 28662306a36Sopenharmony_ci if (error) 28762306a36Sopenharmony_ci return error; 28862306a36Sopenharmony_ci mid_cycle = xlog_get_cycle(offset); 28962306a36Sopenharmony_ci if (mid_cycle == cycle) 29062306a36Sopenharmony_ci end_blk = mid_blk; /* last_half_cycle == mid_cycle */ 29162306a36Sopenharmony_ci else 29262306a36Sopenharmony_ci first_blk = mid_blk; /* first_half_cycle == mid_cycle */ 29362306a36Sopenharmony_ci mid_blk = BLK_AVG(first_blk, end_blk); 29462306a36Sopenharmony_ci } 29562306a36Sopenharmony_ci ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) || 29662306a36Sopenharmony_ci (mid_blk == end_blk && mid_blk-1 == first_blk)); 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci *last_blk = end_blk; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci return 0; 30162306a36Sopenharmony_ci} 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci/* 30462306a36Sopenharmony_ci * Check that a range of blocks does not contain stop_on_cycle_no. 30562306a36Sopenharmony_ci * Fill in *new_blk with the block offset where such a block is 30662306a36Sopenharmony_ci * found, or with -1 (an invalid block number) if there is no such 30762306a36Sopenharmony_ci * block in the range. The scan needs to occur from front to back 30862306a36Sopenharmony_ci * and the pointer into the region must be updated since a later 30962306a36Sopenharmony_ci * routine will need to perform another test. 31062306a36Sopenharmony_ci */ 31162306a36Sopenharmony_ciSTATIC int 31262306a36Sopenharmony_cixlog_find_verify_cycle( 31362306a36Sopenharmony_ci struct xlog *log, 31462306a36Sopenharmony_ci xfs_daddr_t start_blk, 31562306a36Sopenharmony_ci int nbblks, 31662306a36Sopenharmony_ci uint stop_on_cycle_no, 31762306a36Sopenharmony_ci xfs_daddr_t *new_blk) 31862306a36Sopenharmony_ci{ 31962306a36Sopenharmony_ci xfs_daddr_t i, j; 32062306a36Sopenharmony_ci uint cycle; 32162306a36Sopenharmony_ci char *buffer; 32262306a36Sopenharmony_ci xfs_daddr_t bufblks; 32362306a36Sopenharmony_ci char *buf = NULL; 32462306a36Sopenharmony_ci int error = 0; 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci /* 32762306a36Sopenharmony_ci * Greedily allocate a buffer big enough to handle the full 32862306a36Sopenharmony_ci * range of basic blocks we'll be examining. If that fails, 32962306a36Sopenharmony_ci * try a smaller size. We need to be able to read at least 33062306a36Sopenharmony_ci * a log sector, or we're out of luck. 33162306a36Sopenharmony_ci */ 33262306a36Sopenharmony_ci bufblks = roundup_pow_of_two(nbblks); 33362306a36Sopenharmony_ci while (bufblks > log->l_logBBsize) 33462306a36Sopenharmony_ci bufblks >>= 1; 33562306a36Sopenharmony_ci while (!(buffer = xlog_alloc_buffer(log, bufblks))) { 33662306a36Sopenharmony_ci bufblks >>= 1; 33762306a36Sopenharmony_ci if (bufblks < log->l_sectBBsize) 33862306a36Sopenharmony_ci return -ENOMEM; 33962306a36Sopenharmony_ci } 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci for (i = start_blk; i < start_blk + nbblks; i += bufblks) { 34262306a36Sopenharmony_ci int bcount; 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci bcount = min(bufblks, (start_blk + nbblks - i)); 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci error = xlog_bread(log, i, bcount, buffer, &buf); 34762306a36Sopenharmony_ci if (error) 34862306a36Sopenharmony_ci goto out; 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci for (j = 0; j < bcount; j++) { 35162306a36Sopenharmony_ci cycle = xlog_get_cycle(buf); 35262306a36Sopenharmony_ci if (cycle == stop_on_cycle_no) { 35362306a36Sopenharmony_ci *new_blk = i+j; 35462306a36Sopenharmony_ci goto out; 35562306a36Sopenharmony_ci } 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci buf += BBSIZE; 35862306a36Sopenharmony_ci } 35962306a36Sopenharmony_ci } 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ci *new_blk = -1; 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ciout: 36462306a36Sopenharmony_ci kmem_free(buffer); 36562306a36Sopenharmony_ci return error; 36662306a36Sopenharmony_ci} 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_cistatic inline int 36962306a36Sopenharmony_cixlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh) 37062306a36Sopenharmony_ci{ 37162306a36Sopenharmony_ci if (xfs_has_logv2(log->l_mp)) { 37262306a36Sopenharmony_ci int h_size = be32_to_cpu(rh->h_size); 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) && 37562306a36Sopenharmony_ci h_size > XLOG_HEADER_CYCLE_SIZE) 37662306a36Sopenharmony_ci return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE); 37762306a36Sopenharmony_ci } 37862306a36Sopenharmony_ci return 1; 37962306a36Sopenharmony_ci} 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci/* 38262306a36Sopenharmony_ci * Potentially backup over partial log record write. 38362306a36Sopenharmony_ci * 38462306a36Sopenharmony_ci * In the typical case, last_blk is the number of the block directly after 38562306a36Sopenharmony_ci * a good log record. Therefore, we subtract one to get the block number 38662306a36Sopenharmony_ci * of the last block in the given buffer. extra_bblks contains the number 38762306a36Sopenharmony_ci * of blocks we would have read on a previous read. This happens when the 38862306a36Sopenharmony_ci * last log record is split over the end of the physical log. 38962306a36Sopenharmony_ci * 39062306a36Sopenharmony_ci * extra_bblks is the number of blocks potentially verified on a previous 39162306a36Sopenharmony_ci * call to this routine. 39262306a36Sopenharmony_ci */ 39362306a36Sopenharmony_ciSTATIC int 39462306a36Sopenharmony_cixlog_find_verify_log_record( 39562306a36Sopenharmony_ci struct xlog *log, 39662306a36Sopenharmony_ci xfs_daddr_t start_blk, 39762306a36Sopenharmony_ci xfs_daddr_t *last_blk, 39862306a36Sopenharmony_ci int extra_bblks) 39962306a36Sopenharmony_ci{ 40062306a36Sopenharmony_ci xfs_daddr_t i; 40162306a36Sopenharmony_ci char *buffer; 40262306a36Sopenharmony_ci char *offset = NULL; 40362306a36Sopenharmony_ci xlog_rec_header_t *head = NULL; 40462306a36Sopenharmony_ci int error = 0; 40562306a36Sopenharmony_ci int smallmem = 0; 40662306a36Sopenharmony_ci int num_blks = *last_blk - start_blk; 40762306a36Sopenharmony_ci int xhdrs; 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci ASSERT(start_blk != 0 || *last_blk != start_blk); 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci buffer = xlog_alloc_buffer(log, num_blks); 41262306a36Sopenharmony_ci if (!buffer) { 41362306a36Sopenharmony_ci buffer = xlog_alloc_buffer(log, 1); 41462306a36Sopenharmony_ci if (!buffer) 41562306a36Sopenharmony_ci return -ENOMEM; 41662306a36Sopenharmony_ci smallmem = 1; 41762306a36Sopenharmony_ci } else { 41862306a36Sopenharmony_ci error = xlog_bread(log, start_blk, num_blks, buffer, &offset); 41962306a36Sopenharmony_ci if (error) 42062306a36Sopenharmony_ci goto out; 42162306a36Sopenharmony_ci offset += ((num_blks - 1) << BBSHIFT); 42262306a36Sopenharmony_ci } 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci for (i = (*last_blk) - 1; i >= 0; i--) { 42562306a36Sopenharmony_ci if (i < start_blk) { 42662306a36Sopenharmony_ci /* valid log record not found */ 42762306a36Sopenharmony_ci xfs_warn(log->l_mp, 42862306a36Sopenharmony_ci "Log inconsistent (didn't find previous header)"); 42962306a36Sopenharmony_ci ASSERT(0); 43062306a36Sopenharmony_ci error = -EFSCORRUPTED; 43162306a36Sopenharmony_ci goto out; 43262306a36Sopenharmony_ci } 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci if (smallmem) { 43562306a36Sopenharmony_ci error = xlog_bread(log, i, 1, buffer, &offset); 43662306a36Sopenharmony_ci if (error) 43762306a36Sopenharmony_ci goto out; 43862306a36Sopenharmony_ci } 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci head = (xlog_rec_header_t *)offset; 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) 44362306a36Sopenharmony_ci break; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci if (!smallmem) 44662306a36Sopenharmony_ci offset -= BBSIZE; 44762306a36Sopenharmony_ci } 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci /* 45062306a36Sopenharmony_ci * We hit the beginning of the physical log & still no header. Return 45162306a36Sopenharmony_ci * to caller. If caller can handle a return of -1, then this routine 45262306a36Sopenharmony_ci * will be called again for the end of the physical log. 45362306a36Sopenharmony_ci */ 45462306a36Sopenharmony_ci if (i == -1) { 45562306a36Sopenharmony_ci error = 1; 45662306a36Sopenharmony_ci goto out; 45762306a36Sopenharmony_ci } 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci /* 46062306a36Sopenharmony_ci * We have the final block of the good log (the first block 46162306a36Sopenharmony_ci * of the log record _before_ the head. So we check the uuid. 46262306a36Sopenharmony_ci */ 46362306a36Sopenharmony_ci if ((error = xlog_header_check_mount(log->l_mp, head))) 46462306a36Sopenharmony_ci goto out; 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci /* 46762306a36Sopenharmony_ci * We may have found a log record header before we expected one. 46862306a36Sopenharmony_ci * last_blk will be the 1st block # with a given cycle #. We may end 46962306a36Sopenharmony_ci * up reading an entire log record. In this case, we don't want to 47062306a36Sopenharmony_ci * reset last_blk. Only when last_blk points in the middle of a log 47162306a36Sopenharmony_ci * record do we update last_blk. 47262306a36Sopenharmony_ci */ 47362306a36Sopenharmony_ci xhdrs = xlog_logrec_hblks(log, head); 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci if (*last_blk - i + extra_bblks != 47662306a36Sopenharmony_ci BTOBB(be32_to_cpu(head->h_len)) + xhdrs) 47762306a36Sopenharmony_ci *last_blk = i; 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_ciout: 48062306a36Sopenharmony_ci kmem_free(buffer); 48162306a36Sopenharmony_ci return error; 48262306a36Sopenharmony_ci} 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci/* 48562306a36Sopenharmony_ci * Head is defined to be the point of the log where the next log write 48662306a36Sopenharmony_ci * could go. This means that incomplete LR writes at the end are 48762306a36Sopenharmony_ci * eliminated when calculating the head. We aren't guaranteed that previous 48862306a36Sopenharmony_ci * LR have complete transactions. We only know that a cycle number of 48962306a36Sopenharmony_ci * current cycle number -1 won't be present in the log if we start writing 49062306a36Sopenharmony_ci * from our current block number. 49162306a36Sopenharmony_ci * 49262306a36Sopenharmony_ci * last_blk contains the block number of the first block with a given 49362306a36Sopenharmony_ci * cycle number. 49462306a36Sopenharmony_ci * 49562306a36Sopenharmony_ci * Return: zero if normal, non-zero if error. 49662306a36Sopenharmony_ci */ 49762306a36Sopenharmony_ciSTATIC int 49862306a36Sopenharmony_cixlog_find_head( 49962306a36Sopenharmony_ci struct xlog *log, 50062306a36Sopenharmony_ci xfs_daddr_t *return_head_blk) 50162306a36Sopenharmony_ci{ 50262306a36Sopenharmony_ci char *buffer; 50362306a36Sopenharmony_ci char *offset; 50462306a36Sopenharmony_ci xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; 50562306a36Sopenharmony_ci int num_scan_bblks; 50662306a36Sopenharmony_ci uint first_half_cycle, last_half_cycle; 50762306a36Sopenharmony_ci uint stop_on_cycle; 50862306a36Sopenharmony_ci int error, log_bbnum = log->l_logBBsize; 50962306a36Sopenharmony_ci 51062306a36Sopenharmony_ci /* Is the end of the log device zeroed? */ 51162306a36Sopenharmony_ci error = xlog_find_zeroed(log, &first_blk); 51262306a36Sopenharmony_ci if (error < 0) { 51362306a36Sopenharmony_ci xfs_warn(log->l_mp, "empty log check failed"); 51462306a36Sopenharmony_ci return error; 51562306a36Sopenharmony_ci } 51662306a36Sopenharmony_ci if (error == 1) { 51762306a36Sopenharmony_ci *return_head_blk = first_blk; 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci /* Is the whole lot zeroed? */ 52062306a36Sopenharmony_ci if (!first_blk) { 52162306a36Sopenharmony_ci /* Linux XFS shouldn't generate totally zeroed logs - 52262306a36Sopenharmony_ci * mkfs etc write a dummy unmount record to a fresh 52362306a36Sopenharmony_ci * log so we can store the uuid in there 52462306a36Sopenharmony_ci */ 52562306a36Sopenharmony_ci xfs_warn(log->l_mp, "totally zeroed log"); 52662306a36Sopenharmony_ci } 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci return 0; 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci first_blk = 0; /* get cycle # of 1st block */ 53262306a36Sopenharmony_ci buffer = xlog_alloc_buffer(log, 1); 53362306a36Sopenharmony_ci if (!buffer) 53462306a36Sopenharmony_ci return -ENOMEM; 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ci error = xlog_bread(log, 0, 1, buffer, &offset); 53762306a36Sopenharmony_ci if (error) 53862306a36Sopenharmony_ci goto out_free_buffer; 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci first_half_cycle = xlog_get_cycle(offset); 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ 54362306a36Sopenharmony_ci error = xlog_bread(log, last_blk, 1, buffer, &offset); 54462306a36Sopenharmony_ci if (error) 54562306a36Sopenharmony_ci goto out_free_buffer; 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci last_half_cycle = xlog_get_cycle(offset); 54862306a36Sopenharmony_ci ASSERT(last_half_cycle != 0); 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_ci /* 55162306a36Sopenharmony_ci * If the 1st half cycle number is equal to the last half cycle number, 55262306a36Sopenharmony_ci * then the entire log is stamped with the same cycle number. In this 55362306a36Sopenharmony_ci * case, head_blk can't be set to zero (which makes sense). The below 55462306a36Sopenharmony_ci * math doesn't work out properly with head_blk equal to zero. Instead, 55562306a36Sopenharmony_ci * we set it to log_bbnum which is an invalid block number, but this 55662306a36Sopenharmony_ci * value makes the math correct. If head_blk doesn't changed through 55762306a36Sopenharmony_ci * all the tests below, *head_blk is set to zero at the very end rather 55862306a36Sopenharmony_ci * than log_bbnum. In a sense, log_bbnum and zero are the same block 55962306a36Sopenharmony_ci * in a circular file. 56062306a36Sopenharmony_ci */ 56162306a36Sopenharmony_ci if (first_half_cycle == last_half_cycle) { 56262306a36Sopenharmony_ci /* 56362306a36Sopenharmony_ci * In this case we believe that the entire log should have 56462306a36Sopenharmony_ci * cycle number last_half_cycle. We need to scan backwards 56562306a36Sopenharmony_ci * from the end verifying that there are no holes still 56662306a36Sopenharmony_ci * containing last_half_cycle - 1. If we find such a hole, 56762306a36Sopenharmony_ci * then the start of that hole will be the new head. The 56862306a36Sopenharmony_ci * simple case looks like 56962306a36Sopenharmony_ci * x | x ... | x - 1 | x 57062306a36Sopenharmony_ci * Another case that fits this picture would be 57162306a36Sopenharmony_ci * x | x + 1 | x ... | x 57262306a36Sopenharmony_ci * In this case the head really is somewhere at the end of the 57362306a36Sopenharmony_ci * log, as one of the latest writes at the beginning was 57462306a36Sopenharmony_ci * incomplete. 57562306a36Sopenharmony_ci * One more case is 57662306a36Sopenharmony_ci * x | x + 1 | x ... | x - 1 | x 57762306a36Sopenharmony_ci * This is really the combination of the above two cases, and 57862306a36Sopenharmony_ci * the head has to end up at the start of the x-1 hole at the 57962306a36Sopenharmony_ci * end of the log. 58062306a36Sopenharmony_ci * 58162306a36Sopenharmony_ci * In the 256k log case, we will read from the beginning to the 58262306a36Sopenharmony_ci * end of the log and search for cycle numbers equal to x-1. 58362306a36Sopenharmony_ci * We don't worry about the x+1 blocks that we encounter, 58462306a36Sopenharmony_ci * because we know that they cannot be the head since the log 58562306a36Sopenharmony_ci * started with x. 58662306a36Sopenharmony_ci */ 58762306a36Sopenharmony_ci head_blk = log_bbnum; 58862306a36Sopenharmony_ci stop_on_cycle = last_half_cycle - 1; 58962306a36Sopenharmony_ci } else { 59062306a36Sopenharmony_ci /* 59162306a36Sopenharmony_ci * In this case we want to find the first block with cycle 59262306a36Sopenharmony_ci * number matching last_half_cycle. We expect the log to be 59362306a36Sopenharmony_ci * some variation on 59462306a36Sopenharmony_ci * x + 1 ... | x ... | x 59562306a36Sopenharmony_ci * The first block with cycle number x (last_half_cycle) will 59662306a36Sopenharmony_ci * be where the new head belongs. First we do a binary search 59762306a36Sopenharmony_ci * for the first occurrence of last_half_cycle. The binary 59862306a36Sopenharmony_ci * search may not be totally accurate, so then we scan back 59962306a36Sopenharmony_ci * from there looking for occurrences of last_half_cycle before 60062306a36Sopenharmony_ci * us. If that backwards scan wraps around the beginning of 60162306a36Sopenharmony_ci * the log, then we look for occurrences of last_half_cycle - 1 60262306a36Sopenharmony_ci * at the end of the log. The cases we're looking for look 60362306a36Sopenharmony_ci * like 60462306a36Sopenharmony_ci * v binary search stopped here 60562306a36Sopenharmony_ci * x + 1 ... | x | x + 1 | x ... | x 60662306a36Sopenharmony_ci * ^ but we want to locate this spot 60762306a36Sopenharmony_ci * or 60862306a36Sopenharmony_ci * <---------> less than scan distance 60962306a36Sopenharmony_ci * x + 1 ... | x ... | x - 1 | x 61062306a36Sopenharmony_ci * ^ we want to locate this spot 61162306a36Sopenharmony_ci */ 61262306a36Sopenharmony_ci stop_on_cycle = last_half_cycle; 61362306a36Sopenharmony_ci error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk, 61462306a36Sopenharmony_ci last_half_cycle); 61562306a36Sopenharmony_ci if (error) 61662306a36Sopenharmony_ci goto out_free_buffer; 61762306a36Sopenharmony_ci } 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci /* 62062306a36Sopenharmony_ci * Now validate the answer. Scan back some number of maximum possible 62162306a36Sopenharmony_ci * blocks and make sure each one has the expected cycle number. The 62262306a36Sopenharmony_ci * maximum is determined by the total possible amount of buffering 62362306a36Sopenharmony_ci * in the in-core log. The following number can be made tighter if 62462306a36Sopenharmony_ci * we actually look at the block size of the filesystem. 62562306a36Sopenharmony_ci */ 62662306a36Sopenharmony_ci num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log)); 62762306a36Sopenharmony_ci if (head_blk >= num_scan_bblks) { 62862306a36Sopenharmony_ci /* 62962306a36Sopenharmony_ci * We are guaranteed that the entire check can be performed 63062306a36Sopenharmony_ci * in one buffer. 63162306a36Sopenharmony_ci */ 63262306a36Sopenharmony_ci start_blk = head_blk - num_scan_bblks; 63362306a36Sopenharmony_ci if ((error = xlog_find_verify_cycle(log, 63462306a36Sopenharmony_ci start_blk, num_scan_bblks, 63562306a36Sopenharmony_ci stop_on_cycle, &new_blk))) 63662306a36Sopenharmony_ci goto out_free_buffer; 63762306a36Sopenharmony_ci if (new_blk != -1) 63862306a36Sopenharmony_ci head_blk = new_blk; 63962306a36Sopenharmony_ci } else { /* need to read 2 parts of log */ 64062306a36Sopenharmony_ci /* 64162306a36Sopenharmony_ci * We are going to scan backwards in the log in two parts. 64262306a36Sopenharmony_ci * First we scan the physical end of the log. In this part 64362306a36Sopenharmony_ci * of the log, we are looking for blocks with cycle number 64462306a36Sopenharmony_ci * last_half_cycle - 1. 64562306a36Sopenharmony_ci * If we find one, then we know that the log starts there, as 64662306a36Sopenharmony_ci * we've found a hole that didn't get written in going around 64762306a36Sopenharmony_ci * the end of the physical log. The simple case for this is 64862306a36Sopenharmony_ci * x + 1 ... | x ... | x - 1 | x 64962306a36Sopenharmony_ci * <---------> less than scan distance 65062306a36Sopenharmony_ci * If all of the blocks at the end of the log have cycle number 65162306a36Sopenharmony_ci * last_half_cycle, then we check the blocks at the start of 65262306a36Sopenharmony_ci * the log looking for occurrences of last_half_cycle. If we 65362306a36Sopenharmony_ci * find one, then our current estimate for the location of the 65462306a36Sopenharmony_ci * first occurrence of last_half_cycle is wrong and we move 65562306a36Sopenharmony_ci * back to the hole we've found. This case looks like 65662306a36Sopenharmony_ci * x + 1 ... | x | x + 1 | x ... 65762306a36Sopenharmony_ci * ^ binary search stopped here 65862306a36Sopenharmony_ci * Another case we need to handle that only occurs in 256k 65962306a36Sopenharmony_ci * logs is 66062306a36Sopenharmony_ci * x + 1 ... | x ... | x+1 | x ... 66162306a36Sopenharmony_ci * ^ binary search stops here 66262306a36Sopenharmony_ci * In a 256k log, the scan at the end of the log will see the 66362306a36Sopenharmony_ci * x + 1 blocks. We need to skip past those since that is 66462306a36Sopenharmony_ci * certainly not the head of the log. By searching for 66562306a36Sopenharmony_ci * last_half_cycle-1 we accomplish that. 66662306a36Sopenharmony_ci */ 66762306a36Sopenharmony_ci ASSERT(head_blk <= INT_MAX && 66862306a36Sopenharmony_ci (xfs_daddr_t) num_scan_bblks >= head_blk); 66962306a36Sopenharmony_ci start_blk = log_bbnum - (num_scan_bblks - head_blk); 67062306a36Sopenharmony_ci if ((error = xlog_find_verify_cycle(log, start_blk, 67162306a36Sopenharmony_ci num_scan_bblks - (int)head_blk, 67262306a36Sopenharmony_ci (stop_on_cycle - 1), &new_blk))) 67362306a36Sopenharmony_ci goto out_free_buffer; 67462306a36Sopenharmony_ci if (new_blk != -1) { 67562306a36Sopenharmony_ci head_blk = new_blk; 67662306a36Sopenharmony_ci goto validate_head; 67762306a36Sopenharmony_ci } 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_ci /* 68062306a36Sopenharmony_ci * Scan beginning of log now. The last part of the physical 68162306a36Sopenharmony_ci * log is good. This scan needs to verify that it doesn't find 68262306a36Sopenharmony_ci * the last_half_cycle. 68362306a36Sopenharmony_ci */ 68462306a36Sopenharmony_ci start_blk = 0; 68562306a36Sopenharmony_ci ASSERT(head_blk <= INT_MAX); 68662306a36Sopenharmony_ci if ((error = xlog_find_verify_cycle(log, 68762306a36Sopenharmony_ci start_blk, (int)head_blk, 68862306a36Sopenharmony_ci stop_on_cycle, &new_blk))) 68962306a36Sopenharmony_ci goto out_free_buffer; 69062306a36Sopenharmony_ci if (new_blk != -1) 69162306a36Sopenharmony_ci head_blk = new_blk; 69262306a36Sopenharmony_ci } 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_civalidate_head: 69562306a36Sopenharmony_ci /* 69662306a36Sopenharmony_ci * Now we need to make sure head_blk is not pointing to a block in 69762306a36Sopenharmony_ci * the middle of a log record. 69862306a36Sopenharmony_ci */ 69962306a36Sopenharmony_ci num_scan_bblks = XLOG_REC_SHIFT(log); 70062306a36Sopenharmony_ci if (head_blk >= num_scan_bblks) { 70162306a36Sopenharmony_ci start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ 70262306a36Sopenharmony_ci 70362306a36Sopenharmony_ci /* start ptr at last block ptr before head_blk */ 70462306a36Sopenharmony_ci error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); 70562306a36Sopenharmony_ci if (error == 1) 70662306a36Sopenharmony_ci error = -EIO; 70762306a36Sopenharmony_ci if (error) 70862306a36Sopenharmony_ci goto out_free_buffer; 70962306a36Sopenharmony_ci } else { 71062306a36Sopenharmony_ci start_blk = 0; 71162306a36Sopenharmony_ci ASSERT(head_blk <= INT_MAX); 71262306a36Sopenharmony_ci error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); 71362306a36Sopenharmony_ci if (error < 0) 71462306a36Sopenharmony_ci goto out_free_buffer; 71562306a36Sopenharmony_ci if (error == 1) { 71662306a36Sopenharmony_ci /* We hit the beginning of the log during our search */ 71762306a36Sopenharmony_ci start_blk = log_bbnum - (num_scan_bblks - head_blk); 71862306a36Sopenharmony_ci new_blk = log_bbnum; 71962306a36Sopenharmony_ci ASSERT(start_blk <= INT_MAX && 72062306a36Sopenharmony_ci (xfs_daddr_t) log_bbnum-start_blk >= 0); 72162306a36Sopenharmony_ci ASSERT(head_blk <= INT_MAX); 72262306a36Sopenharmony_ci error = xlog_find_verify_log_record(log, start_blk, 72362306a36Sopenharmony_ci &new_blk, (int)head_blk); 72462306a36Sopenharmony_ci if (error == 1) 72562306a36Sopenharmony_ci error = -EIO; 72662306a36Sopenharmony_ci if (error) 72762306a36Sopenharmony_ci goto out_free_buffer; 72862306a36Sopenharmony_ci if (new_blk != log_bbnum) 72962306a36Sopenharmony_ci head_blk = new_blk; 73062306a36Sopenharmony_ci } else if (error) 73162306a36Sopenharmony_ci goto out_free_buffer; 73262306a36Sopenharmony_ci } 73362306a36Sopenharmony_ci 73462306a36Sopenharmony_ci kmem_free(buffer); 73562306a36Sopenharmony_ci if (head_blk == log_bbnum) 73662306a36Sopenharmony_ci *return_head_blk = 0; 73762306a36Sopenharmony_ci else 73862306a36Sopenharmony_ci *return_head_blk = head_blk; 73962306a36Sopenharmony_ci /* 74062306a36Sopenharmony_ci * When returning here, we have a good block number. Bad block 74162306a36Sopenharmony_ci * means that during a previous crash, we didn't have a clean break 74262306a36Sopenharmony_ci * from cycle number N to cycle number N-1. In this case, we need 74362306a36Sopenharmony_ci * to find the first block with cycle number N-1. 74462306a36Sopenharmony_ci */ 74562306a36Sopenharmony_ci return 0; 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_ciout_free_buffer: 74862306a36Sopenharmony_ci kmem_free(buffer); 74962306a36Sopenharmony_ci if (error) 75062306a36Sopenharmony_ci xfs_warn(log->l_mp, "failed to find log head"); 75162306a36Sopenharmony_ci return error; 75262306a36Sopenharmony_ci} 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_ci/* 75562306a36Sopenharmony_ci * Seek backwards in the log for log record headers. 75662306a36Sopenharmony_ci * 75762306a36Sopenharmony_ci * Given a starting log block, walk backwards until we find the provided number 75862306a36Sopenharmony_ci * of records or hit the provided tail block. The return value is the number of 75962306a36Sopenharmony_ci * records encountered or a negative error code. The log block and buffer 76062306a36Sopenharmony_ci * pointer of the last record seen are returned in rblk and rhead respectively. 76162306a36Sopenharmony_ci */ 76262306a36Sopenharmony_ciSTATIC int 76362306a36Sopenharmony_cixlog_rseek_logrec_hdr( 76462306a36Sopenharmony_ci struct xlog *log, 76562306a36Sopenharmony_ci xfs_daddr_t head_blk, 76662306a36Sopenharmony_ci xfs_daddr_t tail_blk, 76762306a36Sopenharmony_ci int count, 76862306a36Sopenharmony_ci char *buffer, 76962306a36Sopenharmony_ci xfs_daddr_t *rblk, 77062306a36Sopenharmony_ci struct xlog_rec_header **rhead, 77162306a36Sopenharmony_ci bool *wrapped) 77262306a36Sopenharmony_ci{ 77362306a36Sopenharmony_ci int i; 77462306a36Sopenharmony_ci int error; 77562306a36Sopenharmony_ci int found = 0; 77662306a36Sopenharmony_ci char *offset = NULL; 77762306a36Sopenharmony_ci xfs_daddr_t end_blk; 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci *wrapped = false; 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci /* 78262306a36Sopenharmony_ci * Walk backwards from the head block until we hit the tail or the first 78362306a36Sopenharmony_ci * block in the log. 78462306a36Sopenharmony_ci */ 78562306a36Sopenharmony_ci end_blk = head_blk > tail_blk ? tail_blk : 0; 78662306a36Sopenharmony_ci for (i = (int) head_blk - 1; i >= end_blk; i--) { 78762306a36Sopenharmony_ci error = xlog_bread(log, i, 1, buffer, &offset); 78862306a36Sopenharmony_ci if (error) 78962306a36Sopenharmony_ci goto out_error; 79062306a36Sopenharmony_ci 79162306a36Sopenharmony_ci if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { 79262306a36Sopenharmony_ci *rblk = i; 79362306a36Sopenharmony_ci *rhead = (struct xlog_rec_header *) offset; 79462306a36Sopenharmony_ci if (++found == count) 79562306a36Sopenharmony_ci break; 79662306a36Sopenharmony_ci } 79762306a36Sopenharmony_ci } 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci /* 80062306a36Sopenharmony_ci * If we haven't hit the tail block or the log record header count, 80162306a36Sopenharmony_ci * start looking again from the end of the physical log. Note that 80262306a36Sopenharmony_ci * callers can pass head == tail if the tail is not yet known. 80362306a36Sopenharmony_ci */ 80462306a36Sopenharmony_ci if (tail_blk >= head_blk && found != count) { 80562306a36Sopenharmony_ci for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) { 80662306a36Sopenharmony_ci error = xlog_bread(log, i, 1, buffer, &offset); 80762306a36Sopenharmony_ci if (error) 80862306a36Sopenharmony_ci goto out_error; 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci if (*(__be32 *)offset == 81162306a36Sopenharmony_ci cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { 81262306a36Sopenharmony_ci *wrapped = true; 81362306a36Sopenharmony_ci *rblk = i; 81462306a36Sopenharmony_ci *rhead = (struct xlog_rec_header *) offset; 81562306a36Sopenharmony_ci if (++found == count) 81662306a36Sopenharmony_ci break; 81762306a36Sopenharmony_ci } 81862306a36Sopenharmony_ci } 81962306a36Sopenharmony_ci } 82062306a36Sopenharmony_ci 82162306a36Sopenharmony_ci return found; 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ciout_error: 82462306a36Sopenharmony_ci return error; 82562306a36Sopenharmony_ci} 82662306a36Sopenharmony_ci 82762306a36Sopenharmony_ci/* 82862306a36Sopenharmony_ci * Seek forward in the log for log record headers. 82962306a36Sopenharmony_ci * 83062306a36Sopenharmony_ci * Given head and tail blocks, walk forward from the tail block until we find 83162306a36Sopenharmony_ci * the provided number of records or hit the head block. The return value is the 83262306a36Sopenharmony_ci * number of records encountered or a negative error code. The log block and 83362306a36Sopenharmony_ci * buffer pointer of the last record seen are returned in rblk and rhead 83462306a36Sopenharmony_ci * respectively. 83562306a36Sopenharmony_ci */ 83662306a36Sopenharmony_ciSTATIC int 83762306a36Sopenharmony_cixlog_seek_logrec_hdr( 83862306a36Sopenharmony_ci struct xlog *log, 83962306a36Sopenharmony_ci xfs_daddr_t head_blk, 84062306a36Sopenharmony_ci xfs_daddr_t tail_blk, 84162306a36Sopenharmony_ci int count, 84262306a36Sopenharmony_ci char *buffer, 84362306a36Sopenharmony_ci xfs_daddr_t *rblk, 84462306a36Sopenharmony_ci struct xlog_rec_header **rhead, 84562306a36Sopenharmony_ci bool *wrapped) 84662306a36Sopenharmony_ci{ 84762306a36Sopenharmony_ci int i; 84862306a36Sopenharmony_ci int error; 84962306a36Sopenharmony_ci int found = 0; 85062306a36Sopenharmony_ci char *offset = NULL; 85162306a36Sopenharmony_ci xfs_daddr_t end_blk; 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci *wrapped = false; 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci /* 85662306a36Sopenharmony_ci * Walk forward from the tail block until we hit the head or the last 85762306a36Sopenharmony_ci * block in the log. 85862306a36Sopenharmony_ci */ 85962306a36Sopenharmony_ci end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1; 86062306a36Sopenharmony_ci for (i = (int) tail_blk; i <= end_blk; i++) { 86162306a36Sopenharmony_ci error = xlog_bread(log, i, 1, buffer, &offset); 86262306a36Sopenharmony_ci if (error) 86362306a36Sopenharmony_ci goto out_error; 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { 86662306a36Sopenharmony_ci *rblk = i; 86762306a36Sopenharmony_ci *rhead = (struct xlog_rec_header *) offset; 86862306a36Sopenharmony_ci if (++found == count) 86962306a36Sopenharmony_ci break; 87062306a36Sopenharmony_ci } 87162306a36Sopenharmony_ci } 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_ci /* 87462306a36Sopenharmony_ci * If we haven't hit the head block or the log record header count, 87562306a36Sopenharmony_ci * start looking again from the start of the physical log. 87662306a36Sopenharmony_ci */ 87762306a36Sopenharmony_ci if (tail_blk > head_blk && found != count) { 87862306a36Sopenharmony_ci for (i = 0; i < (int) head_blk; i++) { 87962306a36Sopenharmony_ci error = xlog_bread(log, i, 1, buffer, &offset); 88062306a36Sopenharmony_ci if (error) 88162306a36Sopenharmony_ci goto out_error; 88262306a36Sopenharmony_ci 88362306a36Sopenharmony_ci if (*(__be32 *)offset == 88462306a36Sopenharmony_ci cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { 88562306a36Sopenharmony_ci *wrapped = true; 88662306a36Sopenharmony_ci *rblk = i; 88762306a36Sopenharmony_ci *rhead = (struct xlog_rec_header *) offset; 88862306a36Sopenharmony_ci if (++found == count) 88962306a36Sopenharmony_ci break; 89062306a36Sopenharmony_ci } 89162306a36Sopenharmony_ci } 89262306a36Sopenharmony_ci } 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci return found; 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ciout_error: 89762306a36Sopenharmony_ci return error; 89862306a36Sopenharmony_ci} 89962306a36Sopenharmony_ci 90062306a36Sopenharmony_ci/* 90162306a36Sopenharmony_ci * Calculate distance from head to tail (i.e., unused space in the log). 90262306a36Sopenharmony_ci */ 90362306a36Sopenharmony_cistatic inline int 90462306a36Sopenharmony_cixlog_tail_distance( 90562306a36Sopenharmony_ci struct xlog *log, 90662306a36Sopenharmony_ci xfs_daddr_t head_blk, 90762306a36Sopenharmony_ci xfs_daddr_t tail_blk) 90862306a36Sopenharmony_ci{ 90962306a36Sopenharmony_ci if (head_blk < tail_blk) 91062306a36Sopenharmony_ci return tail_blk - head_blk; 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_ci return tail_blk + (log->l_logBBsize - head_blk); 91362306a36Sopenharmony_ci} 91462306a36Sopenharmony_ci 91562306a36Sopenharmony_ci/* 91662306a36Sopenharmony_ci * Verify the log tail. This is particularly important when torn or incomplete 91762306a36Sopenharmony_ci * writes have been detected near the front of the log and the head has been 91862306a36Sopenharmony_ci * walked back accordingly. 91962306a36Sopenharmony_ci * 92062306a36Sopenharmony_ci * We also have to handle the case where the tail was pinned and the head 92162306a36Sopenharmony_ci * blocked behind the tail right before a crash. If the tail had been pushed 92262306a36Sopenharmony_ci * immediately prior to the crash and the subsequent checkpoint was only 92362306a36Sopenharmony_ci * partially written, it's possible it overwrote the last referenced tail in the 92462306a36Sopenharmony_ci * log with garbage. This is not a coherency problem because the tail must have 92562306a36Sopenharmony_ci * been pushed before it can be overwritten, but appears as log corruption to 92662306a36Sopenharmony_ci * recovery because we have no way to know the tail was updated if the 92762306a36Sopenharmony_ci * subsequent checkpoint didn't write successfully. 92862306a36Sopenharmony_ci * 92962306a36Sopenharmony_ci * Therefore, CRC check the log from tail to head. If a failure occurs and the 93062306a36Sopenharmony_ci * offending record is within max iclog bufs from the head, walk the tail 93162306a36Sopenharmony_ci * forward and retry until a valid tail is found or corruption is detected out 93262306a36Sopenharmony_ci * of the range of a possible overwrite. 93362306a36Sopenharmony_ci */ 93462306a36Sopenharmony_ciSTATIC int 93562306a36Sopenharmony_cixlog_verify_tail( 93662306a36Sopenharmony_ci struct xlog *log, 93762306a36Sopenharmony_ci xfs_daddr_t head_blk, 93862306a36Sopenharmony_ci xfs_daddr_t *tail_blk, 93962306a36Sopenharmony_ci int hsize) 94062306a36Sopenharmony_ci{ 94162306a36Sopenharmony_ci struct xlog_rec_header *thead; 94262306a36Sopenharmony_ci char *buffer; 94362306a36Sopenharmony_ci xfs_daddr_t first_bad; 94462306a36Sopenharmony_ci int error = 0; 94562306a36Sopenharmony_ci bool wrapped; 94662306a36Sopenharmony_ci xfs_daddr_t tmp_tail; 94762306a36Sopenharmony_ci xfs_daddr_t orig_tail = *tail_blk; 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci buffer = xlog_alloc_buffer(log, 1); 95062306a36Sopenharmony_ci if (!buffer) 95162306a36Sopenharmony_ci return -ENOMEM; 95262306a36Sopenharmony_ci 95362306a36Sopenharmony_ci /* 95462306a36Sopenharmony_ci * Make sure the tail points to a record (returns positive count on 95562306a36Sopenharmony_ci * success). 95662306a36Sopenharmony_ci */ 95762306a36Sopenharmony_ci error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer, 95862306a36Sopenharmony_ci &tmp_tail, &thead, &wrapped); 95962306a36Sopenharmony_ci if (error < 0) 96062306a36Sopenharmony_ci goto out; 96162306a36Sopenharmony_ci if (*tail_blk != tmp_tail) 96262306a36Sopenharmony_ci *tail_blk = tmp_tail; 96362306a36Sopenharmony_ci 96462306a36Sopenharmony_ci /* 96562306a36Sopenharmony_ci * Run a CRC check from the tail to the head. We can't just check 96662306a36Sopenharmony_ci * MAX_ICLOGS records past the tail because the tail may point to stale 96762306a36Sopenharmony_ci * blocks cleared during the search for the head/tail. These blocks are 96862306a36Sopenharmony_ci * overwritten with zero-length records and thus record count is not a 96962306a36Sopenharmony_ci * reliable indicator of the iclog state before a crash. 97062306a36Sopenharmony_ci */ 97162306a36Sopenharmony_ci first_bad = 0; 97262306a36Sopenharmony_ci error = xlog_do_recovery_pass(log, head_blk, *tail_blk, 97362306a36Sopenharmony_ci XLOG_RECOVER_CRCPASS, &first_bad); 97462306a36Sopenharmony_ci while ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) { 97562306a36Sopenharmony_ci int tail_distance; 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_ci /* 97862306a36Sopenharmony_ci * Is corruption within range of the head? If so, retry from 97962306a36Sopenharmony_ci * the next record. Otherwise return an error. 98062306a36Sopenharmony_ci */ 98162306a36Sopenharmony_ci tail_distance = xlog_tail_distance(log, head_blk, first_bad); 98262306a36Sopenharmony_ci if (tail_distance > BTOBB(XLOG_MAX_ICLOGS * hsize)) 98362306a36Sopenharmony_ci break; 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci /* skip to the next record; returns positive count on success */ 98662306a36Sopenharmony_ci error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, 98762306a36Sopenharmony_ci buffer, &tmp_tail, &thead, &wrapped); 98862306a36Sopenharmony_ci if (error < 0) 98962306a36Sopenharmony_ci goto out; 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci *tail_blk = tmp_tail; 99262306a36Sopenharmony_ci first_bad = 0; 99362306a36Sopenharmony_ci error = xlog_do_recovery_pass(log, head_blk, *tail_blk, 99462306a36Sopenharmony_ci XLOG_RECOVER_CRCPASS, &first_bad); 99562306a36Sopenharmony_ci } 99662306a36Sopenharmony_ci 99762306a36Sopenharmony_ci if (!error && *tail_blk != orig_tail) 99862306a36Sopenharmony_ci xfs_warn(log->l_mp, 99962306a36Sopenharmony_ci "Tail block (0x%llx) overwrite detected. Updated to 0x%llx", 100062306a36Sopenharmony_ci orig_tail, *tail_blk); 100162306a36Sopenharmony_ciout: 100262306a36Sopenharmony_ci kmem_free(buffer); 100362306a36Sopenharmony_ci return error; 100462306a36Sopenharmony_ci} 100562306a36Sopenharmony_ci 100662306a36Sopenharmony_ci/* 100762306a36Sopenharmony_ci * Detect and trim torn writes from the head of the log. 100862306a36Sopenharmony_ci * 100962306a36Sopenharmony_ci * Storage without sector atomicity guarantees can result in torn writes in the 101062306a36Sopenharmony_ci * log in the event of a crash. Our only means to detect this scenario is via 101162306a36Sopenharmony_ci * CRC verification. While we can't always be certain that CRC verification 101262306a36Sopenharmony_ci * failure is due to a torn write vs. an unrelated corruption, we do know that 101362306a36Sopenharmony_ci * only a certain number (XLOG_MAX_ICLOGS) of log records can be written out at 101462306a36Sopenharmony_ci * one time. Therefore, CRC verify up to XLOG_MAX_ICLOGS records at the head of 101562306a36Sopenharmony_ci * the log and treat failures in this range as torn writes as a matter of 101662306a36Sopenharmony_ci * policy. In the event of CRC failure, the head is walked back to the last good 101762306a36Sopenharmony_ci * record in the log and the tail is updated from that record and verified. 101862306a36Sopenharmony_ci */ 101962306a36Sopenharmony_ciSTATIC int 102062306a36Sopenharmony_cixlog_verify_head( 102162306a36Sopenharmony_ci struct xlog *log, 102262306a36Sopenharmony_ci xfs_daddr_t *head_blk, /* in/out: unverified head */ 102362306a36Sopenharmony_ci xfs_daddr_t *tail_blk, /* out: tail block */ 102462306a36Sopenharmony_ci char *buffer, 102562306a36Sopenharmony_ci xfs_daddr_t *rhead_blk, /* start blk of last record */ 102662306a36Sopenharmony_ci struct xlog_rec_header **rhead, /* ptr to last record */ 102762306a36Sopenharmony_ci bool *wrapped) /* last rec. wraps phys. log */ 102862306a36Sopenharmony_ci{ 102962306a36Sopenharmony_ci struct xlog_rec_header *tmp_rhead; 103062306a36Sopenharmony_ci char *tmp_buffer; 103162306a36Sopenharmony_ci xfs_daddr_t first_bad; 103262306a36Sopenharmony_ci xfs_daddr_t tmp_rhead_blk; 103362306a36Sopenharmony_ci int found; 103462306a36Sopenharmony_ci int error; 103562306a36Sopenharmony_ci bool tmp_wrapped; 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ci /* 103862306a36Sopenharmony_ci * Check the head of the log for torn writes. Search backwards from the 103962306a36Sopenharmony_ci * head until we hit the tail or the maximum number of log record I/Os 104062306a36Sopenharmony_ci * that could have been in flight at one time. Use a temporary buffer so 104162306a36Sopenharmony_ci * we don't trash the rhead/buffer pointers from the caller. 104262306a36Sopenharmony_ci */ 104362306a36Sopenharmony_ci tmp_buffer = xlog_alloc_buffer(log, 1); 104462306a36Sopenharmony_ci if (!tmp_buffer) 104562306a36Sopenharmony_ci return -ENOMEM; 104662306a36Sopenharmony_ci error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, 104762306a36Sopenharmony_ci XLOG_MAX_ICLOGS, tmp_buffer, 104862306a36Sopenharmony_ci &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped); 104962306a36Sopenharmony_ci kmem_free(tmp_buffer); 105062306a36Sopenharmony_ci if (error < 0) 105162306a36Sopenharmony_ci return error; 105262306a36Sopenharmony_ci 105362306a36Sopenharmony_ci /* 105462306a36Sopenharmony_ci * Now run a CRC verification pass over the records starting at the 105562306a36Sopenharmony_ci * block found above to the current head. If a CRC failure occurs, the 105662306a36Sopenharmony_ci * log block of the first bad record is saved in first_bad. 105762306a36Sopenharmony_ci */ 105862306a36Sopenharmony_ci error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk, 105962306a36Sopenharmony_ci XLOG_RECOVER_CRCPASS, &first_bad); 106062306a36Sopenharmony_ci if ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) { 106162306a36Sopenharmony_ci /* 106262306a36Sopenharmony_ci * We've hit a potential torn write. Reset the error and warn 106362306a36Sopenharmony_ci * about it. 106462306a36Sopenharmony_ci */ 106562306a36Sopenharmony_ci error = 0; 106662306a36Sopenharmony_ci xfs_warn(log->l_mp, 106762306a36Sopenharmony_ci"Torn write (CRC failure) detected at log block 0x%llx. Truncating head block from 0x%llx.", 106862306a36Sopenharmony_ci first_bad, *head_blk); 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_ci /* 107162306a36Sopenharmony_ci * Get the header block and buffer pointer for the last good 107262306a36Sopenharmony_ci * record before the bad record. 107362306a36Sopenharmony_ci * 107462306a36Sopenharmony_ci * Note that xlog_find_tail() clears the blocks at the new head 107562306a36Sopenharmony_ci * (i.e., the records with invalid CRC) if the cycle number 107662306a36Sopenharmony_ci * matches the current cycle. 107762306a36Sopenharmony_ci */ 107862306a36Sopenharmony_ci found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, 107962306a36Sopenharmony_ci buffer, rhead_blk, rhead, wrapped); 108062306a36Sopenharmony_ci if (found < 0) 108162306a36Sopenharmony_ci return found; 108262306a36Sopenharmony_ci if (found == 0) /* XXX: right thing to do here? */ 108362306a36Sopenharmony_ci return -EIO; 108462306a36Sopenharmony_ci 108562306a36Sopenharmony_ci /* 108662306a36Sopenharmony_ci * Reset the head block to the starting block of the first bad 108762306a36Sopenharmony_ci * log record and set the tail block based on the last good 108862306a36Sopenharmony_ci * record. 108962306a36Sopenharmony_ci * 109062306a36Sopenharmony_ci * Bail out if the updated head/tail match as this indicates 109162306a36Sopenharmony_ci * possible corruption outside of the acceptable 109262306a36Sopenharmony_ci * (XLOG_MAX_ICLOGS) range. This is a job for xfs_repair... 109362306a36Sopenharmony_ci */ 109462306a36Sopenharmony_ci *head_blk = first_bad; 109562306a36Sopenharmony_ci *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn)); 109662306a36Sopenharmony_ci if (*head_blk == *tail_blk) { 109762306a36Sopenharmony_ci ASSERT(0); 109862306a36Sopenharmony_ci return 0; 109962306a36Sopenharmony_ci } 110062306a36Sopenharmony_ci } 110162306a36Sopenharmony_ci if (error) 110262306a36Sopenharmony_ci return error; 110362306a36Sopenharmony_ci 110462306a36Sopenharmony_ci return xlog_verify_tail(log, *head_blk, tail_blk, 110562306a36Sopenharmony_ci be32_to_cpu((*rhead)->h_size)); 110662306a36Sopenharmony_ci} 110762306a36Sopenharmony_ci 110862306a36Sopenharmony_ci/* 110962306a36Sopenharmony_ci * We need to make sure we handle log wrapping properly, so we can't use the 111062306a36Sopenharmony_ci * calculated logbno directly. Make sure it wraps to the correct bno inside the 111162306a36Sopenharmony_ci * log. 111262306a36Sopenharmony_ci * 111362306a36Sopenharmony_ci * The log is limited to 32 bit sizes, so we use the appropriate modulus 111462306a36Sopenharmony_ci * operation here and cast it back to a 64 bit daddr on return. 111562306a36Sopenharmony_ci */ 111662306a36Sopenharmony_cistatic inline xfs_daddr_t 111762306a36Sopenharmony_cixlog_wrap_logbno( 111862306a36Sopenharmony_ci struct xlog *log, 111962306a36Sopenharmony_ci xfs_daddr_t bno) 112062306a36Sopenharmony_ci{ 112162306a36Sopenharmony_ci int mod; 112262306a36Sopenharmony_ci 112362306a36Sopenharmony_ci div_s64_rem(bno, log->l_logBBsize, &mod); 112462306a36Sopenharmony_ci return mod; 112562306a36Sopenharmony_ci} 112662306a36Sopenharmony_ci 112762306a36Sopenharmony_ci/* 112862306a36Sopenharmony_ci * Check whether the head of the log points to an unmount record. In other 112962306a36Sopenharmony_ci * words, determine whether the log is clean. If so, update the in-core state 113062306a36Sopenharmony_ci * appropriately. 113162306a36Sopenharmony_ci */ 113262306a36Sopenharmony_cistatic int 113362306a36Sopenharmony_cixlog_check_unmount_rec( 113462306a36Sopenharmony_ci struct xlog *log, 113562306a36Sopenharmony_ci xfs_daddr_t *head_blk, 113662306a36Sopenharmony_ci xfs_daddr_t *tail_blk, 113762306a36Sopenharmony_ci struct xlog_rec_header *rhead, 113862306a36Sopenharmony_ci xfs_daddr_t rhead_blk, 113962306a36Sopenharmony_ci char *buffer, 114062306a36Sopenharmony_ci bool *clean) 114162306a36Sopenharmony_ci{ 114262306a36Sopenharmony_ci struct xlog_op_header *op_head; 114362306a36Sopenharmony_ci xfs_daddr_t umount_data_blk; 114462306a36Sopenharmony_ci xfs_daddr_t after_umount_blk; 114562306a36Sopenharmony_ci int hblks; 114662306a36Sopenharmony_ci int error; 114762306a36Sopenharmony_ci char *offset; 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci *clean = false; 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci /* 115262306a36Sopenharmony_ci * Look for unmount record. If we find it, then we know there was a 115362306a36Sopenharmony_ci * clean unmount. Since 'i' could be the last block in the physical 115462306a36Sopenharmony_ci * log, we convert to a log block before comparing to the head_blk. 115562306a36Sopenharmony_ci * 115662306a36Sopenharmony_ci * Save the current tail lsn to use to pass to xlog_clear_stale_blocks() 115762306a36Sopenharmony_ci * below. We won't want to clear the unmount record if there is one, so 115862306a36Sopenharmony_ci * we pass the lsn of the unmount record rather than the block after it. 115962306a36Sopenharmony_ci */ 116062306a36Sopenharmony_ci hblks = xlog_logrec_hblks(log, rhead); 116162306a36Sopenharmony_ci after_umount_blk = xlog_wrap_logbno(log, 116262306a36Sopenharmony_ci rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len))); 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci if (*head_blk == after_umount_blk && 116562306a36Sopenharmony_ci be32_to_cpu(rhead->h_num_logops) == 1) { 116662306a36Sopenharmony_ci umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks); 116762306a36Sopenharmony_ci error = xlog_bread(log, umount_data_blk, 1, buffer, &offset); 116862306a36Sopenharmony_ci if (error) 116962306a36Sopenharmony_ci return error; 117062306a36Sopenharmony_ci 117162306a36Sopenharmony_ci op_head = (struct xlog_op_header *)offset; 117262306a36Sopenharmony_ci if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { 117362306a36Sopenharmony_ci /* 117462306a36Sopenharmony_ci * Set tail and last sync so that newly written log 117562306a36Sopenharmony_ci * records will point recovery to after the current 117662306a36Sopenharmony_ci * unmount record. 117762306a36Sopenharmony_ci */ 117862306a36Sopenharmony_ci xlog_assign_atomic_lsn(&log->l_tail_lsn, 117962306a36Sopenharmony_ci log->l_curr_cycle, after_umount_blk); 118062306a36Sopenharmony_ci xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 118162306a36Sopenharmony_ci log->l_curr_cycle, after_umount_blk); 118262306a36Sopenharmony_ci *tail_blk = after_umount_blk; 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ci *clean = true; 118562306a36Sopenharmony_ci } 118662306a36Sopenharmony_ci } 118762306a36Sopenharmony_ci 118862306a36Sopenharmony_ci return 0; 118962306a36Sopenharmony_ci} 119062306a36Sopenharmony_ci 119162306a36Sopenharmony_cistatic void 119262306a36Sopenharmony_cixlog_set_state( 119362306a36Sopenharmony_ci struct xlog *log, 119462306a36Sopenharmony_ci xfs_daddr_t head_blk, 119562306a36Sopenharmony_ci struct xlog_rec_header *rhead, 119662306a36Sopenharmony_ci xfs_daddr_t rhead_blk, 119762306a36Sopenharmony_ci bool bump_cycle) 119862306a36Sopenharmony_ci{ 119962306a36Sopenharmony_ci /* 120062306a36Sopenharmony_ci * Reset log values according to the state of the log when we 120162306a36Sopenharmony_ci * crashed. In the case where head_blk == 0, we bump curr_cycle 120262306a36Sopenharmony_ci * one because the next write starts a new cycle rather than 120362306a36Sopenharmony_ci * continuing the cycle of the last good log record. At this 120462306a36Sopenharmony_ci * point we have guaranteed that all partial log records have been 120562306a36Sopenharmony_ci * accounted for. Therefore, we know that the last good log record 120662306a36Sopenharmony_ci * written was complete and ended exactly on the end boundary 120762306a36Sopenharmony_ci * of the physical log. 120862306a36Sopenharmony_ci */ 120962306a36Sopenharmony_ci log->l_prev_block = rhead_blk; 121062306a36Sopenharmony_ci log->l_curr_block = (int)head_blk; 121162306a36Sopenharmony_ci log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); 121262306a36Sopenharmony_ci if (bump_cycle) 121362306a36Sopenharmony_ci log->l_curr_cycle++; 121462306a36Sopenharmony_ci atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); 121562306a36Sopenharmony_ci atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); 121662306a36Sopenharmony_ci xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, 121762306a36Sopenharmony_ci BBTOB(log->l_curr_block)); 121862306a36Sopenharmony_ci xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, 121962306a36Sopenharmony_ci BBTOB(log->l_curr_block)); 122062306a36Sopenharmony_ci} 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci/* 122362306a36Sopenharmony_ci * Find the sync block number or the tail of the log. 122462306a36Sopenharmony_ci * 122562306a36Sopenharmony_ci * This will be the block number of the last record to have its 122662306a36Sopenharmony_ci * associated buffers synced to disk. Every log record header has 122762306a36Sopenharmony_ci * a sync lsn embedded in it. LSNs hold block numbers, so it is easy 122862306a36Sopenharmony_ci * to get a sync block number. The only concern is to figure out which 122962306a36Sopenharmony_ci * log record header to believe. 123062306a36Sopenharmony_ci * 123162306a36Sopenharmony_ci * The following algorithm uses the log record header with the largest 123262306a36Sopenharmony_ci * lsn. The entire log record does not need to be valid. We only care 123362306a36Sopenharmony_ci * that the header is valid. 123462306a36Sopenharmony_ci * 123562306a36Sopenharmony_ci * We could speed up search by using current head_blk buffer, but it is not 123662306a36Sopenharmony_ci * available. 123762306a36Sopenharmony_ci */ 123862306a36Sopenharmony_ciSTATIC int 123962306a36Sopenharmony_cixlog_find_tail( 124062306a36Sopenharmony_ci struct xlog *log, 124162306a36Sopenharmony_ci xfs_daddr_t *head_blk, 124262306a36Sopenharmony_ci xfs_daddr_t *tail_blk) 124362306a36Sopenharmony_ci{ 124462306a36Sopenharmony_ci xlog_rec_header_t *rhead; 124562306a36Sopenharmony_ci char *offset = NULL; 124662306a36Sopenharmony_ci char *buffer; 124762306a36Sopenharmony_ci int error; 124862306a36Sopenharmony_ci xfs_daddr_t rhead_blk; 124962306a36Sopenharmony_ci xfs_lsn_t tail_lsn; 125062306a36Sopenharmony_ci bool wrapped = false; 125162306a36Sopenharmony_ci bool clean = false; 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci /* 125462306a36Sopenharmony_ci * Find previous log record 125562306a36Sopenharmony_ci */ 125662306a36Sopenharmony_ci if ((error = xlog_find_head(log, head_blk))) 125762306a36Sopenharmony_ci return error; 125862306a36Sopenharmony_ci ASSERT(*head_blk < INT_MAX); 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci buffer = xlog_alloc_buffer(log, 1); 126162306a36Sopenharmony_ci if (!buffer) 126262306a36Sopenharmony_ci return -ENOMEM; 126362306a36Sopenharmony_ci if (*head_blk == 0) { /* special case */ 126462306a36Sopenharmony_ci error = xlog_bread(log, 0, 1, buffer, &offset); 126562306a36Sopenharmony_ci if (error) 126662306a36Sopenharmony_ci goto done; 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_ci if (xlog_get_cycle(offset) == 0) { 126962306a36Sopenharmony_ci *tail_blk = 0; 127062306a36Sopenharmony_ci /* leave all other log inited values alone */ 127162306a36Sopenharmony_ci goto done; 127262306a36Sopenharmony_ci } 127362306a36Sopenharmony_ci } 127462306a36Sopenharmony_ci 127562306a36Sopenharmony_ci /* 127662306a36Sopenharmony_ci * Search backwards through the log looking for the log record header 127762306a36Sopenharmony_ci * block. This wraps all the way back around to the head so something is 127862306a36Sopenharmony_ci * seriously wrong if we can't find it. 127962306a36Sopenharmony_ci */ 128062306a36Sopenharmony_ci error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, 128162306a36Sopenharmony_ci &rhead_blk, &rhead, &wrapped); 128262306a36Sopenharmony_ci if (error < 0) 128362306a36Sopenharmony_ci goto done; 128462306a36Sopenharmony_ci if (!error) { 128562306a36Sopenharmony_ci xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); 128662306a36Sopenharmony_ci error = -EFSCORRUPTED; 128762306a36Sopenharmony_ci goto done; 128862306a36Sopenharmony_ci } 128962306a36Sopenharmony_ci *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); 129062306a36Sopenharmony_ci 129162306a36Sopenharmony_ci /* 129262306a36Sopenharmony_ci * Set the log state based on the current head record. 129362306a36Sopenharmony_ci */ 129462306a36Sopenharmony_ci xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped); 129562306a36Sopenharmony_ci tail_lsn = atomic64_read(&log->l_tail_lsn); 129662306a36Sopenharmony_ci 129762306a36Sopenharmony_ci /* 129862306a36Sopenharmony_ci * Look for an unmount record at the head of the log. This sets the log 129962306a36Sopenharmony_ci * state to determine whether recovery is necessary. 130062306a36Sopenharmony_ci */ 130162306a36Sopenharmony_ci error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, 130262306a36Sopenharmony_ci rhead_blk, buffer, &clean); 130362306a36Sopenharmony_ci if (error) 130462306a36Sopenharmony_ci goto done; 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ci /* 130762306a36Sopenharmony_ci * Verify the log head if the log is not clean (e.g., we have anything 130862306a36Sopenharmony_ci * but an unmount record at the head). This uses CRC verification to 130962306a36Sopenharmony_ci * detect and trim torn writes. If discovered, CRC failures are 131062306a36Sopenharmony_ci * considered torn writes and the log head is trimmed accordingly. 131162306a36Sopenharmony_ci * 131262306a36Sopenharmony_ci * Note that we can only run CRC verification when the log is dirty 131362306a36Sopenharmony_ci * because there's no guarantee that the log data behind an unmount 131462306a36Sopenharmony_ci * record is compatible with the current architecture. 131562306a36Sopenharmony_ci */ 131662306a36Sopenharmony_ci if (!clean) { 131762306a36Sopenharmony_ci xfs_daddr_t orig_head = *head_blk; 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci error = xlog_verify_head(log, head_blk, tail_blk, buffer, 132062306a36Sopenharmony_ci &rhead_blk, &rhead, &wrapped); 132162306a36Sopenharmony_ci if (error) 132262306a36Sopenharmony_ci goto done; 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ci /* update in-core state again if the head changed */ 132562306a36Sopenharmony_ci if (*head_blk != orig_head) { 132662306a36Sopenharmony_ci xlog_set_state(log, *head_blk, rhead, rhead_blk, 132762306a36Sopenharmony_ci wrapped); 132862306a36Sopenharmony_ci tail_lsn = atomic64_read(&log->l_tail_lsn); 132962306a36Sopenharmony_ci error = xlog_check_unmount_rec(log, head_blk, tail_blk, 133062306a36Sopenharmony_ci rhead, rhead_blk, buffer, 133162306a36Sopenharmony_ci &clean); 133262306a36Sopenharmony_ci if (error) 133362306a36Sopenharmony_ci goto done; 133462306a36Sopenharmony_ci } 133562306a36Sopenharmony_ci } 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_ci /* 133862306a36Sopenharmony_ci * Note that the unmount was clean. If the unmount was not clean, we 133962306a36Sopenharmony_ci * need to know this to rebuild the superblock counters from the perag 134062306a36Sopenharmony_ci * headers if we have a filesystem using non-persistent counters. 134162306a36Sopenharmony_ci */ 134262306a36Sopenharmony_ci if (clean) 134362306a36Sopenharmony_ci set_bit(XFS_OPSTATE_CLEAN, &log->l_mp->m_opstate); 134462306a36Sopenharmony_ci 134562306a36Sopenharmony_ci /* 134662306a36Sopenharmony_ci * Make sure that there are no blocks in front of the head 134762306a36Sopenharmony_ci * with the same cycle number as the head. This can happen 134862306a36Sopenharmony_ci * because we allow multiple outstanding log writes concurrently, 134962306a36Sopenharmony_ci * and the later writes might make it out before earlier ones. 135062306a36Sopenharmony_ci * 135162306a36Sopenharmony_ci * We use the lsn from before modifying it so that we'll never 135262306a36Sopenharmony_ci * overwrite the unmount record after a clean unmount. 135362306a36Sopenharmony_ci * 135462306a36Sopenharmony_ci * Do this only if we are going to recover the filesystem 135562306a36Sopenharmony_ci * 135662306a36Sopenharmony_ci * NOTE: This used to say "if (!readonly)" 135762306a36Sopenharmony_ci * However on Linux, we can & do recover a read-only filesystem. 135862306a36Sopenharmony_ci * We only skip recovery if NORECOVERY is specified on mount, 135962306a36Sopenharmony_ci * in which case we would not be here. 136062306a36Sopenharmony_ci * 136162306a36Sopenharmony_ci * But... if the -device- itself is readonly, just skip this. 136262306a36Sopenharmony_ci * We can't recover this device anyway, so it won't matter. 136362306a36Sopenharmony_ci */ 136462306a36Sopenharmony_ci if (!xfs_readonly_buftarg(log->l_targ)) 136562306a36Sopenharmony_ci error = xlog_clear_stale_blocks(log, tail_lsn); 136662306a36Sopenharmony_ci 136762306a36Sopenharmony_cidone: 136862306a36Sopenharmony_ci kmem_free(buffer); 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_ci if (error) 137162306a36Sopenharmony_ci xfs_warn(log->l_mp, "failed to locate log tail"); 137262306a36Sopenharmony_ci return error; 137362306a36Sopenharmony_ci} 137462306a36Sopenharmony_ci 137562306a36Sopenharmony_ci/* 137662306a36Sopenharmony_ci * Is the log zeroed at all? 137762306a36Sopenharmony_ci * 137862306a36Sopenharmony_ci * The last binary search should be changed to perform an X block read 137962306a36Sopenharmony_ci * once X becomes small enough. You can then search linearly through 138062306a36Sopenharmony_ci * the X blocks. This will cut down on the number of reads we need to do. 138162306a36Sopenharmony_ci * 138262306a36Sopenharmony_ci * If the log is partially zeroed, this routine will pass back the blkno 138362306a36Sopenharmony_ci * of the first block with cycle number 0. It won't have a complete LR 138462306a36Sopenharmony_ci * preceding it. 138562306a36Sopenharmony_ci * 138662306a36Sopenharmony_ci * Return: 138762306a36Sopenharmony_ci * 0 => the log is completely written to 138862306a36Sopenharmony_ci * 1 => use *blk_no as the first block of the log 138962306a36Sopenharmony_ci * <0 => error has occurred 139062306a36Sopenharmony_ci */ 139162306a36Sopenharmony_ciSTATIC int 139262306a36Sopenharmony_cixlog_find_zeroed( 139362306a36Sopenharmony_ci struct xlog *log, 139462306a36Sopenharmony_ci xfs_daddr_t *blk_no) 139562306a36Sopenharmony_ci{ 139662306a36Sopenharmony_ci char *buffer; 139762306a36Sopenharmony_ci char *offset; 139862306a36Sopenharmony_ci uint first_cycle, last_cycle; 139962306a36Sopenharmony_ci xfs_daddr_t new_blk, last_blk, start_blk; 140062306a36Sopenharmony_ci xfs_daddr_t num_scan_bblks; 140162306a36Sopenharmony_ci int error, log_bbnum = log->l_logBBsize; 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_ci *blk_no = 0; 140462306a36Sopenharmony_ci 140562306a36Sopenharmony_ci /* check totally zeroed log */ 140662306a36Sopenharmony_ci buffer = xlog_alloc_buffer(log, 1); 140762306a36Sopenharmony_ci if (!buffer) 140862306a36Sopenharmony_ci return -ENOMEM; 140962306a36Sopenharmony_ci error = xlog_bread(log, 0, 1, buffer, &offset); 141062306a36Sopenharmony_ci if (error) 141162306a36Sopenharmony_ci goto out_free_buffer; 141262306a36Sopenharmony_ci 141362306a36Sopenharmony_ci first_cycle = xlog_get_cycle(offset); 141462306a36Sopenharmony_ci if (first_cycle == 0) { /* completely zeroed log */ 141562306a36Sopenharmony_ci *blk_no = 0; 141662306a36Sopenharmony_ci kmem_free(buffer); 141762306a36Sopenharmony_ci return 1; 141862306a36Sopenharmony_ci } 141962306a36Sopenharmony_ci 142062306a36Sopenharmony_ci /* check partially zeroed log */ 142162306a36Sopenharmony_ci error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset); 142262306a36Sopenharmony_ci if (error) 142362306a36Sopenharmony_ci goto out_free_buffer; 142462306a36Sopenharmony_ci 142562306a36Sopenharmony_ci last_cycle = xlog_get_cycle(offset); 142662306a36Sopenharmony_ci if (last_cycle != 0) { /* log completely written to */ 142762306a36Sopenharmony_ci kmem_free(buffer); 142862306a36Sopenharmony_ci return 0; 142962306a36Sopenharmony_ci } 143062306a36Sopenharmony_ci 143162306a36Sopenharmony_ci /* we have a partially zeroed log */ 143262306a36Sopenharmony_ci last_blk = log_bbnum-1; 143362306a36Sopenharmony_ci error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0); 143462306a36Sopenharmony_ci if (error) 143562306a36Sopenharmony_ci goto out_free_buffer; 143662306a36Sopenharmony_ci 143762306a36Sopenharmony_ci /* 143862306a36Sopenharmony_ci * Validate the answer. Because there is no way to guarantee that 143962306a36Sopenharmony_ci * the entire log is made up of log records which are the same size, 144062306a36Sopenharmony_ci * we scan over the defined maximum blocks. At this point, the maximum 144162306a36Sopenharmony_ci * is not chosen to mean anything special. XXXmiken 144262306a36Sopenharmony_ci */ 144362306a36Sopenharmony_ci num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); 144462306a36Sopenharmony_ci ASSERT(num_scan_bblks <= INT_MAX); 144562306a36Sopenharmony_ci 144662306a36Sopenharmony_ci if (last_blk < num_scan_bblks) 144762306a36Sopenharmony_ci num_scan_bblks = last_blk; 144862306a36Sopenharmony_ci start_blk = last_blk - num_scan_bblks; 144962306a36Sopenharmony_ci 145062306a36Sopenharmony_ci /* 145162306a36Sopenharmony_ci * We search for any instances of cycle number 0 that occur before 145262306a36Sopenharmony_ci * our current estimate of the head. What we're trying to detect is 145362306a36Sopenharmony_ci * 1 ... | 0 | 1 | 0... 145462306a36Sopenharmony_ci * ^ binary search ends here 145562306a36Sopenharmony_ci */ 145662306a36Sopenharmony_ci if ((error = xlog_find_verify_cycle(log, start_blk, 145762306a36Sopenharmony_ci (int)num_scan_bblks, 0, &new_blk))) 145862306a36Sopenharmony_ci goto out_free_buffer; 145962306a36Sopenharmony_ci if (new_blk != -1) 146062306a36Sopenharmony_ci last_blk = new_blk; 146162306a36Sopenharmony_ci 146262306a36Sopenharmony_ci /* 146362306a36Sopenharmony_ci * Potentially backup over partial log record write. We don't need 146462306a36Sopenharmony_ci * to search the end of the log because we know it is zero. 146562306a36Sopenharmony_ci */ 146662306a36Sopenharmony_ci error = xlog_find_verify_log_record(log, start_blk, &last_blk, 0); 146762306a36Sopenharmony_ci if (error == 1) 146862306a36Sopenharmony_ci error = -EIO; 146962306a36Sopenharmony_ci if (error) 147062306a36Sopenharmony_ci goto out_free_buffer; 147162306a36Sopenharmony_ci 147262306a36Sopenharmony_ci *blk_no = last_blk; 147362306a36Sopenharmony_ciout_free_buffer: 147462306a36Sopenharmony_ci kmem_free(buffer); 147562306a36Sopenharmony_ci if (error) 147662306a36Sopenharmony_ci return error; 147762306a36Sopenharmony_ci return 1; 147862306a36Sopenharmony_ci} 147962306a36Sopenharmony_ci 148062306a36Sopenharmony_ci/* 148162306a36Sopenharmony_ci * These are simple subroutines used by xlog_clear_stale_blocks() below 148262306a36Sopenharmony_ci * to initialize a buffer full of empty log record headers and write 148362306a36Sopenharmony_ci * them into the log. 148462306a36Sopenharmony_ci */ 148562306a36Sopenharmony_ciSTATIC void 148662306a36Sopenharmony_cixlog_add_record( 148762306a36Sopenharmony_ci struct xlog *log, 148862306a36Sopenharmony_ci char *buf, 148962306a36Sopenharmony_ci int cycle, 149062306a36Sopenharmony_ci int block, 149162306a36Sopenharmony_ci int tail_cycle, 149262306a36Sopenharmony_ci int tail_block) 149362306a36Sopenharmony_ci{ 149462306a36Sopenharmony_ci xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; 149562306a36Sopenharmony_ci 149662306a36Sopenharmony_ci memset(buf, 0, BBSIZE); 149762306a36Sopenharmony_ci recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); 149862306a36Sopenharmony_ci recp->h_cycle = cpu_to_be32(cycle); 149962306a36Sopenharmony_ci recp->h_version = cpu_to_be32( 150062306a36Sopenharmony_ci xfs_has_logv2(log->l_mp) ? 2 : 1); 150162306a36Sopenharmony_ci recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); 150262306a36Sopenharmony_ci recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); 150362306a36Sopenharmony_ci recp->h_fmt = cpu_to_be32(XLOG_FMT); 150462306a36Sopenharmony_ci memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); 150562306a36Sopenharmony_ci} 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_ciSTATIC int 150862306a36Sopenharmony_cixlog_write_log_records( 150962306a36Sopenharmony_ci struct xlog *log, 151062306a36Sopenharmony_ci int cycle, 151162306a36Sopenharmony_ci int start_block, 151262306a36Sopenharmony_ci int blocks, 151362306a36Sopenharmony_ci int tail_cycle, 151462306a36Sopenharmony_ci int tail_block) 151562306a36Sopenharmony_ci{ 151662306a36Sopenharmony_ci char *offset; 151762306a36Sopenharmony_ci char *buffer; 151862306a36Sopenharmony_ci int balign, ealign; 151962306a36Sopenharmony_ci int sectbb = log->l_sectBBsize; 152062306a36Sopenharmony_ci int end_block = start_block + blocks; 152162306a36Sopenharmony_ci int bufblks; 152262306a36Sopenharmony_ci int error = 0; 152362306a36Sopenharmony_ci int i, j = 0; 152462306a36Sopenharmony_ci 152562306a36Sopenharmony_ci /* 152662306a36Sopenharmony_ci * Greedily allocate a buffer big enough to handle the full 152762306a36Sopenharmony_ci * range of basic blocks to be written. If that fails, try 152862306a36Sopenharmony_ci * a smaller size. We need to be able to write at least a 152962306a36Sopenharmony_ci * log sector, or we're out of luck. 153062306a36Sopenharmony_ci */ 153162306a36Sopenharmony_ci bufblks = roundup_pow_of_two(blocks); 153262306a36Sopenharmony_ci while (bufblks > log->l_logBBsize) 153362306a36Sopenharmony_ci bufblks >>= 1; 153462306a36Sopenharmony_ci while (!(buffer = xlog_alloc_buffer(log, bufblks))) { 153562306a36Sopenharmony_ci bufblks >>= 1; 153662306a36Sopenharmony_ci if (bufblks < sectbb) 153762306a36Sopenharmony_ci return -ENOMEM; 153862306a36Sopenharmony_ci } 153962306a36Sopenharmony_ci 154062306a36Sopenharmony_ci /* We may need to do a read at the start to fill in part of 154162306a36Sopenharmony_ci * the buffer in the starting sector not covered by the first 154262306a36Sopenharmony_ci * write below. 154362306a36Sopenharmony_ci */ 154462306a36Sopenharmony_ci balign = round_down(start_block, sectbb); 154562306a36Sopenharmony_ci if (balign != start_block) { 154662306a36Sopenharmony_ci error = xlog_bread_noalign(log, start_block, 1, buffer); 154762306a36Sopenharmony_ci if (error) 154862306a36Sopenharmony_ci goto out_free_buffer; 154962306a36Sopenharmony_ci 155062306a36Sopenharmony_ci j = start_block - balign; 155162306a36Sopenharmony_ci } 155262306a36Sopenharmony_ci 155362306a36Sopenharmony_ci for (i = start_block; i < end_block; i += bufblks) { 155462306a36Sopenharmony_ci int bcount, endcount; 155562306a36Sopenharmony_ci 155662306a36Sopenharmony_ci bcount = min(bufblks, end_block - start_block); 155762306a36Sopenharmony_ci endcount = bcount - j; 155862306a36Sopenharmony_ci 155962306a36Sopenharmony_ci /* We may need to do a read at the end to fill in part of 156062306a36Sopenharmony_ci * the buffer in the final sector not covered by the write. 156162306a36Sopenharmony_ci * If this is the same sector as the above read, skip it. 156262306a36Sopenharmony_ci */ 156362306a36Sopenharmony_ci ealign = round_down(end_block, sectbb); 156462306a36Sopenharmony_ci if (j == 0 && (start_block + endcount > ealign)) { 156562306a36Sopenharmony_ci error = xlog_bread_noalign(log, ealign, sectbb, 156662306a36Sopenharmony_ci buffer + BBTOB(ealign - start_block)); 156762306a36Sopenharmony_ci if (error) 156862306a36Sopenharmony_ci break; 156962306a36Sopenharmony_ci 157062306a36Sopenharmony_ci } 157162306a36Sopenharmony_ci 157262306a36Sopenharmony_ci offset = buffer + xlog_align(log, start_block); 157362306a36Sopenharmony_ci for (; j < endcount; j++) { 157462306a36Sopenharmony_ci xlog_add_record(log, offset, cycle, i+j, 157562306a36Sopenharmony_ci tail_cycle, tail_block); 157662306a36Sopenharmony_ci offset += BBSIZE; 157762306a36Sopenharmony_ci } 157862306a36Sopenharmony_ci error = xlog_bwrite(log, start_block, endcount, buffer); 157962306a36Sopenharmony_ci if (error) 158062306a36Sopenharmony_ci break; 158162306a36Sopenharmony_ci start_block += endcount; 158262306a36Sopenharmony_ci j = 0; 158362306a36Sopenharmony_ci } 158462306a36Sopenharmony_ci 158562306a36Sopenharmony_ciout_free_buffer: 158662306a36Sopenharmony_ci kmem_free(buffer); 158762306a36Sopenharmony_ci return error; 158862306a36Sopenharmony_ci} 158962306a36Sopenharmony_ci 159062306a36Sopenharmony_ci/* 159162306a36Sopenharmony_ci * This routine is called to blow away any incomplete log writes out 159262306a36Sopenharmony_ci * in front of the log head. We do this so that we won't become confused 159362306a36Sopenharmony_ci * if we come up, write only a little bit more, and then crash again. 159462306a36Sopenharmony_ci * If we leave the partial log records out there, this situation could 159562306a36Sopenharmony_ci * cause us to think those partial writes are valid blocks since they 159662306a36Sopenharmony_ci * have the current cycle number. We get rid of them by overwriting them 159762306a36Sopenharmony_ci * with empty log records with the old cycle number rather than the 159862306a36Sopenharmony_ci * current one. 159962306a36Sopenharmony_ci * 160062306a36Sopenharmony_ci * The tail lsn is passed in rather than taken from 160162306a36Sopenharmony_ci * the log so that we will not write over the unmount record after a 160262306a36Sopenharmony_ci * clean unmount in a 512 block log. Doing so would leave the log without 160362306a36Sopenharmony_ci * any valid log records in it until a new one was written. If we crashed 160462306a36Sopenharmony_ci * during that time we would not be able to recover. 160562306a36Sopenharmony_ci */ 160662306a36Sopenharmony_ciSTATIC int 160762306a36Sopenharmony_cixlog_clear_stale_blocks( 160862306a36Sopenharmony_ci struct xlog *log, 160962306a36Sopenharmony_ci xfs_lsn_t tail_lsn) 161062306a36Sopenharmony_ci{ 161162306a36Sopenharmony_ci int tail_cycle, head_cycle; 161262306a36Sopenharmony_ci int tail_block, head_block; 161362306a36Sopenharmony_ci int tail_distance, max_distance; 161462306a36Sopenharmony_ci int distance; 161562306a36Sopenharmony_ci int error; 161662306a36Sopenharmony_ci 161762306a36Sopenharmony_ci tail_cycle = CYCLE_LSN(tail_lsn); 161862306a36Sopenharmony_ci tail_block = BLOCK_LSN(tail_lsn); 161962306a36Sopenharmony_ci head_cycle = log->l_curr_cycle; 162062306a36Sopenharmony_ci head_block = log->l_curr_block; 162162306a36Sopenharmony_ci 162262306a36Sopenharmony_ci /* 162362306a36Sopenharmony_ci * Figure out the distance between the new head of the log 162462306a36Sopenharmony_ci * and the tail. We want to write over any blocks beyond the 162562306a36Sopenharmony_ci * head that we may have written just before the crash, but 162662306a36Sopenharmony_ci * we don't want to overwrite the tail of the log. 162762306a36Sopenharmony_ci */ 162862306a36Sopenharmony_ci if (head_cycle == tail_cycle) { 162962306a36Sopenharmony_ci /* 163062306a36Sopenharmony_ci * The tail is behind the head in the physical log, 163162306a36Sopenharmony_ci * so the distance from the head to the tail is the 163262306a36Sopenharmony_ci * distance from the head to the end of the log plus 163362306a36Sopenharmony_ci * the distance from the beginning of the log to the 163462306a36Sopenharmony_ci * tail. 163562306a36Sopenharmony_ci */ 163662306a36Sopenharmony_ci if (XFS_IS_CORRUPT(log->l_mp, 163762306a36Sopenharmony_ci head_block < tail_block || 163862306a36Sopenharmony_ci head_block >= log->l_logBBsize)) 163962306a36Sopenharmony_ci return -EFSCORRUPTED; 164062306a36Sopenharmony_ci tail_distance = tail_block + (log->l_logBBsize - head_block); 164162306a36Sopenharmony_ci } else { 164262306a36Sopenharmony_ci /* 164362306a36Sopenharmony_ci * The head is behind the tail in the physical log, 164462306a36Sopenharmony_ci * so the distance from the head to the tail is just 164562306a36Sopenharmony_ci * the tail block minus the head block. 164662306a36Sopenharmony_ci */ 164762306a36Sopenharmony_ci if (XFS_IS_CORRUPT(log->l_mp, 164862306a36Sopenharmony_ci head_block >= tail_block || 164962306a36Sopenharmony_ci head_cycle != tail_cycle + 1)) 165062306a36Sopenharmony_ci return -EFSCORRUPTED; 165162306a36Sopenharmony_ci tail_distance = tail_block - head_block; 165262306a36Sopenharmony_ci } 165362306a36Sopenharmony_ci 165462306a36Sopenharmony_ci /* 165562306a36Sopenharmony_ci * If the head is right up against the tail, we can't clear 165662306a36Sopenharmony_ci * anything. 165762306a36Sopenharmony_ci */ 165862306a36Sopenharmony_ci if (tail_distance <= 0) { 165962306a36Sopenharmony_ci ASSERT(tail_distance == 0); 166062306a36Sopenharmony_ci return 0; 166162306a36Sopenharmony_ci } 166262306a36Sopenharmony_ci 166362306a36Sopenharmony_ci max_distance = XLOG_TOTAL_REC_SHIFT(log); 166462306a36Sopenharmony_ci /* 166562306a36Sopenharmony_ci * Take the smaller of the maximum amount of outstanding I/O 166662306a36Sopenharmony_ci * we could have and the distance to the tail to clear out. 166762306a36Sopenharmony_ci * We take the smaller so that we don't overwrite the tail and 166862306a36Sopenharmony_ci * we don't waste all day writing from the head to the tail 166962306a36Sopenharmony_ci * for no reason. 167062306a36Sopenharmony_ci */ 167162306a36Sopenharmony_ci max_distance = min(max_distance, tail_distance); 167262306a36Sopenharmony_ci 167362306a36Sopenharmony_ci if ((head_block + max_distance) <= log->l_logBBsize) { 167462306a36Sopenharmony_ci /* 167562306a36Sopenharmony_ci * We can stomp all the blocks we need to without 167662306a36Sopenharmony_ci * wrapping around the end of the log. Just do it 167762306a36Sopenharmony_ci * in a single write. Use the cycle number of the 167862306a36Sopenharmony_ci * current cycle minus one so that the log will look like: 167962306a36Sopenharmony_ci * n ... | n - 1 ... 168062306a36Sopenharmony_ci */ 168162306a36Sopenharmony_ci error = xlog_write_log_records(log, (head_cycle - 1), 168262306a36Sopenharmony_ci head_block, max_distance, tail_cycle, 168362306a36Sopenharmony_ci tail_block); 168462306a36Sopenharmony_ci if (error) 168562306a36Sopenharmony_ci return error; 168662306a36Sopenharmony_ci } else { 168762306a36Sopenharmony_ci /* 168862306a36Sopenharmony_ci * We need to wrap around the end of the physical log in 168962306a36Sopenharmony_ci * order to clear all the blocks. Do it in two separate 169062306a36Sopenharmony_ci * I/Os. The first write should be from the head to the 169162306a36Sopenharmony_ci * end of the physical log, and it should use the current 169262306a36Sopenharmony_ci * cycle number minus one just like above. 169362306a36Sopenharmony_ci */ 169462306a36Sopenharmony_ci distance = log->l_logBBsize - head_block; 169562306a36Sopenharmony_ci error = xlog_write_log_records(log, (head_cycle - 1), 169662306a36Sopenharmony_ci head_block, distance, tail_cycle, 169762306a36Sopenharmony_ci tail_block); 169862306a36Sopenharmony_ci 169962306a36Sopenharmony_ci if (error) 170062306a36Sopenharmony_ci return error; 170162306a36Sopenharmony_ci 170262306a36Sopenharmony_ci /* 170362306a36Sopenharmony_ci * Now write the blocks at the start of the physical log. 170462306a36Sopenharmony_ci * This writes the remainder of the blocks we want to clear. 170562306a36Sopenharmony_ci * It uses the current cycle number since we're now on the 170662306a36Sopenharmony_ci * same cycle as the head so that we get: 170762306a36Sopenharmony_ci * n ... n ... | n - 1 ... 170862306a36Sopenharmony_ci * ^^^^^ blocks we're writing 170962306a36Sopenharmony_ci */ 171062306a36Sopenharmony_ci distance = max_distance - (log->l_logBBsize - head_block); 171162306a36Sopenharmony_ci error = xlog_write_log_records(log, head_cycle, 0, distance, 171262306a36Sopenharmony_ci tail_cycle, tail_block); 171362306a36Sopenharmony_ci if (error) 171462306a36Sopenharmony_ci return error; 171562306a36Sopenharmony_ci } 171662306a36Sopenharmony_ci 171762306a36Sopenharmony_ci return 0; 171862306a36Sopenharmony_ci} 171962306a36Sopenharmony_ci 172062306a36Sopenharmony_ci/* 172162306a36Sopenharmony_ci * Release the recovered intent item in the AIL that matches the given intent 172262306a36Sopenharmony_ci * type and intent id. 172362306a36Sopenharmony_ci */ 172462306a36Sopenharmony_civoid 172562306a36Sopenharmony_cixlog_recover_release_intent( 172662306a36Sopenharmony_ci struct xlog *log, 172762306a36Sopenharmony_ci unsigned short intent_type, 172862306a36Sopenharmony_ci uint64_t intent_id) 172962306a36Sopenharmony_ci{ 173062306a36Sopenharmony_ci struct xfs_ail_cursor cur; 173162306a36Sopenharmony_ci struct xfs_log_item *lip; 173262306a36Sopenharmony_ci struct xfs_ail *ailp = log->l_ailp; 173362306a36Sopenharmony_ci 173462306a36Sopenharmony_ci spin_lock(&ailp->ail_lock); 173562306a36Sopenharmony_ci for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL; 173662306a36Sopenharmony_ci lip = xfs_trans_ail_cursor_next(ailp, &cur)) { 173762306a36Sopenharmony_ci if (lip->li_type != intent_type) 173862306a36Sopenharmony_ci continue; 173962306a36Sopenharmony_ci if (!lip->li_ops->iop_match(lip, intent_id)) 174062306a36Sopenharmony_ci continue; 174162306a36Sopenharmony_ci 174262306a36Sopenharmony_ci spin_unlock(&ailp->ail_lock); 174362306a36Sopenharmony_ci lip->li_ops->iop_release(lip); 174462306a36Sopenharmony_ci spin_lock(&ailp->ail_lock); 174562306a36Sopenharmony_ci break; 174662306a36Sopenharmony_ci } 174762306a36Sopenharmony_ci 174862306a36Sopenharmony_ci xfs_trans_ail_cursor_done(&cur); 174962306a36Sopenharmony_ci spin_unlock(&ailp->ail_lock); 175062306a36Sopenharmony_ci} 175162306a36Sopenharmony_ci 175262306a36Sopenharmony_ciint 175362306a36Sopenharmony_cixlog_recover_iget( 175462306a36Sopenharmony_ci struct xfs_mount *mp, 175562306a36Sopenharmony_ci xfs_ino_t ino, 175662306a36Sopenharmony_ci struct xfs_inode **ipp) 175762306a36Sopenharmony_ci{ 175862306a36Sopenharmony_ci int error; 175962306a36Sopenharmony_ci 176062306a36Sopenharmony_ci error = xfs_iget(mp, NULL, ino, 0, 0, ipp); 176162306a36Sopenharmony_ci if (error) 176262306a36Sopenharmony_ci return error; 176362306a36Sopenharmony_ci 176462306a36Sopenharmony_ci error = xfs_qm_dqattach(*ipp); 176562306a36Sopenharmony_ci if (error) { 176662306a36Sopenharmony_ci xfs_irele(*ipp); 176762306a36Sopenharmony_ci return error; 176862306a36Sopenharmony_ci } 176962306a36Sopenharmony_ci 177062306a36Sopenharmony_ci if (VFS_I(*ipp)->i_nlink == 0) 177162306a36Sopenharmony_ci xfs_iflags_set(*ipp, XFS_IRECOVERY); 177262306a36Sopenharmony_ci 177362306a36Sopenharmony_ci return 0; 177462306a36Sopenharmony_ci} 177562306a36Sopenharmony_ci 177662306a36Sopenharmony_ci/****************************************************************************** 177762306a36Sopenharmony_ci * 177862306a36Sopenharmony_ci * Log recover routines 177962306a36Sopenharmony_ci * 178062306a36Sopenharmony_ci ****************************************************************************** 178162306a36Sopenharmony_ci */ 178262306a36Sopenharmony_cistatic const struct xlog_recover_item_ops *xlog_recover_item_ops[] = { 178362306a36Sopenharmony_ci &xlog_buf_item_ops, 178462306a36Sopenharmony_ci &xlog_inode_item_ops, 178562306a36Sopenharmony_ci &xlog_dquot_item_ops, 178662306a36Sopenharmony_ci &xlog_quotaoff_item_ops, 178762306a36Sopenharmony_ci &xlog_icreate_item_ops, 178862306a36Sopenharmony_ci &xlog_efi_item_ops, 178962306a36Sopenharmony_ci &xlog_efd_item_ops, 179062306a36Sopenharmony_ci &xlog_rui_item_ops, 179162306a36Sopenharmony_ci &xlog_rud_item_ops, 179262306a36Sopenharmony_ci &xlog_cui_item_ops, 179362306a36Sopenharmony_ci &xlog_cud_item_ops, 179462306a36Sopenharmony_ci &xlog_bui_item_ops, 179562306a36Sopenharmony_ci &xlog_bud_item_ops, 179662306a36Sopenharmony_ci &xlog_attri_item_ops, 179762306a36Sopenharmony_ci &xlog_attrd_item_ops, 179862306a36Sopenharmony_ci}; 179962306a36Sopenharmony_ci 180062306a36Sopenharmony_cistatic const struct xlog_recover_item_ops * 180162306a36Sopenharmony_cixlog_find_item_ops( 180262306a36Sopenharmony_ci struct xlog_recover_item *item) 180362306a36Sopenharmony_ci{ 180462306a36Sopenharmony_ci unsigned int i; 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(xlog_recover_item_ops); i++) 180762306a36Sopenharmony_ci if (ITEM_TYPE(item) == xlog_recover_item_ops[i]->item_type) 180862306a36Sopenharmony_ci return xlog_recover_item_ops[i]; 180962306a36Sopenharmony_ci 181062306a36Sopenharmony_ci return NULL; 181162306a36Sopenharmony_ci} 181262306a36Sopenharmony_ci 181362306a36Sopenharmony_ci/* 181462306a36Sopenharmony_ci * Sort the log items in the transaction. 181562306a36Sopenharmony_ci * 181662306a36Sopenharmony_ci * The ordering constraints are defined by the inode allocation and unlink 181762306a36Sopenharmony_ci * behaviour. The rules are: 181862306a36Sopenharmony_ci * 181962306a36Sopenharmony_ci * 1. Every item is only logged once in a given transaction. Hence it 182062306a36Sopenharmony_ci * represents the last logged state of the item. Hence ordering is 182162306a36Sopenharmony_ci * dependent on the order in which operations need to be performed so 182262306a36Sopenharmony_ci * required initial conditions are always met. 182362306a36Sopenharmony_ci * 182462306a36Sopenharmony_ci * 2. Cancelled buffers are recorded in pass 1 in a separate table and 182562306a36Sopenharmony_ci * there's nothing to replay from them so we can simply cull them 182662306a36Sopenharmony_ci * from the transaction. However, we can't do that until after we've 182762306a36Sopenharmony_ci * replayed all the other items because they may be dependent on the 182862306a36Sopenharmony_ci * cancelled buffer and replaying the cancelled buffer can remove it 182962306a36Sopenharmony_ci * form the cancelled buffer table. Hence they have tobe done last. 183062306a36Sopenharmony_ci * 183162306a36Sopenharmony_ci * 3. Inode allocation buffers must be replayed before inode items that 183262306a36Sopenharmony_ci * read the buffer and replay changes into it. For filesystems using the 183362306a36Sopenharmony_ci * ICREATE transactions, this means XFS_LI_ICREATE objects need to get 183462306a36Sopenharmony_ci * treated the same as inode allocation buffers as they create and 183562306a36Sopenharmony_ci * initialise the buffers directly. 183662306a36Sopenharmony_ci * 183762306a36Sopenharmony_ci * 4. Inode unlink buffers must be replayed after inode items are replayed. 183862306a36Sopenharmony_ci * This ensures that inodes are completely flushed to the inode buffer 183962306a36Sopenharmony_ci * in a "free" state before we remove the unlinked inode list pointer. 184062306a36Sopenharmony_ci * 184162306a36Sopenharmony_ci * Hence the ordering needs to be inode allocation buffers first, inode items 184262306a36Sopenharmony_ci * second, inode unlink buffers third and cancelled buffers last. 184362306a36Sopenharmony_ci * 184462306a36Sopenharmony_ci * But there's a problem with that - we can't tell an inode allocation buffer 184562306a36Sopenharmony_ci * apart from a regular buffer, so we can't separate them. We can, however, 184662306a36Sopenharmony_ci * tell an inode unlink buffer from the others, and so we can separate them out 184762306a36Sopenharmony_ci * from all the other buffers and move them to last. 184862306a36Sopenharmony_ci * 184962306a36Sopenharmony_ci * Hence, 4 lists, in order from head to tail: 185062306a36Sopenharmony_ci * - buffer_list for all buffers except cancelled/inode unlink buffers 185162306a36Sopenharmony_ci * - item_list for all non-buffer items 185262306a36Sopenharmony_ci * - inode_buffer_list for inode unlink buffers 185362306a36Sopenharmony_ci * - cancel_list for the cancelled buffers 185462306a36Sopenharmony_ci * 185562306a36Sopenharmony_ci * Note that we add objects to the tail of the lists so that first-to-last 185662306a36Sopenharmony_ci * ordering is preserved within the lists. Adding objects to the head of the 185762306a36Sopenharmony_ci * list means when we traverse from the head we walk them in last-to-first 185862306a36Sopenharmony_ci * order. For cancelled buffers and inode unlink buffers this doesn't matter, 185962306a36Sopenharmony_ci * but for all other items there may be specific ordering that we need to 186062306a36Sopenharmony_ci * preserve. 186162306a36Sopenharmony_ci */ 186262306a36Sopenharmony_ciSTATIC int 186362306a36Sopenharmony_cixlog_recover_reorder_trans( 186462306a36Sopenharmony_ci struct xlog *log, 186562306a36Sopenharmony_ci struct xlog_recover *trans, 186662306a36Sopenharmony_ci int pass) 186762306a36Sopenharmony_ci{ 186862306a36Sopenharmony_ci struct xlog_recover_item *item, *n; 186962306a36Sopenharmony_ci int error = 0; 187062306a36Sopenharmony_ci LIST_HEAD(sort_list); 187162306a36Sopenharmony_ci LIST_HEAD(cancel_list); 187262306a36Sopenharmony_ci LIST_HEAD(buffer_list); 187362306a36Sopenharmony_ci LIST_HEAD(inode_buffer_list); 187462306a36Sopenharmony_ci LIST_HEAD(item_list); 187562306a36Sopenharmony_ci 187662306a36Sopenharmony_ci list_splice_init(&trans->r_itemq, &sort_list); 187762306a36Sopenharmony_ci list_for_each_entry_safe(item, n, &sort_list, ri_list) { 187862306a36Sopenharmony_ci enum xlog_recover_reorder fate = XLOG_REORDER_ITEM_LIST; 187962306a36Sopenharmony_ci 188062306a36Sopenharmony_ci item->ri_ops = xlog_find_item_ops(item); 188162306a36Sopenharmony_ci if (!item->ri_ops) { 188262306a36Sopenharmony_ci xfs_warn(log->l_mp, 188362306a36Sopenharmony_ci "%s: unrecognized type of log operation (%d)", 188462306a36Sopenharmony_ci __func__, ITEM_TYPE(item)); 188562306a36Sopenharmony_ci ASSERT(0); 188662306a36Sopenharmony_ci /* 188762306a36Sopenharmony_ci * return the remaining items back to the transaction 188862306a36Sopenharmony_ci * item list so they can be freed in caller. 188962306a36Sopenharmony_ci */ 189062306a36Sopenharmony_ci if (!list_empty(&sort_list)) 189162306a36Sopenharmony_ci list_splice_init(&sort_list, &trans->r_itemq); 189262306a36Sopenharmony_ci error = -EFSCORRUPTED; 189362306a36Sopenharmony_ci break; 189462306a36Sopenharmony_ci } 189562306a36Sopenharmony_ci 189662306a36Sopenharmony_ci if (item->ri_ops->reorder) 189762306a36Sopenharmony_ci fate = item->ri_ops->reorder(item); 189862306a36Sopenharmony_ci 189962306a36Sopenharmony_ci switch (fate) { 190062306a36Sopenharmony_ci case XLOG_REORDER_BUFFER_LIST: 190162306a36Sopenharmony_ci list_move_tail(&item->ri_list, &buffer_list); 190262306a36Sopenharmony_ci break; 190362306a36Sopenharmony_ci case XLOG_REORDER_CANCEL_LIST: 190462306a36Sopenharmony_ci trace_xfs_log_recover_item_reorder_head(log, 190562306a36Sopenharmony_ci trans, item, pass); 190662306a36Sopenharmony_ci list_move(&item->ri_list, &cancel_list); 190762306a36Sopenharmony_ci break; 190862306a36Sopenharmony_ci case XLOG_REORDER_INODE_BUFFER_LIST: 190962306a36Sopenharmony_ci list_move(&item->ri_list, &inode_buffer_list); 191062306a36Sopenharmony_ci break; 191162306a36Sopenharmony_ci case XLOG_REORDER_ITEM_LIST: 191262306a36Sopenharmony_ci trace_xfs_log_recover_item_reorder_tail(log, 191362306a36Sopenharmony_ci trans, item, pass); 191462306a36Sopenharmony_ci list_move_tail(&item->ri_list, &item_list); 191562306a36Sopenharmony_ci break; 191662306a36Sopenharmony_ci } 191762306a36Sopenharmony_ci } 191862306a36Sopenharmony_ci 191962306a36Sopenharmony_ci ASSERT(list_empty(&sort_list)); 192062306a36Sopenharmony_ci if (!list_empty(&buffer_list)) 192162306a36Sopenharmony_ci list_splice(&buffer_list, &trans->r_itemq); 192262306a36Sopenharmony_ci if (!list_empty(&item_list)) 192362306a36Sopenharmony_ci list_splice_tail(&item_list, &trans->r_itemq); 192462306a36Sopenharmony_ci if (!list_empty(&inode_buffer_list)) 192562306a36Sopenharmony_ci list_splice_tail(&inode_buffer_list, &trans->r_itemq); 192662306a36Sopenharmony_ci if (!list_empty(&cancel_list)) 192762306a36Sopenharmony_ci list_splice_tail(&cancel_list, &trans->r_itemq); 192862306a36Sopenharmony_ci return error; 192962306a36Sopenharmony_ci} 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_civoid 193262306a36Sopenharmony_cixlog_buf_readahead( 193362306a36Sopenharmony_ci struct xlog *log, 193462306a36Sopenharmony_ci xfs_daddr_t blkno, 193562306a36Sopenharmony_ci uint len, 193662306a36Sopenharmony_ci const struct xfs_buf_ops *ops) 193762306a36Sopenharmony_ci{ 193862306a36Sopenharmony_ci if (!xlog_is_buffer_cancelled(log, blkno, len)) 193962306a36Sopenharmony_ci xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops); 194062306a36Sopenharmony_ci} 194162306a36Sopenharmony_ci 194262306a36Sopenharmony_ciSTATIC int 194362306a36Sopenharmony_cixlog_recover_items_pass2( 194462306a36Sopenharmony_ci struct xlog *log, 194562306a36Sopenharmony_ci struct xlog_recover *trans, 194662306a36Sopenharmony_ci struct list_head *buffer_list, 194762306a36Sopenharmony_ci struct list_head *item_list) 194862306a36Sopenharmony_ci{ 194962306a36Sopenharmony_ci struct xlog_recover_item *item; 195062306a36Sopenharmony_ci int error = 0; 195162306a36Sopenharmony_ci 195262306a36Sopenharmony_ci list_for_each_entry(item, item_list, ri_list) { 195362306a36Sopenharmony_ci trace_xfs_log_recover_item_recover(log, trans, item, 195462306a36Sopenharmony_ci XLOG_RECOVER_PASS2); 195562306a36Sopenharmony_ci 195662306a36Sopenharmony_ci if (item->ri_ops->commit_pass2) 195762306a36Sopenharmony_ci error = item->ri_ops->commit_pass2(log, buffer_list, 195862306a36Sopenharmony_ci item, trans->r_lsn); 195962306a36Sopenharmony_ci if (error) 196062306a36Sopenharmony_ci return error; 196162306a36Sopenharmony_ci } 196262306a36Sopenharmony_ci 196362306a36Sopenharmony_ci return error; 196462306a36Sopenharmony_ci} 196562306a36Sopenharmony_ci 196662306a36Sopenharmony_ci/* 196762306a36Sopenharmony_ci * Perform the transaction. 196862306a36Sopenharmony_ci * 196962306a36Sopenharmony_ci * If the transaction modifies a buffer or inode, do it now. Otherwise, 197062306a36Sopenharmony_ci * EFIs and EFDs get queued up by adding entries into the AIL for them. 197162306a36Sopenharmony_ci */ 197262306a36Sopenharmony_ciSTATIC int 197362306a36Sopenharmony_cixlog_recover_commit_trans( 197462306a36Sopenharmony_ci struct xlog *log, 197562306a36Sopenharmony_ci struct xlog_recover *trans, 197662306a36Sopenharmony_ci int pass, 197762306a36Sopenharmony_ci struct list_head *buffer_list) 197862306a36Sopenharmony_ci{ 197962306a36Sopenharmony_ci int error = 0; 198062306a36Sopenharmony_ci int items_queued = 0; 198162306a36Sopenharmony_ci struct xlog_recover_item *item; 198262306a36Sopenharmony_ci struct xlog_recover_item *next; 198362306a36Sopenharmony_ci LIST_HEAD (ra_list); 198462306a36Sopenharmony_ci LIST_HEAD (done_list); 198562306a36Sopenharmony_ci 198662306a36Sopenharmony_ci #define XLOG_RECOVER_COMMIT_QUEUE_MAX 100 198762306a36Sopenharmony_ci 198862306a36Sopenharmony_ci hlist_del_init(&trans->r_list); 198962306a36Sopenharmony_ci 199062306a36Sopenharmony_ci error = xlog_recover_reorder_trans(log, trans, pass); 199162306a36Sopenharmony_ci if (error) 199262306a36Sopenharmony_ci return error; 199362306a36Sopenharmony_ci 199462306a36Sopenharmony_ci list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) { 199562306a36Sopenharmony_ci trace_xfs_log_recover_item_recover(log, trans, item, pass); 199662306a36Sopenharmony_ci 199762306a36Sopenharmony_ci switch (pass) { 199862306a36Sopenharmony_ci case XLOG_RECOVER_PASS1: 199962306a36Sopenharmony_ci if (item->ri_ops->commit_pass1) 200062306a36Sopenharmony_ci error = item->ri_ops->commit_pass1(log, item); 200162306a36Sopenharmony_ci break; 200262306a36Sopenharmony_ci case XLOG_RECOVER_PASS2: 200362306a36Sopenharmony_ci if (item->ri_ops->ra_pass2) 200462306a36Sopenharmony_ci item->ri_ops->ra_pass2(log, item); 200562306a36Sopenharmony_ci list_move_tail(&item->ri_list, &ra_list); 200662306a36Sopenharmony_ci items_queued++; 200762306a36Sopenharmony_ci if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) { 200862306a36Sopenharmony_ci error = xlog_recover_items_pass2(log, trans, 200962306a36Sopenharmony_ci buffer_list, &ra_list); 201062306a36Sopenharmony_ci list_splice_tail_init(&ra_list, &done_list); 201162306a36Sopenharmony_ci items_queued = 0; 201262306a36Sopenharmony_ci } 201362306a36Sopenharmony_ci 201462306a36Sopenharmony_ci break; 201562306a36Sopenharmony_ci default: 201662306a36Sopenharmony_ci ASSERT(0); 201762306a36Sopenharmony_ci } 201862306a36Sopenharmony_ci 201962306a36Sopenharmony_ci if (error) 202062306a36Sopenharmony_ci goto out; 202162306a36Sopenharmony_ci } 202262306a36Sopenharmony_ci 202362306a36Sopenharmony_ciout: 202462306a36Sopenharmony_ci if (!list_empty(&ra_list)) { 202562306a36Sopenharmony_ci if (!error) 202662306a36Sopenharmony_ci error = xlog_recover_items_pass2(log, trans, 202762306a36Sopenharmony_ci buffer_list, &ra_list); 202862306a36Sopenharmony_ci list_splice_tail_init(&ra_list, &done_list); 202962306a36Sopenharmony_ci } 203062306a36Sopenharmony_ci 203162306a36Sopenharmony_ci if (!list_empty(&done_list)) 203262306a36Sopenharmony_ci list_splice_init(&done_list, &trans->r_itemq); 203362306a36Sopenharmony_ci 203462306a36Sopenharmony_ci return error; 203562306a36Sopenharmony_ci} 203662306a36Sopenharmony_ci 203762306a36Sopenharmony_ciSTATIC void 203862306a36Sopenharmony_cixlog_recover_add_item( 203962306a36Sopenharmony_ci struct list_head *head) 204062306a36Sopenharmony_ci{ 204162306a36Sopenharmony_ci struct xlog_recover_item *item; 204262306a36Sopenharmony_ci 204362306a36Sopenharmony_ci item = kmem_zalloc(sizeof(struct xlog_recover_item), 0); 204462306a36Sopenharmony_ci INIT_LIST_HEAD(&item->ri_list); 204562306a36Sopenharmony_ci list_add_tail(&item->ri_list, head); 204662306a36Sopenharmony_ci} 204762306a36Sopenharmony_ci 204862306a36Sopenharmony_ciSTATIC int 204962306a36Sopenharmony_cixlog_recover_add_to_cont_trans( 205062306a36Sopenharmony_ci struct xlog *log, 205162306a36Sopenharmony_ci struct xlog_recover *trans, 205262306a36Sopenharmony_ci char *dp, 205362306a36Sopenharmony_ci int len) 205462306a36Sopenharmony_ci{ 205562306a36Sopenharmony_ci struct xlog_recover_item *item; 205662306a36Sopenharmony_ci char *ptr, *old_ptr; 205762306a36Sopenharmony_ci int old_len; 205862306a36Sopenharmony_ci 205962306a36Sopenharmony_ci /* 206062306a36Sopenharmony_ci * If the transaction is empty, the header was split across this and the 206162306a36Sopenharmony_ci * previous record. Copy the rest of the header. 206262306a36Sopenharmony_ci */ 206362306a36Sopenharmony_ci if (list_empty(&trans->r_itemq)) { 206462306a36Sopenharmony_ci ASSERT(len <= sizeof(struct xfs_trans_header)); 206562306a36Sopenharmony_ci if (len > sizeof(struct xfs_trans_header)) { 206662306a36Sopenharmony_ci xfs_warn(log->l_mp, "%s: bad header length", __func__); 206762306a36Sopenharmony_ci return -EFSCORRUPTED; 206862306a36Sopenharmony_ci } 206962306a36Sopenharmony_ci 207062306a36Sopenharmony_ci xlog_recover_add_item(&trans->r_itemq); 207162306a36Sopenharmony_ci ptr = (char *)&trans->r_theader + 207262306a36Sopenharmony_ci sizeof(struct xfs_trans_header) - len; 207362306a36Sopenharmony_ci memcpy(ptr, dp, len); 207462306a36Sopenharmony_ci return 0; 207562306a36Sopenharmony_ci } 207662306a36Sopenharmony_ci 207762306a36Sopenharmony_ci /* take the tail entry */ 207862306a36Sopenharmony_ci item = list_entry(trans->r_itemq.prev, struct xlog_recover_item, 207962306a36Sopenharmony_ci ri_list); 208062306a36Sopenharmony_ci 208162306a36Sopenharmony_ci old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; 208262306a36Sopenharmony_ci old_len = item->ri_buf[item->ri_cnt-1].i_len; 208362306a36Sopenharmony_ci 208462306a36Sopenharmony_ci ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL); 208562306a36Sopenharmony_ci if (!ptr) 208662306a36Sopenharmony_ci return -ENOMEM; 208762306a36Sopenharmony_ci memcpy(&ptr[old_len], dp, len); 208862306a36Sopenharmony_ci item->ri_buf[item->ri_cnt-1].i_len += len; 208962306a36Sopenharmony_ci item->ri_buf[item->ri_cnt-1].i_addr = ptr; 209062306a36Sopenharmony_ci trace_xfs_log_recover_item_add_cont(log, trans, item, 0); 209162306a36Sopenharmony_ci return 0; 209262306a36Sopenharmony_ci} 209362306a36Sopenharmony_ci 209462306a36Sopenharmony_ci/* 209562306a36Sopenharmony_ci * The next region to add is the start of a new region. It could be 209662306a36Sopenharmony_ci * a whole region or it could be the first part of a new region. Because 209762306a36Sopenharmony_ci * of this, the assumption here is that the type and size fields of all 209862306a36Sopenharmony_ci * format structures fit into the first 32 bits of the structure. 209962306a36Sopenharmony_ci * 210062306a36Sopenharmony_ci * This works because all regions must be 32 bit aligned. Therefore, we 210162306a36Sopenharmony_ci * either have both fields or we have neither field. In the case we have 210262306a36Sopenharmony_ci * neither field, the data part of the region is zero length. We only have 210362306a36Sopenharmony_ci * a log_op_header and can throw away the header since a new one will appear 210462306a36Sopenharmony_ci * later. If we have at least 4 bytes, then we can determine how many regions 210562306a36Sopenharmony_ci * will appear in the current log item. 210662306a36Sopenharmony_ci */ 210762306a36Sopenharmony_ciSTATIC int 210862306a36Sopenharmony_cixlog_recover_add_to_trans( 210962306a36Sopenharmony_ci struct xlog *log, 211062306a36Sopenharmony_ci struct xlog_recover *trans, 211162306a36Sopenharmony_ci char *dp, 211262306a36Sopenharmony_ci int len) 211362306a36Sopenharmony_ci{ 211462306a36Sopenharmony_ci struct xfs_inode_log_format *in_f; /* any will do */ 211562306a36Sopenharmony_ci struct xlog_recover_item *item; 211662306a36Sopenharmony_ci char *ptr; 211762306a36Sopenharmony_ci 211862306a36Sopenharmony_ci if (!len) 211962306a36Sopenharmony_ci return 0; 212062306a36Sopenharmony_ci if (list_empty(&trans->r_itemq)) { 212162306a36Sopenharmony_ci /* we need to catch log corruptions here */ 212262306a36Sopenharmony_ci if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { 212362306a36Sopenharmony_ci xfs_warn(log->l_mp, "%s: bad header magic number", 212462306a36Sopenharmony_ci __func__); 212562306a36Sopenharmony_ci ASSERT(0); 212662306a36Sopenharmony_ci return -EFSCORRUPTED; 212762306a36Sopenharmony_ci } 212862306a36Sopenharmony_ci 212962306a36Sopenharmony_ci if (len > sizeof(struct xfs_trans_header)) { 213062306a36Sopenharmony_ci xfs_warn(log->l_mp, "%s: bad header length", __func__); 213162306a36Sopenharmony_ci ASSERT(0); 213262306a36Sopenharmony_ci return -EFSCORRUPTED; 213362306a36Sopenharmony_ci } 213462306a36Sopenharmony_ci 213562306a36Sopenharmony_ci /* 213662306a36Sopenharmony_ci * The transaction header can be arbitrarily split across op 213762306a36Sopenharmony_ci * records. If we don't have the whole thing here, copy what we 213862306a36Sopenharmony_ci * do have and handle the rest in the next record. 213962306a36Sopenharmony_ci */ 214062306a36Sopenharmony_ci if (len == sizeof(struct xfs_trans_header)) 214162306a36Sopenharmony_ci xlog_recover_add_item(&trans->r_itemq); 214262306a36Sopenharmony_ci memcpy(&trans->r_theader, dp, len); 214362306a36Sopenharmony_ci return 0; 214462306a36Sopenharmony_ci } 214562306a36Sopenharmony_ci 214662306a36Sopenharmony_ci ptr = kmem_alloc(len, 0); 214762306a36Sopenharmony_ci memcpy(ptr, dp, len); 214862306a36Sopenharmony_ci in_f = (struct xfs_inode_log_format *)ptr; 214962306a36Sopenharmony_ci 215062306a36Sopenharmony_ci /* take the tail entry */ 215162306a36Sopenharmony_ci item = list_entry(trans->r_itemq.prev, struct xlog_recover_item, 215262306a36Sopenharmony_ci ri_list); 215362306a36Sopenharmony_ci if (item->ri_total != 0 && 215462306a36Sopenharmony_ci item->ri_total == item->ri_cnt) { 215562306a36Sopenharmony_ci /* tail item is in use, get a new one */ 215662306a36Sopenharmony_ci xlog_recover_add_item(&trans->r_itemq); 215762306a36Sopenharmony_ci item = list_entry(trans->r_itemq.prev, 215862306a36Sopenharmony_ci struct xlog_recover_item, ri_list); 215962306a36Sopenharmony_ci } 216062306a36Sopenharmony_ci 216162306a36Sopenharmony_ci if (item->ri_total == 0) { /* first region to be added */ 216262306a36Sopenharmony_ci if (in_f->ilf_size == 0 || 216362306a36Sopenharmony_ci in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { 216462306a36Sopenharmony_ci xfs_warn(log->l_mp, 216562306a36Sopenharmony_ci "bad number of regions (%d) in inode log format", 216662306a36Sopenharmony_ci in_f->ilf_size); 216762306a36Sopenharmony_ci ASSERT(0); 216862306a36Sopenharmony_ci kmem_free(ptr); 216962306a36Sopenharmony_ci return -EFSCORRUPTED; 217062306a36Sopenharmony_ci } 217162306a36Sopenharmony_ci 217262306a36Sopenharmony_ci item->ri_total = in_f->ilf_size; 217362306a36Sopenharmony_ci item->ri_buf = 217462306a36Sopenharmony_ci kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), 217562306a36Sopenharmony_ci 0); 217662306a36Sopenharmony_ci } 217762306a36Sopenharmony_ci 217862306a36Sopenharmony_ci if (item->ri_total <= item->ri_cnt) { 217962306a36Sopenharmony_ci xfs_warn(log->l_mp, 218062306a36Sopenharmony_ci "log item region count (%d) overflowed size (%d)", 218162306a36Sopenharmony_ci item->ri_cnt, item->ri_total); 218262306a36Sopenharmony_ci ASSERT(0); 218362306a36Sopenharmony_ci kmem_free(ptr); 218462306a36Sopenharmony_ci return -EFSCORRUPTED; 218562306a36Sopenharmony_ci } 218662306a36Sopenharmony_ci 218762306a36Sopenharmony_ci /* Description region is ri_buf[0] */ 218862306a36Sopenharmony_ci item->ri_buf[item->ri_cnt].i_addr = ptr; 218962306a36Sopenharmony_ci item->ri_buf[item->ri_cnt].i_len = len; 219062306a36Sopenharmony_ci item->ri_cnt++; 219162306a36Sopenharmony_ci trace_xfs_log_recover_item_add(log, trans, item, 0); 219262306a36Sopenharmony_ci return 0; 219362306a36Sopenharmony_ci} 219462306a36Sopenharmony_ci 219562306a36Sopenharmony_ci/* 219662306a36Sopenharmony_ci * Free up any resources allocated by the transaction 219762306a36Sopenharmony_ci * 219862306a36Sopenharmony_ci * Remember that EFIs, EFDs, and IUNLINKs are handled later. 219962306a36Sopenharmony_ci */ 220062306a36Sopenharmony_ciSTATIC void 220162306a36Sopenharmony_cixlog_recover_free_trans( 220262306a36Sopenharmony_ci struct xlog_recover *trans) 220362306a36Sopenharmony_ci{ 220462306a36Sopenharmony_ci struct xlog_recover_item *item, *n; 220562306a36Sopenharmony_ci int i; 220662306a36Sopenharmony_ci 220762306a36Sopenharmony_ci hlist_del_init(&trans->r_list); 220862306a36Sopenharmony_ci 220962306a36Sopenharmony_ci list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { 221062306a36Sopenharmony_ci /* Free the regions in the item. */ 221162306a36Sopenharmony_ci list_del(&item->ri_list); 221262306a36Sopenharmony_ci for (i = 0; i < item->ri_cnt; i++) 221362306a36Sopenharmony_ci kmem_free(item->ri_buf[i].i_addr); 221462306a36Sopenharmony_ci /* Free the item itself */ 221562306a36Sopenharmony_ci kmem_free(item->ri_buf); 221662306a36Sopenharmony_ci kmem_free(item); 221762306a36Sopenharmony_ci } 221862306a36Sopenharmony_ci /* Free the transaction recover structure */ 221962306a36Sopenharmony_ci kmem_free(trans); 222062306a36Sopenharmony_ci} 222162306a36Sopenharmony_ci 222262306a36Sopenharmony_ci/* 222362306a36Sopenharmony_ci * On error or completion, trans is freed. 222462306a36Sopenharmony_ci */ 222562306a36Sopenharmony_ciSTATIC int 222662306a36Sopenharmony_cixlog_recovery_process_trans( 222762306a36Sopenharmony_ci struct xlog *log, 222862306a36Sopenharmony_ci struct xlog_recover *trans, 222962306a36Sopenharmony_ci char *dp, 223062306a36Sopenharmony_ci unsigned int len, 223162306a36Sopenharmony_ci unsigned int flags, 223262306a36Sopenharmony_ci int pass, 223362306a36Sopenharmony_ci struct list_head *buffer_list) 223462306a36Sopenharmony_ci{ 223562306a36Sopenharmony_ci int error = 0; 223662306a36Sopenharmony_ci bool freeit = false; 223762306a36Sopenharmony_ci 223862306a36Sopenharmony_ci /* mask off ophdr transaction container flags */ 223962306a36Sopenharmony_ci flags &= ~XLOG_END_TRANS; 224062306a36Sopenharmony_ci if (flags & XLOG_WAS_CONT_TRANS) 224162306a36Sopenharmony_ci flags &= ~XLOG_CONTINUE_TRANS; 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci /* 224462306a36Sopenharmony_ci * Callees must not free the trans structure. We'll decide if we need to 224562306a36Sopenharmony_ci * free it or not based on the operation being done and it's result. 224662306a36Sopenharmony_ci */ 224762306a36Sopenharmony_ci switch (flags) { 224862306a36Sopenharmony_ci /* expected flag values */ 224962306a36Sopenharmony_ci case 0: 225062306a36Sopenharmony_ci case XLOG_CONTINUE_TRANS: 225162306a36Sopenharmony_ci error = xlog_recover_add_to_trans(log, trans, dp, len); 225262306a36Sopenharmony_ci break; 225362306a36Sopenharmony_ci case XLOG_WAS_CONT_TRANS: 225462306a36Sopenharmony_ci error = xlog_recover_add_to_cont_trans(log, trans, dp, len); 225562306a36Sopenharmony_ci break; 225662306a36Sopenharmony_ci case XLOG_COMMIT_TRANS: 225762306a36Sopenharmony_ci error = xlog_recover_commit_trans(log, trans, pass, 225862306a36Sopenharmony_ci buffer_list); 225962306a36Sopenharmony_ci /* success or fail, we are now done with this transaction. */ 226062306a36Sopenharmony_ci freeit = true; 226162306a36Sopenharmony_ci break; 226262306a36Sopenharmony_ci 226362306a36Sopenharmony_ci /* unexpected flag values */ 226462306a36Sopenharmony_ci case XLOG_UNMOUNT_TRANS: 226562306a36Sopenharmony_ci /* just skip trans */ 226662306a36Sopenharmony_ci xfs_warn(log->l_mp, "%s: Unmount LR", __func__); 226762306a36Sopenharmony_ci freeit = true; 226862306a36Sopenharmony_ci break; 226962306a36Sopenharmony_ci case XLOG_START_TRANS: 227062306a36Sopenharmony_ci default: 227162306a36Sopenharmony_ci xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); 227262306a36Sopenharmony_ci ASSERT(0); 227362306a36Sopenharmony_ci error = -EFSCORRUPTED; 227462306a36Sopenharmony_ci break; 227562306a36Sopenharmony_ci } 227662306a36Sopenharmony_ci if (error || freeit) 227762306a36Sopenharmony_ci xlog_recover_free_trans(trans); 227862306a36Sopenharmony_ci return error; 227962306a36Sopenharmony_ci} 228062306a36Sopenharmony_ci 228162306a36Sopenharmony_ci/* 228262306a36Sopenharmony_ci * Lookup the transaction recovery structure associated with the ID in the 228362306a36Sopenharmony_ci * current ophdr. If the transaction doesn't exist and the start flag is set in 228462306a36Sopenharmony_ci * the ophdr, then allocate a new transaction for future ID matches to find. 228562306a36Sopenharmony_ci * Either way, return what we found during the lookup - an existing transaction 228662306a36Sopenharmony_ci * or nothing. 228762306a36Sopenharmony_ci */ 228862306a36Sopenharmony_ciSTATIC struct xlog_recover * 228962306a36Sopenharmony_cixlog_recover_ophdr_to_trans( 229062306a36Sopenharmony_ci struct hlist_head rhash[], 229162306a36Sopenharmony_ci struct xlog_rec_header *rhead, 229262306a36Sopenharmony_ci struct xlog_op_header *ohead) 229362306a36Sopenharmony_ci{ 229462306a36Sopenharmony_ci struct xlog_recover *trans; 229562306a36Sopenharmony_ci xlog_tid_t tid; 229662306a36Sopenharmony_ci struct hlist_head *rhp; 229762306a36Sopenharmony_ci 229862306a36Sopenharmony_ci tid = be32_to_cpu(ohead->oh_tid); 229962306a36Sopenharmony_ci rhp = &rhash[XLOG_RHASH(tid)]; 230062306a36Sopenharmony_ci hlist_for_each_entry(trans, rhp, r_list) { 230162306a36Sopenharmony_ci if (trans->r_log_tid == tid) 230262306a36Sopenharmony_ci return trans; 230362306a36Sopenharmony_ci } 230462306a36Sopenharmony_ci 230562306a36Sopenharmony_ci /* 230662306a36Sopenharmony_ci * skip over non-start transaction headers - we could be 230762306a36Sopenharmony_ci * processing slack space before the next transaction starts 230862306a36Sopenharmony_ci */ 230962306a36Sopenharmony_ci if (!(ohead->oh_flags & XLOG_START_TRANS)) 231062306a36Sopenharmony_ci return NULL; 231162306a36Sopenharmony_ci 231262306a36Sopenharmony_ci ASSERT(be32_to_cpu(ohead->oh_len) == 0); 231362306a36Sopenharmony_ci 231462306a36Sopenharmony_ci /* 231562306a36Sopenharmony_ci * This is a new transaction so allocate a new recovery container to 231662306a36Sopenharmony_ci * hold the recovery ops that will follow. 231762306a36Sopenharmony_ci */ 231862306a36Sopenharmony_ci trans = kmem_zalloc(sizeof(struct xlog_recover), 0); 231962306a36Sopenharmony_ci trans->r_log_tid = tid; 232062306a36Sopenharmony_ci trans->r_lsn = be64_to_cpu(rhead->h_lsn); 232162306a36Sopenharmony_ci INIT_LIST_HEAD(&trans->r_itemq); 232262306a36Sopenharmony_ci INIT_HLIST_NODE(&trans->r_list); 232362306a36Sopenharmony_ci hlist_add_head(&trans->r_list, rhp); 232462306a36Sopenharmony_ci 232562306a36Sopenharmony_ci /* 232662306a36Sopenharmony_ci * Nothing more to do for this ophdr. Items to be added to this new 232762306a36Sopenharmony_ci * transaction will be in subsequent ophdr containers. 232862306a36Sopenharmony_ci */ 232962306a36Sopenharmony_ci return NULL; 233062306a36Sopenharmony_ci} 233162306a36Sopenharmony_ci 233262306a36Sopenharmony_ciSTATIC int 233362306a36Sopenharmony_cixlog_recover_process_ophdr( 233462306a36Sopenharmony_ci struct xlog *log, 233562306a36Sopenharmony_ci struct hlist_head rhash[], 233662306a36Sopenharmony_ci struct xlog_rec_header *rhead, 233762306a36Sopenharmony_ci struct xlog_op_header *ohead, 233862306a36Sopenharmony_ci char *dp, 233962306a36Sopenharmony_ci char *end, 234062306a36Sopenharmony_ci int pass, 234162306a36Sopenharmony_ci struct list_head *buffer_list) 234262306a36Sopenharmony_ci{ 234362306a36Sopenharmony_ci struct xlog_recover *trans; 234462306a36Sopenharmony_ci unsigned int len; 234562306a36Sopenharmony_ci int error; 234662306a36Sopenharmony_ci 234762306a36Sopenharmony_ci /* Do we understand who wrote this op? */ 234862306a36Sopenharmony_ci if (ohead->oh_clientid != XFS_TRANSACTION && 234962306a36Sopenharmony_ci ohead->oh_clientid != XFS_LOG) { 235062306a36Sopenharmony_ci xfs_warn(log->l_mp, "%s: bad clientid 0x%x", 235162306a36Sopenharmony_ci __func__, ohead->oh_clientid); 235262306a36Sopenharmony_ci ASSERT(0); 235362306a36Sopenharmony_ci return -EFSCORRUPTED; 235462306a36Sopenharmony_ci } 235562306a36Sopenharmony_ci 235662306a36Sopenharmony_ci /* 235762306a36Sopenharmony_ci * Check the ophdr contains all the data it is supposed to contain. 235862306a36Sopenharmony_ci */ 235962306a36Sopenharmony_ci len = be32_to_cpu(ohead->oh_len); 236062306a36Sopenharmony_ci if (dp + len > end) { 236162306a36Sopenharmony_ci xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len); 236262306a36Sopenharmony_ci WARN_ON(1); 236362306a36Sopenharmony_ci return -EFSCORRUPTED; 236462306a36Sopenharmony_ci } 236562306a36Sopenharmony_ci 236662306a36Sopenharmony_ci trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead); 236762306a36Sopenharmony_ci if (!trans) { 236862306a36Sopenharmony_ci /* nothing to do, so skip over this ophdr */ 236962306a36Sopenharmony_ci return 0; 237062306a36Sopenharmony_ci } 237162306a36Sopenharmony_ci 237262306a36Sopenharmony_ci /* 237362306a36Sopenharmony_ci * The recovered buffer queue is drained only once we know that all 237462306a36Sopenharmony_ci * recovery items for the current LSN have been processed. This is 237562306a36Sopenharmony_ci * required because: 237662306a36Sopenharmony_ci * 237762306a36Sopenharmony_ci * - Buffer write submission updates the metadata LSN of the buffer. 237862306a36Sopenharmony_ci * - Log recovery skips items with a metadata LSN >= the current LSN of 237962306a36Sopenharmony_ci * the recovery item. 238062306a36Sopenharmony_ci * - Separate recovery items against the same metadata buffer can share 238162306a36Sopenharmony_ci * a current LSN. I.e., consider that the LSN of a recovery item is 238262306a36Sopenharmony_ci * defined as the starting LSN of the first record in which its 238362306a36Sopenharmony_ci * transaction appears, that a record can hold multiple transactions, 238462306a36Sopenharmony_ci * and/or that a transaction can span multiple records. 238562306a36Sopenharmony_ci * 238662306a36Sopenharmony_ci * In other words, we are allowed to submit a buffer from log recovery 238762306a36Sopenharmony_ci * once per current LSN. Otherwise, we may incorrectly skip recovery 238862306a36Sopenharmony_ci * items and cause corruption. 238962306a36Sopenharmony_ci * 239062306a36Sopenharmony_ci * We don't know up front whether buffers are updated multiple times per 239162306a36Sopenharmony_ci * LSN. Therefore, track the current LSN of each commit log record as it 239262306a36Sopenharmony_ci * is processed and drain the queue when it changes. Use commit records 239362306a36Sopenharmony_ci * because they are ordered correctly by the logging code. 239462306a36Sopenharmony_ci */ 239562306a36Sopenharmony_ci if (log->l_recovery_lsn != trans->r_lsn && 239662306a36Sopenharmony_ci ohead->oh_flags & XLOG_COMMIT_TRANS) { 239762306a36Sopenharmony_ci error = xfs_buf_delwri_submit(buffer_list); 239862306a36Sopenharmony_ci if (error) 239962306a36Sopenharmony_ci return error; 240062306a36Sopenharmony_ci log->l_recovery_lsn = trans->r_lsn; 240162306a36Sopenharmony_ci } 240262306a36Sopenharmony_ci 240362306a36Sopenharmony_ci return xlog_recovery_process_trans(log, trans, dp, len, 240462306a36Sopenharmony_ci ohead->oh_flags, pass, buffer_list); 240562306a36Sopenharmony_ci} 240662306a36Sopenharmony_ci 240762306a36Sopenharmony_ci/* 240862306a36Sopenharmony_ci * There are two valid states of the r_state field. 0 indicates that the 240962306a36Sopenharmony_ci * transaction structure is in a normal state. We have either seen the 241062306a36Sopenharmony_ci * start of the transaction or the last operation we added was not a partial 241162306a36Sopenharmony_ci * operation. If the last operation we added to the transaction was a 241262306a36Sopenharmony_ci * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. 241362306a36Sopenharmony_ci * 241462306a36Sopenharmony_ci * NOTE: skip LRs with 0 data length. 241562306a36Sopenharmony_ci */ 241662306a36Sopenharmony_ciSTATIC int 241762306a36Sopenharmony_cixlog_recover_process_data( 241862306a36Sopenharmony_ci struct xlog *log, 241962306a36Sopenharmony_ci struct hlist_head rhash[], 242062306a36Sopenharmony_ci struct xlog_rec_header *rhead, 242162306a36Sopenharmony_ci char *dp, 242262306a36Sopenharmony_ci int pass, 242362306a36Sopenharmony_ci struct list_head *buffer_list) 242462306a36Sopenharmony_ci{ 242562306a36Sopenharmony_ci struct xlog_op_header *ohead; 242662306a36Sopenharmony_ci char *end; 242762306a36Sopenharmony_ci int num_logops; 242862306a36Sopenharmony_ci int error; 242962306a36Sopenharmony_ci 243062306a36Sopenharmony_ci end = dp + be32_to_cpu(rhead->h_len); 243162306a36Sopenharmony_ci num_logops = be32_to_cpu(rhead->h_num_logops); 243262306a36Sopenharmony_ci 243362306a36Sopenharmony_ci /* check the log format matches our own - else we can't recover */ 243462306a36Sopenharmony_ci if (xlog_header_check_recover(log->l_mp, rhead)) 243562306a36Sopenharmony_ci return -EIO; 243662306a36Sopenharmony_ci 243762306a36Sopenharmony_ci trace_xfs_log_recover_record(log, rhead, pass); 243862306a36Sopenharmony_ci while ((dp < end) && num_logops) { 243962306a36Sopenharmony_ci 244062306a36Sopenharmony_ci ohead = (struct xlog_op_header *)dp; 244162306a36Sopenharmony_ci dp += sizeof(*ohead); 244262306a36Sopenharmony_ci ASSERT(dp <= end); 244362306a36Sopenharmony_ci 244462306a36Sopenharmony_ci /* errors will abort recovery */ 244562306a36Sopenharmony_ci error = xlog_recover_process_ophdr(log, rhash, rhead, ohead, 244662306a36Sopenharmony_ci dp, end, pass, buffer_list); 244762306a36Sopenharmony_ci if (error) 244862306a36Sopenharmony_ci return error; 244962306a36Sopenharmony_ci 245062306a36Sopenharmony_ci dp += be32_to_cpu(ohead->oh_len); 245162306a36Sopenharmony_ci num_logops--; 245262306a36Sopenharmony_ci } 245362306a36Sopenharmony_ci return 0; 245462306a36Sopenharmony_ci} 245562306a36Sopenharmony_ci 245662306a36Sopenharmony_ci/* Take all the collected deferred ops and finish them in order. */ 245762306a36Sopenharmony_cistatic int 245862306a36Sopenharmony_cixlog_finish_defer_ops( 245962306a36Sopenharmony_ci struct xfs_mount *mp, 246062306a36Sopenharmony_ci struct list_head *capture_list) 246162306a36Sopenharmony_ci{ 246262306a36Sopenharmony_ci struct xfs_defer_capture *dfc, *next; 246362306a36Sopenharmony_ci struct xfs_trans *tp; 246462306a36Sopenharmony_ci int error = 0; 246562306a36Sopenharmony_ci 246662306a36Sopenharmony_ci list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { 246762306a36Sopenharmony_ci struct xfs_trans_res resv; 246862306a36Sopenharmony_ci struct xfs_defer_resources dres; 246962306a36Sopenharmony_ci 247062306a36Sopenharmony_ci /* 247162306a36Sopenharmony_ci * Create a new transaction reservation from the captured 247262306a36Sopenharmony_ci * information. Set logcount to 1 to force the new transaction 247362306a36Sopenharmony_ci * to regrant every roll so that we can make forward progress 247462306a36Sopenharmony_ci * in recovery no matter how full the log might be. 247562306a36Sopenharmony_ci */ 247662306a36Sopenharmony_ci resv.tr_logres = dfc->dfc_logres; 247762306a36Sopenharmony_ci resv.tr_logcount = 1; 247862306a36Sopenharmony_ci resv.tr_logflags = XFS_TRANS_PERM_LOG_RES; 247962306a36Sopenharmony_ci 248062306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres, 248162306a36Sopenharmony_ci dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp); 248262306a36Sopenharmony_ci if (error) { 248362306a36Sopenharmony_ci xlog_force_shutdown(mp->m_log, SHUTDOWN_LOG_IO_ERROR); 248462306a36Sopenharmony_ci return error; 248562306a36Sopenharmony_ci } 248662306a36Sopenharmony_ci 248762306a36Sopenharmony_ci /* 248862306a36Sopenharmony_ci * Transfer to this new transaction all the dfops we captured 248962306a36Sopenharmony_ci * from recovering a single intent item. 249062306a36Sopenharmony_ci */ 249162306a36Sopenharmony_ci list_del_init(&dfc->dfc_list); 249262306a36Sopenharmony_ci xfs_defer_ops_continue(dfc, tp, &dres); 249362306a36Sopenharmony_ci error = xfs_trans_commit(tp); 249462306a36Sopenharmony_ci xfs_defer_resources_rele(&dres); 249562306a36Sopenharmony_ci if (error) 249662306a36Sopenharmony_ci return error; 249762306a36Sopenharmony_ci } 249862306a36Sopenharmony_ci 249962306a36Sopenharmony_ci ASSERT(list_empty(capture_list)); 250062306a36Sopenharmony_ci return 0; 250162306a36Sopenharmony_ci} 250262306a36Sopenharmony_ci 250362306a36Sopenharmony_ci/* Release all the captured defer ops and capture structures in this list. */ 250462306a36Sopenharmony_cistatic void 250562306a36Sopenharmony_cixlog_abort_defer_ops( 250662306a36Sopenharmony_ci struct xfs_mount *mp, 250762306a36Sopenharmony_ci struct list_head *capture_list) 250862306a36Sopenharmony_ci{ 250962306a36Sopenharmony_ci struct xfs_defer_capture *dfc; 251062306a36Sopenharmony_ci struct xfs_defer_capture *next; 251162306a36Sopenharmony_ci 251262306a36Sopenharmony_ci list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { 251362306a36Sopenharmony_ci list_del_init(&dfc->dfc_list); 251462306a36Sopenharmony_ci xfs_defer_ops_capture_abort(mp, dfc); 251562306a36Sopenharmony_ci } 251662306a36Sopenharmony_ci} 251762306a36Sopenharmony_ci 251862306a36Sopenharmony_ci/* 251962306a36Sopenharmony_ci * When this is called, all of the log intent items which did not have 252062306a36Sopenharmony_ci * corresponding log done items should be in the AIL. What we do now is update 252162306a36Sopenharmony_ci * the data structures associated with each one. 252262306a36Sopenharmony_ci * 252362306a36Sopenharmony_ci * Since we process the log intent items in normal transactions, they will be 252462306a36Sopenharmony_ci * removed at some point after the commit. This prevents us from just walking 252562306a36Sopenharmony_ci * down the list processing each one. We'll use a flag in the intent item to 252662306a36Sopenharmony_ci * skip those that we've already processed and use the AIL iteration mechanism's 252762306a36Sopenharmony_ci * generation count to try to speed this up at least a bit. 252862306a36Sopenharmony_ci * 252962306a36Sopenharmony_ci * When we start, we know that the intents are the only things in the AIL. As we 253062306a36Sopenharmony_ci * process them, however, other items are added to the AIL. Hence we know we 253162306a36Sopenharmony_ci * have started recovery on all the pending intents when we find an non-intent 253262306a36Sopenharmony_ci * item in the AIL. 253362306a36Sopenharmony_ci */ 253462306a36Sopenharmony_ciSTATIC int 253562306a36Sopenharmony_cixlog_recover_process_intents( 253662306a36Sopenharmony_ci struct xlog *log) 253762306a36Sopenharmony_ci{ 253862306a36Sopenharmony_ci LIST_HEAD(capture_list); 253962306a36Sopenharmony_ci struct xfs_ail_cursor cur; 254062306a36Sopenharmony_ci struct xfs_log_item *lip; 254162306a36Sopenharmony_ci struct xfs_ail *ailp; 254262306a36Sopenharmony_ci int error = 0; 254362306a36Sopenharmony_ci#if defined(DEBUG) || defined(XFS_WARN) 254462306a36Sopenharmony_ci xfs_lsn_t last_lsn; 254562306a36Sopenharmony_ci#endif 254662306a36Sopenharmony_ci 254762306a36Sopenharmony_ci ailp = log->l_ailp; 254862306a36Sopenharmony_ci spin_lock(&ailp->ail_lock); 254962306a36Sopenharmony_ci#if defined(DEBUG) || defined(XFS_WARN) 255062306a36Sopenharmony_ci last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); 255162306a36Sopenharmony_ci#endif 255262306a36Sopenharmony_ci for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 255362306a36Sopenharmony_ci lip != NULL; 255462306a36Sopenharmony_ci lip = xfs_trans_ail_cursor_next(ailp, &cur)) { 255562306a36Sopenharmony_ci const struct xfs_item_ops *ops; 255662306a36Sopenharmony_ci 255762306a36Sopenharmony_ci if (!xlog_item_is_intent(lip)) 255862306a36Sopenharmony_ci break; 255962306a36Sopenharmony_ci 256062306a36Sopenharmony_ci /* 256162306a36Sopenharmony_ci * We should never see a redo item with a LSN higher than 256262306a36Sopenharmony_ci * the last transaction we found in the log at the start 256362306a36Sopenharmony_ci * of recovery. 256462306a36Sopenharmony_ci */ 256562306a36Sopenharmony_ci ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0); 256662306a36Sopenharmony_ci 256762306a36Sopenharmony_ci /* 256862306a36Sopenharmony_ci * NOTE: If your intent processing routine can create more 256962306a36Sopenharmony_ci * deferred ops, you /must/ attach them to the capture list in 257062306a36Sopenharmony_ci * the recover routine or else those subsequent intents will be 257162306a36Sopenharmony_ci * replayed in the wrong order! 257262306a36Sopenharmony_ci * 257362306a36Sopenharmony_ci * The recovery function can free the log item, so we must not 257462306a36Sopenharmony_ci * access lip after it returns. 257562306a36Sopenharmony_ci */ 257662306a36Sopenharmony_ci spin_unlock(&ailp->ail_lock); 257762306a36Sopenharmony_ci ops = lip->li_ops; 257862306a36Sopenharmony_ci error = ops->iop_recover(lip, &capture_list); 257962306a36Sopenharmony_ci spin_lock(&ailp->ail_lock); 258062306a36Sopenharmony_ci if (error) { 258162306a36Sopenharmony_ci trace_xlog_intent_recovery_failed(log->l_mp, error, 258262306a36Sopenharmony_ci ops->iop_recover); 258362306a36Sopenharmony_ci break; 258462306a36Sopenharmony_ci } 258562306a36Sopenharmony_ci } 258662306a36Sopenharmony_ci 258762306a36Sopenharmony_ci xfs_trans_ail_cursor_done(&cur); 258862306a36Sopenharmony_ci spin_unlock(&ailp->ail_lock); 258962306a36Sopenharmony_ci if (error) 259062306a36Sopenharmony_ci goto err; 259162306a36Sopenharmony_ci 259262306a36Sopenharmony_ci error = xlog_finish_defer_ops(log->l_mp, &capture_list); 259362306a36Sopenharmony_ci if (error) 259462306a36Sopenharmony_ci goto err; 259562306a36Sopenharmony_ci 259662306a36Sopenharmony_ci return 0; 259762306a36Sopenharmony_cierr: 259862306a36Sopenharmony_ci xlog_abort_defer_ops(log->l_mp, &capture_list); 259962306a36Sopenharmony_ci return error; 260062306a36Sopenharmony_ci} 260162306a36Sopenharmony_ci 260262306a36Sopenharmony_ci/* 260362306a36Sopenharmony_ci * A cancel occurs when the mount has failed and we're bailing out. Release all 260462306a36Sopenharmony_ci * pending log intent items that we haven't started recovery on so they don't 260562306a36Sopenharmony_ci * pin the AIL. 260662306a36Sopenharmony_ci */ 260762306a36Sopenharmony_ciSTATIC void 260862306a36Sopenharmony_cixlog_recover_cancel_intents( 260962306a36Sopenharmony_ci struct xlog *log) 261062306a36Sopenharmony_ci{ 261162306a36Sopenharmony_ci struct xfs_log_item *lip; 261262306a36Sopenharmony_ci struct xfs_ail_cursor cur; 261362306a36Sopenharmony_ci struct xfs_ail *ailp; 261462306a36Sopenharmony_ci 261562306a36Sopenharmony_ci ailp = log->l_ailp; 261662306a36Sopenharmony_ci spin_lock(&ailp->ail_lock); 261762306a36Sopenharmony_ci lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); 261862306a36Sopenharmony_ci while (lip != NULL) { 261962306a36Sopenharmony_ci if (!xlog_item_is_intent(lip)) 262062306a36Sopenharmony_ci break; 262162306a36Sopenharmony_ci 262262306a36Sopenharmony_ci spin_unlock(&ailp->ail_lock); 262362306a36Sopenharmony_ci lip->li_ops->iop_release(lip); 262462306a36Sopenharmony_ci spin_lock(&ailp->ail_lock); 262562306a36Sopenharmony_ci lip = xfs_trans_ail_cursor_next(ailp, &cur); 262662306a36Sopenharmony_ci } 262762306a36Sopenharmony_ci 262862306a36Sopenharmony_ci xfs_trans_ail_cursor_done(&cur); 262962306a36Sopenharmony_ci spin_unlock(&ailp->ail_lock); 263062306a36Sopenharmony_ci} 263162306a36Sopenharmony_ci 263262306a36Sopenharmony_ci/* 263362306a36Sopenharmony_ci * This routine performs a transaction to null out a bad inode pointer 263462306a36Sopenharmony_ci * in an agi unlinked inode hash bucket. 263562306a36Sopenharmony_ci */ 263662306a36Sopenharmony_ciSTATIC void 263762306a36Sopenharmony_cixlog_recover_clear_agi_bucket( 263862306a36Sopenharmony_ci struct xfs_perag *pag, 263962306a36Sopenharmony_ci int bucket) 264062306a36Sopenharmony_ci{ 264162306a36Sopenharmony_ci struct xfs_mount *mp = pag->pag_mount; 264262306a36Sopenharmony_ci struct xfs_trans *tp; 264362306a36Sopenharmony_ci struct xfs_agi *agi; 264462306a36Sopenharmony_ci struct xfs_buf *agibp; 264562306a36Sopenharmony_ci int offset; 264662306a36Sopenharmony_ci int error; 264762306a36Sopenharmony_ci 264862306a36Sopenharmony_ci error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp); 264962306a36Sopenharmony_ci if (error) 265062306a36Sopenharmony_ci goto out_error; 265162306a36Sopenharmony_ci 265262306a36Sopenharmony_ci error = xfs_read_agi(pag, tp, &agibp); 265362306a36Sopenharmony_ci if (error) 265462306a36Sopenharmony_ci goto out_abort; 265562306a36Sopenharmony_ci 265662306a36Sopenharmony_ci agi = agibp->b_addr; 265762306a36Sopenharmony_ci agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); 265862306a36Sopenharmony_ci offset = offsetof(xfs_agi_t, agi_unlinked) + 265962306a36Sopenharmony_ci (sizeof(xfs_agino_t) * bucket); 266062306a36Sopenharmony_ci xfs_trans_log_buf(tp, agibp, offset, 266162306a36Sopenharmony_ci (offset + sizeof(xfs_agino_t) - 1)); 266262306a36Sopenharmony_ci 266362306a36Sopenharmony_ci error = xfs_trans_commit(tp); 266462306a36Sopenharmony_ci if (error) 266562306a36Sopenharmony_ci goto out_error; 266662306a36Sopenharmony_ci return; 266762306a36Sopenharmony_ci 266862306a36Sopenharmony_ciout_abort: 266962306a36Sopenharmony_ci xfs_trans_cancel(tp); 267062306a36Sopenharmony_ciout_error: 267162306a36Sopenharmony_ci xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, 267262306a36Sopenharmony_ci pag->pag_agno); 267362306a36Sopenharmony_ci return; 267462306a36Sopenharmony_ci} 267562306a36Sopenharmony_ci 267662306a36Sopenharmony_cistatic int 267762306a36Sopenharmony_cixlog_recover_iunlink_bucket( 267862306a36Sopenharmony_ci struct xfs_perag *pag, 267962306a36Sopenharmony_ci struct xfs_agi *agi, 268062306a36Sopenharmony_ci int bucket) 268162306a36Sopenharmony_ci{ 268262306a36Sopenharmony_ci struct xfs_mount *mp = pag->pag_mount; 268362306a36Sopenharmony_ci struct xfs_inode *prev_ip = NULL; 268462306a36Sopenharmony_ci struct xfs_inode *ip; 268562306a36Sopenharmony_ci xfs_agino_t prev_agino, agino; 268662306a36Sopenharmony_ci int error = 0; 268762306a36Sopenharmony_ci 268862306a36Sopenharmony_ci agino = be32_to_cpu(agi->agi_unlinked[bucket]); 268962306a36Sopenharmony_ci while (agino != NULLAGINO) { 269062306a36Sopenharmony_ci error = xfs_iget(mp, NULL, 269162306a36Sopenharmony_ci XFS_AGINO_TO_INO(mp, pag->pag_agno, agino), 269262306a36Sopenharmony_ci 0, 0, &ip); 269362306a36Sopenharmony_ci if (error) 269462306a36Sopenharmony_ci break; 269562306a36Sopenharmony_ci 269662306a36Sopenharmony_ci ASSERT(VFS_I(ip)->i_nlink == 0); 269762306a36Sopenharmony_ci ASSERT(VFS_I(ip)->i_mode != 0); 269862306a36Sopenharmony_ci xfs_iflags_clear(ip, XFS_IRECOVERY); 269962306a36Sopenharmony_ci agino = ip->i_next_unlinked; 270062306a36Sopenharmony_ci 270162306a36Sopenharmony_ci if (prev_ip) { 270262306a36Sopenharmony_ci ip->i_prev_unlinked = prev_agino; 270362306a36Sopenharmony_ci xfs_irele(prev_ip); 270462306a36Sopenharmony_ci 270562306a36Sopenharmony_ci /* 270662306a36Sopenharmony_ci * Ensure the inode is removed from the unlinked list 270762306a36Sopenharmony_ci * before we continue so that it won't race with 270862306a36Sopenharmony_ci * building the in-memory list here. This could be 270962306a36Sopenharmony_ci * serialised with the agibp lock, but that just 271062306a36Sopenharmony_ci * serialises via lockstepping and it's much simpler 271162306a36Sopenharmony_ci * just to flush the inodegc queue and wait for it to 271262306a36Sopenharmony_ci * complete. 271362306a36Sopenharmony_ci */ 271462306a36Sopenharmony_ci error = xfs_inodegc_flush(mp); 271562306a36Sopenharmony_ci if (error) 271662306a36Sopenharmony_ci break; 271762306a36Sopenharmony_ci } 271862306a36Sopenharmony_ci 271962306a36Sopenharmony_ci prev_agino = agino; 272062306a36Sopenharmony_ci prev_ip = ip; 272162306a36Sopenharmony_ci } 272262306a36Sopenharmony_ci 272362306a36Sopenharmony_ci if (prev_ip) { 272462306a36Sopenharmony_ci int error2; 272562306a36Sopenharmony_ci 272662306a36Sopenharmony_ci ip->i_prev_unlinked = prev_agino; 272762306a36Sopenharmony_ci xfs_irele(prev_ip); 272862306a36Sopenharmony_ci 272962306a36Sopenharmony_ci error2 = xfs_inodegc_flush(mp); 273062306a36Sopenharmony_ci if (error2 && !error) 273162306a36Sopenharmony_ci return error2; 273262306a36Sopenharmony_ci } 273362306a36Sopenharmony_ci return error; 273462306a36Sopenharmony_ci} 273562306a36Sopenharmony_ci 273662306a36Sopenharmony_ci/* 273762306a36Sopenharmony_ci * Recover AGI unlinked lists 273862306a36Sopenharmony_ci * 273962306a36Sopenharmony_ci * This is called during recovery to process any inodes which we unlinked but 274062306a36Sopenharmony_ci * not freed when the system crashed. These inodes will be on the lists in the 274162306a36Sopenharmony_ci * AGI blocks. What we do here is scan all the AGIs and fully truncate and free 274262306a36Sopenharmony_ci * any inodes found on the lists. Each inode is removed from the lists when it 274362306a36Sopenharmony_ci * has been fully truncated and is freed. The freeing of the inode and its 274462306a36Sopenharmony_ci * removal from the list must be atomic. 274562306a36Sopenharmony_ci * 274662306a36Sopenharmony_ci * If everything we touch in the agi processing loop is already in memory, this 274762306a36Sopenharmony_ci * loop can hold the cpu for a long time. It runs without lock contention, 274862306a36Sopenharmony_ci * memory allocation contention, the need wait for IO, etc, and so will run 274962306a36Sopenharmony_ci * until we either run out of inodes to process, run low on memory or we run out 275062306a36Sopenharmony_ci * of log space. 275162306a36Sopenharmony_ci * 275262306a36Sopenharmony_ci * This behaviour is bad for latency on single CPU and non-preemptible kernels, 275362306a36Sopenharmony_ci * and can prevent other filesystem work (such as CIL pushes) from running. This 275462306a36Sopenharmony_ci * can lead to deadlocks if the recovery process runs out of log reservation 275562306a36Sopenharmony_ci * space. Hence we need to yield the CPU when there is other kernel work 275662306a36Sopenharmony_ci * scheduled on this CPU to ensure other scheduled work can run without undue 275762306a36Sopenharmony_ci * latency. 275862306a36Sopenharmony_ci */ 275962306a36Sopenharmony_cistatic void 276062306a36Sopenharmony_cixlog_recover_iunlink_ag( 276162306a36Sopenharmony_ci struct xfs_perag *pag) 276262306a36Sopenharmony_ci{ 276362306a36Sopenharmony_ci struct xfs_agi *agi; 276462306a36Sopenharmony_ci struct xfs_buf *agibp; 276562306a36Sopenharmony_ci int bucket; 276662306a36Sopenharmony_ci int error; 276762306a36Sopenharmony_ci 276862306a36Sopenharmony_ci error = xfs_read_agi(pag, NULL, &agibp); 276962306a36Sopenharmony_ci if (error) { 277062306a36Sopenharmony_ci /* 277162306a36Sopenharmony_ci * AGI is b0rked. Don't process it. 277262306a36Sopenharmony_ci * 277362306a36Sopenharmony_ci * We should probably mark the filesystem as corrupt after we've 277462306a36Sopenharmony_ci * recovered all the ag's we can.... 277562306a36Sopenharmony_ci */ 277662306a36Sopenharmony_ci return; 277762306a36Sopenharmony_ci } 277862306a36Sopenharmony_ci 277962306a36Sopenharmony_ci /* 278062306a36Sopenharmony_ci * Unlock the buffer so that it can be acquired in the normal course of 278162306a36Sopenharmony_ci * the transaction to truncate and free each inode. Because we are not 278262306a36Sopenharmony_ci * racing with anyone else here for the AGI buffer, we don't even need 278362306a36Sopenharmony_ci * to hold it locked to read the initial unlinked bucket entries out of 278462306a36Sopenharmony_ci * the buffer. We keep buffer reference though, so that it stays pinned 278562306a36Sopenharmony_ci * in memory while we need the buffer. 278662306a36Sopenharmony_ci */ 278762306a36Sopenharmony_ci agi = agibp->b_addr; 278862306a36Sopenharmony_ci xfs_buf_unlock(agibp); 278962306a36Sopenharmony_ci 279062306a36Sopenharmony_ci for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { 279162306a36Sopenharmony_ci error = xlog_recover_iunlink_bucket(pag, agi, bucket); 279262306a36Sopenharmony_ci if (error) { 279362306a36Sopenharmony_ci /* 279462306a36Sopenharmony_ci * Bucket is unrecoverable, so only a repair scan can 279562306a36Sopenharmony_ci * free the remaining unlinked inodes. Just empty the 279662306a36Sopenharmony_ci * bucket and remaining inodes on it unreferenced and 279762306a36Sopenharmony_ci * unfreeable. 279862306a36Sopenharmony_ci */ 279962306a36Sopenharmony_ci xlog_recover_clear_agi_bucket(pag, bucket); 280062306a36Sopenharmony_ci } 280162306a36Sopenharmony_ci } 280262306a36Sopenharmony_ci 280362306a36Sopenharmony_ci xfs_buf_rele(agibp); 280462306a36Sopenharmony_ci} 280562306a36Sopenharmony_ci 280662306a36Sopenharmony_cistatic void 280762306a36Sopenharmony_cixlog_recover_process_iunlinks( 280862306a36Sopenharmony_ci struct xlog *log) 280962306a36Sopenharmony_ci{ 281062306a36Sopenharmony_ci struct xfs_perag *pag; 281162306a36Sopenharmony_ci xfs_agnumber_t agno; 281262306a36Sopenharmony_ci 281362306a36Sopenharmony_ci for_each_perag(log->l_mp, agno, pag) 281462306a36Sopenharmony_ci xlog_recover_iunlink_ag(pag); 281562306a36Sopenharmony_ci} 281662306a36Sopenharmony_ci 281762306a36Sopenharmony_ciSTATIC void 281862306a36Sopenharmony_cixlog_unpack_data( 281962306a36Sopenharmony_ci struct xlog_rec_header *rhead, 282062306a36Sopenharmony_ci char *dp, 282162306a36Sopenharmony_ci struct xlog *log) 282262306a36Sopenharmony_ci{ 282362306a36Sopenharmony_ci int i, j, k; 282462306a36Sopenharmony_ci 282562306a36Sopenharmony_ci for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && 282662306a36Sopenharmony_ci i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { 282762306a36Sopenharmony_ci *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; 282862306a36Sopenharmony_ci dp += BBSIZE; 282962306a36Sopenharmony_ci } 283062306a36Sopenharmony_ci 283162306a36Sopenharmony_ci if (xfs_has_logv2(log->l_mp)) { 283262306a36Sopenharmony_ci xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; 283362306a36Sopenharmony_ci for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { 283462306a36Sopenharmony_ci j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 283562306a36Sopenharmony_ci k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); 283662306a36Sopenharmony_ci *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; 283762306a36Sopenharmony_ci dp += BBSIZE; 283862306a36Sopenharmony_ci } 283962306a36Sopenharmony_ci } 284062306a36Sopenharmony_ci} 284162306a36Sopenharmony_ci 284262306a36Sopenharmony_ci/* 284362306a36Sopenharmony_ci * CRC check, unpack and process a log record. 284462306a36Sopenharmony_ci */ 284562306a36Sopenharmony_ciSTATIC int 284662306a36Sopenharmony_cixlog_recover_process( 284762306a36Sopenharmony_ci struct xlog *log, 284862306a36Sopenharmony_ci struct hlist_head rhash[], 284962306a36Sopenharmony_ci struct xlog_rec_header *rhead, 285062306a36Sopenharmony_ci char *dp, 285162306a36Sopenharmony_ci int pass, 285262306a36Sopenharmony_ci struct list_head *buffer_list) 285362306a36Sopenharmony_ci{ 285462306a36Sopenharmony_ci __le32 old_crc = rhead->h_crc; 285562306a36Sopenharmony_ci __le32 crc; 285662306a36Sopenharmony_ci 285762306a36Sopenharmony_ci crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); 285862306a36Sopenharmony_ci 285962306a36Sopenharmony_ci /* 286062306a36Sopenharmony_ci * Nothing else to do if this is a CRC verification pass. Just return 286162306a36Sopenharmony_ci * if this a record with a non-zero crc. Unfortunately, mkfs always 286262306a36Sopenharmony_ci * sets old_crc to 0 so we must consider this valid even on v5 supers. 286362306a36Sopenharmony_ci * Otherwise, return EFSBADCRC on failure so the callers up the stack 286462306a36Sopenharmony_ci * know precisely what failed. 286562306a36Sopenharmony_ci */ 286662306a36Sopenharmony_ci if (pass == XLOG_RECOVER_CRCPASS) { 286762306a36Sopenharmony_ci if (old_crc && crc != old_crc) 286862306a36Sopenharmony_ci return -EFSBADCRC; 286962306a36Sopenharmony_ci return 0; 287062306a36Sopenharmony_ci } 287162306a36Sopenharmony_ci 287262306a36Sopenharmony_ci /* 287362306a36Sopenharmony_ci * We're in the normal recovery path. Issue a warning if and only if the 287462306a36Sopenharmony_ci * CRC in the header is non-zero. This is an advisory warning and the 287562306a36Sopenharmony_ci * zero CRC check prevents warnings from being emitted when upgrading 287662306a36Sopenharmony_ci * the kernel from one that does not add CRCs by default. 287762306a36Sopenharmony_ci */ 287862306a36Sopenharmony_ci if (crc != old_crc) { 287962306a36Sopenharmony_ci if (old_crc || xfs_has_crc(log->l_mp)) { 288062306a36Sopenharmony_ci xfs_alert(log->l_mp, 288162306a36Sopenharmony_ci "log record CRC mismatch: found 0x%x, expected 0x%x.", 288262306a36Sopenharmony_ci le32_to_cpu(old_crc), 288362306a36Sopenharmony_ci le32_to_cpu(crc)); 288462306a36Sopenharmony_ci xfs_hex_dump(dp, 32); 288562306a36Sopenharmony_ci } 288662306a36Sopenharmony_ci 288762306a36Sopenharmony_ci /* 288862306a36Sopenharmony_ci * If the filesystem is CRC enabled, this mismatch becomes a 288962306a36Sopenharmony_ci * fatal log corruption failure. 289062306a36Sopenharmony_ci */ 289162306a36Sopenharmony_ci if (xfs_has_crc(log->l_mp)) { 289262306a36Sopenharmony_ci XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); 289362306a36Sopenharmony_ci return -EFSCORRUPTED; 289462306a36Sopenharmony_ci } 289562306a36Sopenharmony_ci } 289662306a36Sopenharmony_ci 289762306a36Sopenharmony_ci xlog_unpack_data(rhead, dp, log); 289862306a36Sopenharmony_ci 289962306a36Sopenharmony_ci return xlog_recover_process_data(log, rhash, rhead, dp, pass, 290062306a36Sopenharmony_ci buffer_list); 290162306a36Sopenharmony_ci} 290262306a36Sopenharmony_ci 290362306a36Sopenharmony_ciSTATIC int 290462306a36Sopenharmony_cixlog_valid_rec_header( 290562306a36Sopenharmony_ci struct xlog *log, 290662306a36Sopenharmony_ci struct xlog_rec_header *rhead, 290762306a36Sopenharmony_ci xfs_daddr_t blkno, 290862306a36Sopenharmony_ci int bufsize) 290962306a36Sopenharmony_ci{ 291062306a36Sopenharmony_ci int hlen; 291162306a36Sopenharmony_ci 291262306a36Sopenharmony_ci if (XFS_IS_CORRUPT(log->l_mp, 291362306a36Sopenharmony_ci rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) 291462306a36Sopenharmony_ci return -EFSCORRUPTED; 291562306a36Sopenharmony_ci if (XFS_IS_CORRUPT(log->l_mp, 291662306a36Sopenharmony_ci (!rhead->h_version || 291762306a36Sopenharmony_ci (be32_to_cpu(rhead->h_version) & 291862306a36Sopenharmony_ci (~XLOG_VERSION_OKBITS))))) { 291962306a36Sopenharmony_ci xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", 292062306a36Sopenharmony_ci __func__, be32_to_cpu(rhead->h_version)); 292162306a36Sopenharmony_ci return -EFSCORRUPTED; 292262306a36Sopenharmony_ci } 292362306a36Sopenharmony_ci 292462306a36Sopenharmony_ci /* 292562306a36Sopenharmony_ci * LR body must have data (or it wouldn't have been written) 292662306a36Sopenharmony_ci * and h_len must not be greater than LR buffer size. 292762306a36Sopenharmony_ci */ 292862306a36Sopenharmony_ci hlen = be32_to_cpu(rhead->h_len); 292962306a36Sopenharmony_ci if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize)) 293062306a36Sopenharmony_ci return -EFSCORRUPTED; 293162306a36Sopenharmony_ci 293262306a36Sopenharmony_ci if (XFS_IS_CORRUPT(log->l_mp, 293362306a36Sopenharmony_ci blkno > log->l_logBBsize || blkno > INT_MAX)) 293462306a36Sopenharmony_ci return -EFSCORRUPTED; 293562306a36Sopenharmony_ci return 0; 293662306a36Sopenharmony_ci} 293762306a36Sopenharmony_ci 293862306a36Sopenharmony_ci/* 293962306a36Sopenharmony_ci * Read the log from tail to head and process the log records found. 294062306a36Sopenharmony_ci * Handle the two cases where the tail and head are in the same cycle 294162306a36Sopenharmony_ci * and where the active portion of the log wraps around the end of 294262306a36Sopenharmony_ci * the physical log separately. The pass parameter is passed through 294362306a36Sopenharmony_ci * to the routines called to process the data and is not looked at 294462306a36Sopenharmony_ci * here. 294562306a36Sopenharmony_ci */ 294662306a36Sopenharmony_ciSTATIC int 294762306a36Sopenharmony_cixlog_do_recovery_pass( 294862306a36Sopenharmony_ci struct xlog *log, 294962306a36Sopenharmony_ci xfs_daddr_t head_blk, 295062306a36Sopenharmony_ci xfs_daddr_t tail_blk, 295162306a36Sopenharmony_ci int pass, 295262306a36Sopenharmony_ci xfs_daddr_t *first_bad) /* out: first bad log rec */ 295362306a36Sopenharmony_ci{ 295462306a36Sopenharmony_ci xlog_rec_header_t *rhead; 295562306a36Sopenharmony_ci xfs_daddr_t blk_no, rblk_no; 295662306a36Sopenharmony_ci xfs_daddr_t rhead_blk; 295762306a36Sopenharmony_ci char *offset; 295862306a36Sopenharmony_ci char *hbp, *dbp; 295962306a36Sopenharmony_ci int error = 0, h_size, h_len; 296062306a36Sopenharmony_ci int error2 = 0; 296162306a36Sopenharmony_ci int bblks, split_bblks; 296262306a36Sopenharmony_ci int hblks, split_hblks, wrapped_hblks; 296362306a36Sopenharmony_ci int i; 296462306a36Sopenharmony_ci struct hlist_head rhash[XLOG_RHASH_SIZE]; 296562306a36Sopenharmony_ci LIST_HEAD (buffer_list); 296662306a36Sopenharmony_ci 296762306a36Sopenharmony_ci ASSERT(head_blk != tail_blk); 296862306a36Sopenharmony_ci blk_no = rhead_blk = tail_blk; 296962306a36Sopenharmony_ci 297062306a36Sopenharmony_ci for (i = 0; i < XLOG_RHASH_SIZE; i++) 297162306a36Sopenharmony_ci INIT_HLIST_HEAD(&rhash[i]); 297262306a36Sopenharmony_ci 297362306a36Sopenharmony_ci /* 297462306a36Sopenharmony_ci * Read the header of the tail block and get the iclog buffer size from 297562306a36Sopenharmony_ci * h_size. Use this to tell how many sectors make up the log header. 297662306a36Sopenharmony_ci */ 297762306a36Sopenharmony_ci if (xfs_has_logv2(log->l_mp)) { 297862306a36Sopenharmony_ci /* 297962306a36Sopenharmony_ci * When using variable length iclogs, read first sector of 298062306a36Sopenharmony_ci * iclog header and extract the header size from it. Get a 298162306a36Sopenharmony_ci * new hbp that is the correct size. 298262306a36Sopenharmony_ci */ 298362306a36Sopenharmony_ci hbp = xlog_alloc_buffer(log, 1); 298462306a36Sopenharmony_ci if (!hbp) 298562306a36Sopenharmony_ci return -ENOMEM; 298662306a36Sopenharmony_ci 298762306a36Sopenharmony_ci error = xlog_bread(log, tail_blk, 1, hbp, &offset); 298862306a36Sopenharmony_ci if (error) 298962306a36Sopenharmony_ci goto bread_err1; 299062306a36Sopenharmony_ci 299162306a36Sopenharmony_ci rhead = (xlog_rec_header_t *)offset; 299262306a36Sopenharmony_ci 299362306a36Sopenharmony_ci /* 299462306a36Sopenharmony_ci * xfsprogs has a bug where record length is based on lsunit but 299562306a36Sopenharmony_ci * h_size (iclog size) is hardcoded to 32k. Now that we 299662306a36Sopenharmony_ci * unconditionally CRC verify the unmount record, this means the 299762306a36Sopenharmony_ci * log buffer can be too small for the record and cause an 299862306a36Sopenharmony_ci * overrun. 299962306a36Sopenharmony_ci * 300062306a36Sopenharmony_ci * Detect this condition here. Use lsunit for the buffer size as 300162306a36Sopenharmony_ci * long as this looks like the mkfs case. Otherwise, return an 300262306a36Sopenharmony_ci * error to avoid a buffer overrun. 300362306a36Sopenharmony_ci */ 300462306a36Sopenharmony_ci h_size = be32_to_cpu(rhead->h_size); 300562306a36Sopenharmony_ci h_len = be32_to_cpu(rhead->h_len); 300662306a36Sopenharmony_ci if (h_len > h_size && h_len <= log->l_mp->m_logbsize && 300762306a36Sopenharmony_ci rhead->h_num_logops == cpu_to_be32(1)) { 300862306a36Sopenharmony_ci xfs_warn(log->l_mp, 300962306a36Sopenharmony_ci "invalid iclog size (%d bytes), using lsunit (%d bytes)", 301062306a36Sopenharmony_ci h_size, log->l_mp->m_logbsize); 301162306a36Sopenharmony_ci h_size = log->l_mp->m_logbsize; 301262306a36Sopenharmony_ci } 301362306a36Sopenharmony_ci 301462306a36Sopenharmony_ci error = xlog_valid_rec_header(log, rhead, tail_blk, h_size); 301562306a36Sopenharmony_ci if (error) 301662306a36Sopenharmony_ci goto bread_err1; 301762306a36Sopenharmony_ci 301862306a36Sopenharmony_ci hblks = xlog_logrec_hblks(log, rhead); 301962306a36Sopenharmony_ci if (hblks != 1) { 302062306a36Sopenharmony_ci kmem_free(hbp); 302162306a36Sopenharmony_ci hbp = xlog_alloc_buffer(log, hblks); 302262306a36Sopenharmony_ci } 302362306a36Sopenharmony_ci } else { 302462306a36Sopenharmony_ci ASSERT(log->l_sectBBsize == 1); 302562306a36Sopenharmony_ci hblks = 1; 302662306a36Sopenharmony_ci hbp = xlog_alloc_buffer(log, 1); 302762306a36Sopenharmony_ci h_size = XLOG_BIG_RECORD_BSIZE; 302862306a36Sopenharmony_ci } 302962306a36Sopenharmony_ci 303062306a36Sopenharmony_ci if (!hbp) 303162306a36Sopenharmony_ci return -ENOMEM; 303262306a36Sopenharmony_ci dbp = xlog_alloc_buffer(log, BTOBB(h_size)); 303362306a36Sopenharmony_ci if (!dbp) { 303462306a36Sopenharmony_ci kmem_free(hbp); 303562306a36Sopenharmony_ci return -ENOMEM; 303662306a36Sopenharmony_ci } 303762306a36Sopenharmony_ci 303862306a36Sopenharmony_ci memset(rhash, 0, sizeof(rhash)); 303962306a36Sopenharmony_ci if (tail_blk > head_blk) { 304062306a36Sopenharmony_ci /* 304162306a36Sopenharmony_ci * Perform recovery around the end of the physical log. 304262306a36Sopenharmony_ci * When the head is not on the same cycle number as the tail, 304362306a36Sopenharmony_ci * we can't do a sequential recovery. 304462306a36Sopenharmony_ci */ 304562306a36Sopenharmony_ci while (blk_no < log->l_logBBsize) { 304662306a36Sopenharmony_ci /* 304762306a36Sopenharmony_ci * Check for header wrapping around physical end-of-log 304862306a36Sopenharmony_ci */ 304962306a36Sopenharmony_ci offset = hbp; 305062306a36Sopenharmony_ci split_hblks = 0; 305162306a36Sopenharmony_ci wrapped_hblks = 0; 305262306a36Sopenharmony_ci if (blk_no + hblks <= log->l_logBBsize) { 305362306a36Sopenharmony_ci /* Read header in one read */ 305462306a36Sopenharmony_ci error = xlog_bread(log, blk_no, hblks, hbp, 305562306a36Sopenharmony_ci &offset); 305662306a36Sopenharmony_ci if (error) 305762306a36Sopenharmony_ci goto bread_err2; 305862306a36Sopenharmony_ci } else { 305962306a36Sopenharmony_ci /* This LR is split across physical log end */ 306062306a36Sopenharmony_ci if (blk_no != log->l_logBBsize) { 306162306a36Sopenharmony_ci /* some data before physical log end */ 306262306a36Sopenharmony_ci ASSERT(blk_no <= INT_MAX); 306362306a36Sopenharmony_ci split_hblks = log->l_logBBsize - (int)blk_no; 306462306a36Sopenharmony_ci ASSERT(split_hblks > 0); 306562306a36Sopenharmony_ci error = xlog_bread(log, blk_no, 306662306a36Sopenharmony_ci split_hblks, hbp, 306762306a36Sopenharmony_ci &offset); 306862306a36Sopenharmony_ci if (error) 306962306a36Sopenharmony_ci goto bread_err2; 307062306a36Sopenharmony_ci } 307162306a36Sopenharmony_ci 307262306a36Sopenharmony_ci /* 307362306a36Sopenharmony_ci * Note: this black magic still works with 307462306a36Sopenharmony_ci * large sector sizes (non-512) only because: 307562306a36Sopenharmony_ci * - we increased the buffer size originally 307662306a36Sopenharmony_ci * by 1 sector giving us enough extra space 307762306a36Sopenharmony_ci * for the second read; 307862306a36Sopenharmony_ci * - the log start is guaranteed to be sector 307962306a36Sopenharmony_ci * aligned; 308062306a36Sopenharmony_ci * - we read the log end (LR header start) 308162306a36Sopenharmony_ci * _first_, then the log start (LR header end) 308262306a36Sopenharmony_ci * - order is important. 308362306a36Sopenharmony_ci */ 308462306a36Sopenharmony_ci wrapped_hblks = hblks - split_hblks; 308562306a36Sopenharmony_ci error = xlog_bread_noalign(log, 0, 308662306a36Sopenharmony_ci wrapped_hblks, 308762306a36Sopenharmony_ci offset + BBTOB(split_hblks)); 308862306a36Sopenharmony_ci if (error) 308962306a36Sopenharmony_ci goto bread_err2; 309062306a36Sopenharmony_ci } 309162306a36Sopenharmony_ci rhead = (xlog_rec_header_t *)offset; 309262306a36Sopenharmony_ci error = xlog_valid_rec_header(log, rhead, 309362306a36Sopenharmony_ci split_hblks ? blk_no : 0, h_size); 309462306a36Sopenharmony_ci if (error) 309562306a36Sopenharmony_ci goto bread_err2; 309662306a36Sopenharmony_ci 309762306a36Sopenharmony_ci bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 309862306a36Sopenharmony_ci blk_no += hblks; 309962306a36Sopenharmony_ci 310062306a36Sopenharmony_ci /* 310162306a36Sopenharmony_ci * Read the log record data in multiple reads if it 310262306a36Sopenharmony_ci * wraps around the end of the log. Note that if the 310362306a36Sopenharmony_ci * header already wrapped, blk_no could point past the 310462306a36Sopenharmony_ci * end of the log. The record data is contiguous in 310562306a36Sopenharmony_ci * that case. 310662306a36Sopenharmony_ci */ 310762306a36Sopenharmony_ci if (blk_no + bblks <= log->l_logBBsize || 310862306a36Sopenharmony_ci blk_no >= log->l_logBBsize) { 310962306a36Sopenharmony_ci rblk_no = xlog_wrap_logbno(log, blk_no); 311062306a36Sopenharmony_ci error = xlog_bread(log, rblk_no, bblks, dbp, 311162306a36Sopenharmony_ci &offset); 311262306a36Sopenharmony_ci if (error) 311362306a36Sopenharmony_ci goto bread_err2; 311462306a36Sopenharmony_ci } else { 311562306a36Sopenharmony_ci /* This log record is split across the 311662306a36Sopenharmony_ci * physical end of log */ 311762306a36Sopenharmony_ci offset = dbp; 311862306a36Sopenharmony_ci split_bblks = 0; 311962306a36Sopenharmony_ci if (blk_no != log->l_logBBsize) { 312062306a36Sopenharmony_ci /* some data is before the physical 312162306a36Sopenharmony_ci * end of log */ 312262306a36Sopenharmony_ci ASSERT(!wrapped_hblks); 312362306a36Sopenharmony_ci ASSERT(blk_no <= INT_MAX); 312462306a36Sopenharmony_ci split_bblks = 312562306a36Sopenharmony_ci log->l_logBBsize - (int)blk_no; 312662306a36Sopenharmony_ci ASSERT(split_bblks > 0); 312762306a36Sopenharmony_ci error = xlog_bread(log, blk_no, 312862306a36Sopenharmony_ci split_bblks, dbp, 312962306a36Sopenharmony_ci &offset); 313062306a36Sopenharmony_ci if (error) 313162306a36Sopenharmony_ci goto bread_err2; 313262306a36Sopenharmony_ci } 313362306a36Sopenharmony_ci 313462306a36Sopenharmony_ci /* 313562306a36Sopenharmony_ci * Note: this black magic still works with 313662306a36Sopenharmony_ci * large sector sizes (non-512) only because: 313762306a36Sopenharmony_ci * - we increased the buffer size originally 313862306a36Sopenharmony_ci * by 1 sector giving us enough extra space 313962306a36Sopenharmony_ci * for the second read; 314062306a36Sopenharmony_ci * - the log start is guaranteed to be sector 314162306a36Sopenharmony_ci * aligned; 314262306a36Sopenharmony_ci * - we read the log end (LR header start) 314362306a36Sopenharmony_ci * _first_, then the log start (LR header end) 314462306a36Sopenharmony_ci * - order is important. 314562306a36Sopenharmony_ci */ 314662306a36Sopenharmony_ci error = xlog_bread_noalign(log, 0, 314762306a36Sopenharmony_ci bblks - split_bblks, 314862306a36Sopenharmony_ci offset + BBTOB(split_bblks)); 314962306a36Sopenharmony_ci if (error) 315062306a36Sopenharmony_ci goto bread_err2; 315162306a36Sopenharmony_ci } 315262306a36Sopenharmony_ci 315362306a36Sopenharmony_ci error = xlog_recover_process(log, rhash, rhead, offset, 315462306a36Sopenharmony_ci pass, &buffer_list); 315562306a36Sopenharmony_ci if (error) 315662306a36Sopenharmony_ci goto bread_err2; 315762306a36Sopenharmony_ci 315862306a36Sopenharmony_ci blk_no += bblks; 315962306a36Sopenharmony_ci rhead_blk = blk_no; 316062306a36Sopenharmony_ci } 316162306a36Sopenharmony_ci 316262306a36Sopenharmony_ci ASSERT(blk_no >= log->l_logBBsize); 316362306a36Sopenharmony_ci blk_no -= log->l_logBBsize; 316462306a36Sopenharmony_ci rhead_blk = blk_no; 316562306a36Sopenharmony_ci } 316662306a36Sopenharmony_ci 316762306a36Sopenharmony_ci /* read first part of physical log */ 316862306a36Sopenharmony_ci while (blk_no < head_blk) { 316962306a36Sopenharmony_ci error = xlog_bread(log, blk_no, hblks, hbp, &offset); 317062306a36Sopenharmony_ci if (error) 317162306a36Sopenharmony_ci goto bread_err2; 317262306a36Sopenharmony_ci 317362306a36Sopenharmony_ci rhead = (xlog_rec_header_t *)offset; 317462306a36Sopenharmony_ci error = xlog_valid_rec_header(log, rhead, blk_no, h_size); 317562306a36Sopenharmony_ci if (error) 317662306a36Sopenharmony_ci goto bread_err2; 317762306a36Sopenharmony_ci 317862306a36Sopenharmony_ci /* blocks in data section */ 317962306a36Sopenharmony_ci bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); 318062306a36Sopenharmony_ci error = xlog_bread(log, blk_no+hblks, bblks, dbp, 318162306a36Sopenharmony_ci &offset); 318262306a36Sopenharmony_ci if (error) 318362306a36Sopenharmony_ci goto bread_err2; 318462306a36Sopenharmony_ci 318562306a36Sopenharmony_ci error = xlog_recover_process(log, rhash, rhead, offset, pass, 318662306a36Sopenharmony_ci &buffer_list); 318762306a36Sopenharmony_ci if (error) 318862306a36Sopenharmony_ci goto bread_err2; 318962306a36Sopenharmony_ci 319062306a36Sopenharmony_ci blk_no += bblks + hblks; 319162306a36Sopenharmony_ci rhead_blk = blk_no; 319262306a36Sopenharmony_ci } 319362306a36Sopenharmony_ci 319462306a36Sopenharmony_ci bread_err2: 319562306a36Sopenharmony_ci kmem_free(dbp); 319662306a36Sopenharmony_ci bread_err1: 319762306a36Sopenharmony_ci kmem_free(hbp); 319862306a36Sopenharmony_ci 319962306a36Sopenharmony_ci /* 320062306a36Sopenharmony_ci * Submit buffers that have been added from the last record processed, 320162306a36Sopenharmony_ci * regardless of error status. 320262306a36Sopenharmony_ci */ 320362306a36Sopenharmony_ci if (!list_empty(&buffer_list)) 320462306a36Sopenharmony_ci error2 = xfs_buf_delwri_submit(&buffer_list); 320562306a36Sopenharmony_ci 320662306a36Sopenharmony_ci if (error && first_bad) 320762306a36Sopenharmony_ci *first_bad = rhead_blk; 320862306a36Sopenharmony_ci 320962306a36Sopenharmony_ci /* 321062306a36Sopenharmony_ci * Transactions are freed at commit time but transactions without commit 321162306a36Sopenharmony_ci * records on disk are never committed. Free any that may be left in the 321262306a36Sopenharmony_ci * hash table. 321362306a36Sopenharmony_ci */ 321462306a36Sopenharmony_ci for (i = 0; i < XLOG_RHASH_SIZE; i++) { 321562306a36Sopenharmony_ci struct hlist_node *tmp; 321662306a36Sopenharmony_ci struct xlog_recover *trans; 321762306a36Sopenharmony_ci 321862306a36Sopenharmony_ci hlist_for_each_entry_safe(trans, tmp, &rhash[i], r_list) 321962306a36Sopenharmony_ci xlog_recover_free_trans(trans); 322062306a36Sopenharmony_ci } 322162306a36Sopenharmony_ci 322262306a36Sopenharmony_ci return error ? error : error2; 322362306a36Sopenharmony_ci} 322462306a36Sopenharmony_ci 322562306a36Sopenharmony_ci/* 322662306a36Sopenharmony_ci * Do the recovery of the log. We actually do this in two phases. 322762306a36Sopenharmony_ci * The two passes are necessary in order to implement the function 322862306a36Sopenharmony_ci * of cancelling a record written into the log. The first pass 322962306a36Sopenharmony_ci * determines those things which have been cancelled, and the 323062306a36Sopenharmony_ci * second pass replays log items normally except for those which 323162306a36Sopenharmony_ci * have been cancelled. The handling of the replay and cancellations 323262306a36Sopenharmony_ci * takes place in the log item type specific routines. 323362306a36Sopenharmony_ci * 323462306a36Sopenharmony_ci * The table of items which have cancel records in the log is allocated 323562306a36Sopenharmony_ci * and freed at this level, since only here do we know when all of 323662306a36Sopenharmony_ci * the log recovery has been completed. 323762306a36Sopenharmony_ci */ 323862306a36Sopenharmony_ciSTATIC int 323962306a36Sopenharmony_cixlog_do_log_recovery( 324062306a36Sopenharmony_ci struct xlog *log, 324162306a36Sopenharmony_ci xfs_daddr_t head_blk, 324262306a36Sopenharmony_ci xfs_daddr_t tail_blk) 324362306a36Sopenharmony_ci{ 324462306a36Sopenharmony_ci int error; 324562306a36Sopenharmony_ci 324662306a36Sopenharmony_ci ASSERT(head_blk != tail_blk); 324762306a36Sopenharmony_ci 324862306a36Sopenharmony_ci /* 324962306a36Sopenharmony_ci * First do a pass to find all of the cancelled buf log items. 325062306a36Sopenharmony_ci * Store them in the buf_cancel_table for use in the second pass. 325162306a36Sopenharmony_ci */ 325262306a36Sopenharmony_ci error = xlog_alloc_buf_cancel_table(log); 325362306a36Sopenharmony_ci if (error) 325462306a36Sopenharmony_ci return error; 325562306a36Sopenharmony_ci 325662306a36Sopenharmony_ci error = xlog_do_recovery_pass(log, head_blk, tail_blk, 325762306a36Sopenharmony_ci XLOG_RECOVER_PASS1, NULL); 325862306a36Sopenharmony_ci if (error != 0) 325962306a36Sopenharmony_ci goto out_cancel; 326062306a36Sopenharmony_ci 326162306a36Sopenharmony_ci /* 326262306a36Sopenharmony_ci * Then do a second pass to actually recover the items in the log. 326362306a36Sopenharmony_ci * When it is complete free the table of buf cancel items. 326462306a36Sopenharmony_ci */ 326562306a36Sopenharmony_ci error = xlog_do_recovery_pass(log, head_blk, tail_blk, 326662306a36Sopenharmony_ci XLOG_RECOVER_PASS2, NULL); 326762306a36Sopenharmony_ci if (!error) 326862306a36Sopenharmony_ci xlog_check_buf_cancel_table(log); 326962306a36Sopenharmony_ciout_cancel: 327062306a36Sopenharmony_ci xlog_free_buf_cancel_table(log); 327162306a36Sopenharmony_ci return error; 327262306a36Sopenharmony_ci} 327362306a36Sopenharmony_ci 327462306a36Sopenharmony_ci/* 327562306a36Sopenharmony_ci * Do the actual recovery 327662306a36Sopenharmony_ci */ 327762306a36Sopenharmony_ciSTATIC int 327862306a36Sopenharmony_cixlog_do_recover( 327962306a36Sopenharmony_ci struct xlog *log, 328062306a36Sopenharmony_ci xfs_daddr_t head_blk, 328162306a36Sopenharmony_ci xfs_daddr_t tail_blk) 328262306a36Sopenharmony_ci{ 328362306a36Sopenharmony_ci struct xfs_mount *mp = log->l_mp; 328462306a36Sopenharmony_ci struct xfs_buf *bp = mp->m_sb_bp; 328562306a36Sopenharmony_ci struct xfs_sb *sbp = &mp->m_sb; 328662306a36Sopenharmony_ci int error; 328762306a36Sopenharmony_ci 328862306a36Sopenharmony_ci trace_xfs_log_recover(log, head_blk, tail_blk); 328962306a36Sopenharmony_ci 329062306a36Sopenharmony_ci /* 329162306a36Sopenharmony_ci * First replay the images in the log. 329262306a36Sopenharmony_ci */ 329362306a36Sopenharmony_ci error = xlog_do_log_recovery(log, head_blk, tail_blk); 329462306a36Sopenharmony_ci if (error) 329562306a36Sopenharmony_ci return error; 329662306a36Sopenharmony_ci 329762306a36Sopenharmony_ci if (xlog_is_shutdown(log)) 329862306a36Sopenharmony_ci return -EIO; 329962306a36Sopenharmony_ci 330062306a36Sopenharmony_ci /* 330162306a36Sopenharmony_ci * We now update the tail_lsn since much of the recovery has completed 330262306a36Sopenharmony_ci * and there may be space available to use. If there were no extent 330362306a36Sopenharmony_ci * or iunlinks, we can free up the entire log and set the tail_lsn to 330462306a36Sopenharmony_ci * be the last_sync_lsn. This was set in xlog_find_tail to be the 330562306a36Sopenharmony_ci * lsn of the last known good LR on disk. If there are extent frees 330662306a36Sopenharmony_ci * or iunlinks they will have some entries in the AIL; so we look at 330762306a36Sopenharmony_ci * the AIL to determine how to set the tail_lsn. 330862306a36Sopenharmony_ci */ 330962306a36Sopenharmony_ci xlog_assign_tail_lsn(mp); 331062306a36Sopenharmony_ci 331162306a36Sopenharmony_ci /* 331262306a36Sopenharmony_ci * Now that we've finished replaying all buffer and inode updates, 331362306a36Sopenharmony_ci * re-read the superblock and reverify it. 331462306a36Sopenharmony_ci */ 331562306a36Sopenharmony_ci xfs_buf_lock(bp); 331662306a36Sopenharmony_ci xfs_buf_hold(bp); 331762306a36Sopenharmony_ci error = _xfs_buf_read(bp, XBF_READ); 331862306a36Sopenharmony_ci if (error) { 331962306a36Sopenharmony_ci if (!xlog_is_shutdown(log)) { 332062306a36Sopenharmony_ci xfs_buf_ioerror_alert(bp, __this_address); 332162306a36Sopenharmony_ci ASSERT(0); 332262306a36Sopenharmony_ci } 332362306a36Sopenharmony_ci xfs_buf_relse(bp); 332462306a36Sopenharmony_ci return error; 332562306a36Sopenharmony_ci } 332662306a36Sopenharmony_ci 332762306a36Sopenharmony_ci /* Convert superblock from on-disk format */ 332862306a36Sopenharmony_ci xfs_sb_from_disk(sbp, bp->b_addr); 332962306a36Sopenharmony_ci xfs_buf_relse(bp); 333062306a36Sopenharmony_ci 333162306a36Sopenharmony_ci /* re-initialise in-core superblock and geometry structures */ 333262306a36Sopenharmony_ci mp->m_features |= xfs_sb_version_to_features(sbp); 333362306a36Sopenharmony_ci xfs_reinit_percpu_counters(mp); 333462306a36Sopenharmony_ci error = xfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks, 333562306a36Sopenharmony_ci &mp->m_maxagi); 333662306a36Sopenharmony_ci if (error) { 333762306a36Sopenharmony_ci xfs_warn(mp, "Failed post-recovery per-ag init: %d", error); 333862306a36Sopenharmony_ci return error; 333962306a36Sopenharmony_ci } 334062306a36Sopenharmony_ci mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 334162306a36Sopenharmony_ci 334262306a36Sopenharmony_ci /* Normal transactions can now occur */ 334362306a36Sopenharmony_ci clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); 334462306a36Sopenharmony_ci return 0; 334562306a36Sopenharmony_ci} 334662306a36Sopenharmony_ci 334762306a36Sopenharmony_ci/* 334862306a36Sopenharmony_ci * Perform recovery and re-initialize some log variables in xlog_find_tail. 334962306a36Sopenharmony_ci * 335062306a36Sopenharmony_ci * Return error or zero. 335162306a36Sopenharmony_ci */ 335262306a36Sopenharmony_ciint 335362306a36Sopenharmony_cixlog_recover( 335462306a36Sopenharmony_ci struct xlog *log) 335562306a36Sopenharmony_ci{ 335662306a36Sopenharmony_ci xfs_daddr_t head_blk, tail_blk; 335762306a36Sopenharmony_ci int error; 335862306a36Sopenharmony_ci 335962306a36Sopenharmony_ci /* find the tail of the log */ 336062306a36Sopenharmony_ci error = xlog_find_tail(log, &head_blk, &tail_blk); 336162306a36Sopenharmony_ci if (error) 336262306a36Sopenharmony_ci return error; 336362306a36Sopenharmony_ci 336462306a36Sopenharmony_ci /* 336562306a36Sopenharmony_ci * The superblock was read before the log was available and thus the LSN 336662306a36Sopenharmony_ci * could not be verified. Check the superblock LSN against the current 336762306a36Sopenharmony_ci * LSN now that it's known. 336862306a36Sopenharmony_ci */ 336962306a36Sopenharmony_ci if (xfs_has_crc(log->l_mp) && 337062306a36Sopenharmony_ci !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn)) 337162306a36Sopenharmony_ci return -EINVAL; 337262306a36Sopenharmony_ci 337362306a36Sopenharmony_ci if (tail_blk != head_blk) { 337462306a36Sopenharmony_ci /* There used to be a comment here: 337562306a36Sopenharmony_ci * 337662306a36Sopenharmony_ci * disallow recovery on read-only mounts. note -- mount 337762306a36Sopenharmony_ci * checks for ENOSPC and turns it into an intelligent 337862306a36Sopenharmony_ci * error message. 337962306a36Sopenharmony_ci * ...but this is no longer true. Now, unless you specify 338062306a36Sopenharmony_ci * NORECOVERY (in which case this function would never be 338162306a36Sopenharmony_ci * called), we just go ahead and recover. We do this all 338262306a36Sopenharmony_ci * under the vfs layer, so we can get away with it unless 338362306a36Sopenharmony_ci * the device itself is read-only, in which case we fail. 338462306a36Sopenharmony_ci */ 338562306a36Sopenharmony_ci if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { 338662306a36Sopenharmony_ci return error; 338762306a36Sopenharmony_ci } 338862306a36Sopenharmony_ci 338962306a36Sopenharmony_ci /* 339062306a36Sopenharmony_ci * Version 5 superblock log feature mask validation. We know the 339162306a36Sopenharmony_ci * log is dirty so check if there are any unknown log features 339262306a36Sopenharmony_ci * in what we need to recover. If there are unknown features 339362306a36Sopenharmony_ci * (e.g. unsupported transactions, then simply reject the 339462306a36Sopenharmony_ci * attempt at recovery before touching anything. 339562306a36Sopenharmony_ci */ 339662306a36Sopenharmony_ci if (xfs_sb_is_v5(&log->l_mp->m_sb) && 339762306a36Sopenharmony_ci xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb, 339862306a36Sopenharmony_ci XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) { 339962306a36Sopenharmony_ci xfs_warn(log->l_mp, 340062306a36Sopenharmony_ci"Superblock has unknown incompatible log features (0x%x) enabled.", 340162306a36Sopenharmony_ci (log->l_mp->m_sb.sb_features_log_incompat & 340262306a36Sopenharmony_ci XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); 340362306a36Sopenharmony_ci xfs_warn(log->l_mp, 340462306a36Sopenharmony_ci"The log can not be fully and/or safely recovered by this kernel."); 340562306a36Sopenharmony_ci xfs_warn(log->l_mp, 340662306a36Sopenharmony_ci"Please recover the log on a kernel that supports the unknown features."); 340762306a36Sopenharmony_ci return -EINVAL; 340862306a36Sopenharmony_ci } 340962306a36Sopenharmony_ci 341062306a36Sopenharmony_ci /* 341162306a36Sopenharmony_ci * Delay log recovery if the debug hook is set. This is debug 341262306a36Sopenharmony_ci * instrumentation to coordinate simulation of I/O failures with 341362306a36Sopenharmony_ci * log recovery. 341462306a36Sopenharmony_ci */ 341562306a36Sopenharmony_ci if (xfs_globals.log_recovery_delay) { 341662306a36Sopenharmony_ci xfs_notice(log->l_mp, 341762306a36Sopenharmony_ci "Delaying log recovery for %d seconds.", 341862306a36Sopenharmony_ci xfs_globals.log_recovery_delay); 341962306a36Sopenharmony_ci msleep(xfs_globals.log_recovery_delay * 1000); 342062306a36Sopenharmony_ci } 342162306a36Sopenharmony_ci 342262306a36Sopenharmony_ci xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", 342362306a36Sopenharmony_ci log->l_mp->m_logname ? log->l_mp->m_logname 342462306a36Sopenharmony_ci : "internal"); 342562306a36Sopenharmony_ci 342662306a36Sopenharmony_ci error = xlog_do_recover(log, head_blk, tail_blk); 342762306a36Sopenharmony_ci set_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); 342862306a36Sopenharmony_ci } 342962306a36Sopenharmony_ci return error; 343062306a36Sopenharmony_ci} 343162306a36Sopenharmony_ci 343262306a36Sopenharmony_ci/* 343362306a36Sopenharmony_ci * In the first part of recovery we replay inodes and buffers and build up the 343462306a36Sopenharmony_ci * list of intents which need to be processed. Here we process the intents and 343562306a36Sopenharmony_ci * clean up the on disk unlinked inode lists. This is separated from the first 343662306a36Sopenharmony_ci * part of recovery so that the root and real-time bitmap inodes can be read in 343762306a36Sopenharmony_ci * from disk in between the two stages. This is necessary so that we can free 343862306a36Sopenharmony_ci * space in the real-time portion of the file system. 343962306a36Sopenharmony_ci */ 344062306a36Sopenharmony_ciint 344162306a36Sopenharmony_cixlog_recover_finish( 344262306a36Sopenharmony_ci struct xlog *log) 344362306a36Sopenharmony_ci{ 344462306a36Sopenharmony_ci int error; 344562306a36Sopenharmony_ci 344662306a36Sopenharmony_ci error = xlog_recover_process_intents(log); 344762306a36Sopenharmony_ci if (error) { 344862306a36Sopenharmony_ci /* 344962306a36Sopenharmony_ci * Cancel all the unprocessed intent items now so that we don't 345062306a36Sopenharmony_ci * leave them pinned in the AIL. This can cause the AIL to 345162306a36Sopenharmony_ci * livelock on the pinned item if anyone tries to push the AIL 345262306a36Sopenharmony_ci * (inode reclaim does this) before we get around to 345362306a36Sopenharmony_ci * xfs_log_mount_cancel. 345462306a36Sopenharmony_ci */ 345562306a36Sopenharmony_ci xlog_recover_cancel_intents(log); 345662306a36Sopenharmony_ci xfs_alert(log->l_mp, "Failed to recover intents"); 345762306a36Sopenharmony_ci xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); 345862306a36Sopenharmony_ci return error; 345962306a36Sopenharmony_ci } 346062306a36Sopenharmony_ci 346162306a36Sopenharmony_ci /* 346262306a36Sopenharmony_ci * Sync the log to get all the intents out of the AIL. This isn't 346362306a36Sopenharmony_ci * absolutely necessary, but it helps in case the unlink transactions 346462306a36Sopenharmony_ci * would have problems pushing the intents out of the way. 346562306a36Sopenharmony_ci */ 346662306a36Sopenharmony_ci xfs_log_force(log->l_mp, XFS_LOG_SYNC); 346762306a36Sopenharmony_ci 346862306a36Sopenharmony_ci /* 346962306a36Sopenharmony_ci * Now that we've recovered the log and all the intents, we can clear 347062306a36Sopenharmony_ci * the log incompat feature bits in the superblock because there's no 347162306a36Sopenharmony_ci * longer anything to protect. We rely on the AIL push to write out the 347262306a36Sopenharmony_ci * updated superblock after everything else. 347362306a36Sopenharmony_ci */ 347462306a36Sopenharmony_ci if (xfs_clear_incompat_log_features(log->l_mp)) { 347562306a36Sopenharmony_ci error = xfs_sync_sb(log->l_mp, false); 347662306a36Sopenharmony_ci if (error < 0) { 347762306a36Sopenharmony_ci xfs_alert(log->l_mp, 347862306a36Sopenharmony_ci "Failed to clear log incompat features on recovery"); 347962306a36Sopenharmony_ci return error; 348062306a36Sopenharmony_ci } 348162306a36Sopenharmony_ci } 348262306a36Sopenharmony_ci 348362306a36Sopenharmony_ci xlog_recover_process_iunlinks(log); 348462306a36Sopenharmony_ci 348562306a36Sopenharmony_ci /* 348662306a36Sopenharmony_ci * Recover any CoW staging blocks that are still referenced by the 348762306a36Sopenharmony_ci * ondisk refcount metadata. During mount there cannot be any live 348862306a36Sopenharmony_ci * staging extents as we have not permitted any user modifications. 348962306a36Sopenharmony_ci * Therefore, it is safe to free them all right now, even on a 349062306a36Sopenharmony_ci * read-only mount. 349162306a36Sopenharmony_ci */ 349262306a36Sopenharmony_ci error = xfs_reflink_recover_cow(log->l_mp); 349362306a36Sopenharmony_ci if (error) { 349462306a36Sopenharmony_ci xfs_alert(log->l_mp, 349562306a36Sopenharmony_ci "Failed to recover leftover CoW staging extents, err %d.", 349662306a36Sopenharmony_ci error); 349762306a36Sopenharmony_ci /* 349862306a36Sopenharmony_ci * If we get an error here, make sure the log is shut down 349962306a36Sopenharmony_ci * but return zero so that any log items committed since the 350062306a36Sopenharmony_ci * end of intents processing can be pushed through the CIL 350162306a36Sopenharmony_ci * and AIL. 350262306a36Sopenharmony_ci */ 350362306a36Sopenharmony_ci xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); 350462306a36Sopenharmony_ci } 350562306a36Sopenharmony_ci 350662306a36Sopenharmony_ci return 0; 350762306a36Sopenharmony_ci} 350862306a36Sopenharmony_ci 350962306a36Sopenharmony_civoid 351062306a36Sopenharmony_cixlog_recover_cancel( 351162306a36Sopenharmony_ci struct xlog *log) 351262306a36Sopenharmony_ci{ 351362306a36Sopenharmony_ci if (xlog_recovery_needed(log)) 351462306a36Sopenharmony_ci xlog_recover_cancel_intents(log); 351562306a36Sopenharmony_ci} 351662306a36Sopenharmony_ci 3517