162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 462306a36Sopenharmony_ci * Author: Darrick J. Wong <djwong@kernel.org> 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#include "xfs.h" 762306a36Sopenharmony_ci#include "xfs_fs.h" 862306a36Sopenharmony_ci#include "xfs_shared.h" 962306a36Sopenharmony_ci#include "xfs_format.h" 1062306a36Sopenharmony_ci#include "xfs_trans_resv.h" 1162306a36Sopenharmony_ci#include "xfs_mount.h" 1262306a36Sopenharmony_ci#include "xfs_btree.h" 1362306a36Sopenharmony_ci#include "xfs_log_format.h" 1462306a36Sopenharmony_ci#include "xfs_trans.h" 1562306a36Sopenharmony_ci#include "xfs_inode.h" 1662306a36Sopenharmony_ci#include "xfs_icache.h" 1762306a36Sopenharmony_ci#include "xfs_alloc.h" 1862306a36Sopenharmony_ci#include "xfs_alloc_btree.h" 1962306a36Sopenharmony_ci#include "xfs_ialloc.h" 2062306a36Sopenharmony_ci#include "xfs_ialloc_btree.h" 2162306a36Sopenharmony_ci#include "xfs_refcount_btree.h" 2262306a36Sopenharmony_ci#include "xfs_rmap.h" 2362306a36Sopenharmony_ci#include "xfs_rmap_btree.h" 2462306a36Sopenharmony_ci#include "xfs_log.h" 2562306a36Sopenharmony_ci#include "xfs_trans_priv.h" 2662306a36Sopenharmony_ci#include "xfs_da_format.h" 2762306a36Sopenharmony_ci#include "xfs_da_btree.h" 2862306a36Sopenharmony_ci#include "xfs_attr.h" 2962306a36Sopenharmony_ci#include "xfs_reflink.h" 3062306a36Sopenharmony_ci#include "xfs_ag.h" 3162306a36Sopenharmony_ci#include "scrub/scrub.h" 3262306a36Sopenharmony_ci#include "scrub/common.h" 3362306a36Sopenharmony_ci#include "scrub/trace.h" 3462306a36Sopenharmony_ci#include "scrub/repair.h" 3562306a36Sopenharmony_ci#include "scrub/health.h" 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci/* Common code for the metadata scrubbers. */ 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci/* 4062306a36Sopenharmony_ci * Handling operational errors. 4162306a36Sopenharmony_ci * 4262306a36Sopenharmony_ci * The *_process_error() family of functions are used to process error return 4362306a36Sopenharmony_ci * codes from functions called as part of a scrub operation. 4462306a36Sopenharmony_ci * 4562306a36Sopenharmony_ci * If there's no error, we return true to tell the caller that it's ok 4662306a36Sopenharmony_ci * to move on to the next check in its list. 4762306a36Sopenharmony_ci * 4862306a36Sopenharmony_ci * For non-verifier errors (e.g. ENOMEM) we return false to tell the 4962306a36Sopenharmony_ci * caller that something bad happened, and we preserve *error so that 5062306a36Sopenharmony_ci * the caller can return the *error up the stack to userspace. 5162306a36Sopenharmony_ci * 5262306a36Sopenharmony_ci * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting 5362306a36Sopenharmony_ci * OFLAG_CORRUPT in sm_flags and the *error is cleared. In other words, 5462306a36Sopenharmony_ci * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT, 5562306a36Sopenharmony_ci * not via return codes. We return false to tell the caller that 5662306a36Sopenharmony_ci * something bad happened. Since the error has been cleared, the caller 5762306a36Sopenharmony_ci * will (presumably) return that zero and scrubbing will move on to 5862306a36Sopenharmony_ci * whatever's next. 5962306a36Sopenharmony_ci * 6062306a36Sopenharmony_ci * ftrace can be used to record the precise metadata location and the 6162306a36Sopenharmony_ci * approximate code location of the failed operation. 6262306a36Sopenharmony_ci */ 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci/* Check for operational errors. */ 6562306a36Sopenharmony_cistatic bool 6662306a36Sopenharmony_ci__xchk_process_error( 6762306a36Sopenharmony_ci struct xfs_scrub *sc, 6862306a36Sopenharmony_ci xfs_agnumber_t agno, 6962306a36Sopenharmony_ci xfs_agblock_t bno, 7062306a36Sopenharmony_ci int *error, 7162306a36Sopenharmony_ci __u32 errflag, 7262306a36Sopenharmony_ci void *ret_ip) 7362306a36Sopenharmony_ci{ 7462306a36Sopenharmony_ci switch (*error) { 7562306a36Sopenharmony_ci case 0: 7662306a36Sopenharmony_ci return true; 7762306a36Sopenharmony_ci case -EDEADLOCK: 7862306a36Sopenharmony_ci case -ECHRNG: 7962306a36Sopenharmony_ci /* Used to restart an op with deadlock avoidance. */ 8062306a36Sopenharmony_ci trace_xchk_deadlock_retry( 8162306a36Sopenharmony_ci sc->ip ? sc->ip : XFS_I(file_inode(sc->file)), 8262306a36Sopenharmony_ci sc->sm, *error); 8362306a36Sopenharmony_ci break; 8462306a36Sopenharmony_ci case -EFSBADCRC: 8562306a36Sopenharmony_ci case -EFSCORRUPTED: 8662306a36Sopenharmony_ci /* Note the badness but don't abort. */ 8762306a36Sopenharmony_ci sc->sm->sm_flags |= errflag; 8862306a36Sopenharmony_ci *error = 0; 8962306a36Sopenharmony_ci fallthrough; 9062306a36Sopenharmony_ci default: 9162306a36Sopenharmony_ci trace_xchk_op_error(sc, agno, bno, *error, 9262306a36Sopenharmony_ci ret_ip); 9362306a36Sopenharmony_ci break; 9462306a36Sopenharmony_ci } 9562306a36Sopenharmony_ci return false; 9662306a36Sopenharmony_ci} 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_cibool 9962306a36Sopenharmony_cixchk_process_error( 10062306a36Sopenharmony_ci struct xfs_scrub *sc, 10162306a36Sopenharmony_ci xfs_agnumber_t agno, 10262306a36Sopenharmony_ci xfs_agblock_t bno, 10362306a36Sopenharmony_ci int *error) 10462306a36Sopenharmony_ci{ 10562306a36Sopenharmony_ci return __xchk_process_error(sc, agno, bno, error, 10662306a36Sopenharmony_ci XFS_SCRUB_OFLAG_CORRUPT, __return_address); 10762306a36Sopenharmony_ci} 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_cibool 11062306a36Sopenharmony_cixchk_xref_process_error( 11162306a36Sopenharmony_ci struct xfs_scrub *sc, 11262306a36Sopenharmony_ci xfs_agnumber_t agno, 11362306a36Sopenharmony_ci xfs_agblock_t bno, 11462306a36Sopenharmony_ci int *error) 11562306a36Sopenharmony_ci{ 11662306a36Sopenharmony_ci return __xchk_process_error(sc, agno, bno, error, 11762306a36Sopenharmony_ci XFS_SCRUB_OFLAG_XFAIL, __return_address); 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci/* Check for operational errors for a file offset. */ 12162306a36Sopenharmony_cistatic bool 12262306a36Sopenharmony_ci__xchk_fblock_process_error( 12362306a36Sopenharmony_ci struct xfs_scrub *sc, 12462306a36Sopenharmony_ci int whichfork, 12562306a36Sopenharmony_ci xfs_fileoff_t offset, 12662306a36Sopenharmony_ci int *error, 12762306a36Sopenharmony_ci __u32 errflag, 12862306a36Sopenharmony_ci void *ret_ip) 12962306a36Sopenharmony_ci{ 13062306a36Sopenharmony_ci switch (*error) { 13162306a36Sopenharmony_ci case 0: 13262306a36Sopenharmony_ci return true; 13362306a36Sopenharmony_ci case -EDEADLOCK: 13462306a36Sopenharmony_ci case -ECHRNG: 13562306a36Sopenharmony_ci /* Used to restart an op with deadlock avoidance. */ 13662306a36Sopenharmony_ci trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); 13762306a36Sopenharmony_ci break; 13862306a36Sopenharmony_ci case -EFSBADCRC: 13962306a36Sopenharmony_ci case -EFSCORRUPTED: 14062306a36Sopenharmony_ci /* Note the badness but don't abort. */ 14162306a36Sopenharmony_ci sc->sm->sm_flags |= errflag; 14262306a36Sopenharmony_ci *error = 0; 14362306a36Sopenharmony_ci fallthrough; 14462306a36Sopenharmony_ci default: 14562306a36Sopenharmony_ci trace_xchk_file_op_error(sc, whichfork, offset, *error, 14662306a36Sopenharmony_ci ret_ip); 14762306a36Sopenharmony_ci break; 14862306a36Sopenharmony_ci } 14962306a36Sopenharmony_ci return false; 15062306a36Sopenharmony_ci} 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_cibool 15362306a36Sopenharmony_cixchk_fblock_process_error( 15462306a36Sopenharmony_ci struct xfs_scrub *sc, 15562306a36Sopenharmony_ci int whichfork, 15662306a36Sopenharmony_ci xfs_fileoff_t offset, 15762306a36Sopenharmony_ci int *error) 15862306a36Sopenharmony_ci{ 15962306a36Sopenharmony_ci return __xchk_fblock_process_error(sc, whichfork, offset, error, 16062306a36Sopenharmony_ci XFS_SCRUB_OFLAG_CORRUPT, __return_address); 16162306a36Sopenharmony_ci} 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_cibool 16462306a36Sopenharmony_cixchk_fblock_xref_process_error( 16562306a36Sopenharmony_ci struct xfs_scrub *sc, 16662306a36Sopenharmony_ci int whichfork, 16762306a36Sopenharmony_ci xfs_fileoff_t offset, 16862306a36Sopenharmony_ci int *error) 16962306a36Sopenharmony_ci{ 17062306a36Sopenharmony_ci return __xchk_fblock_process_error(sc, whichfork, offset, error, 17162306a36Sopenharmony_ci XFS_SCRUB_OFLAG_XFAIL, __return_address); 17262306a36Sopenharmony_ci} 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci/* 17562306a36Sopenharmony_ci * Handling scrub corruption/optimization/warning checks. 17662306a36Sopenharmony_ci * 17762306a36Sopenharmony_ci * The *_set_{corrupt,preen,warning}() family of functions are used to 17862306a36Sopenharmony_ci * record the presence of metadata that is incorrect (corrupt), could be 17962306a36Sopenharmony_ci * optimized somehow (preen), or should be flagged for administrative 18062306a36Sopenharmony_ci * review but is not incorrect (warn). 18162306a36Sopenharmony_ci * 18262306a36Sopenharmony_ci * ftrace can be used to record the precise metadata location and 18362306a36Sopenharmony_ci * approximate code location of the failed check. 18462306a36Sopenharmony_ci */ 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci/* Record a block which could be optimized. */ 18762306a36Sopenharmony_civoid 18862306a36Sopenharmony_cixchk_block_set_preen( 18962306a36Sopenharmony_ci struct xfs_scrub *sc, 19062306a36Sopenharmony_ci struct xfs_buf *bp) 19162306a36Sopenharmony_ci{ 19262306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; 19362306a36Sopenharmony_ci trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address); 19462306a36Sopenharmony_ci} 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci/* 19762306a36Sopenharmony_ci * Record an inode which could be optimized. The trace data will 19862306a36Sopenharmony_ci * include the block given by bp if bp is given; otherwise it will use 19962306a36Sopenharmony_ci * the block location of the inode record itself. 20062306a36Sopenharmony_ci */ 20162306a36Sopenharmony_civoid 20262306a36Sopenharmony_cixchk_ino_set_preen( 20362306a36Sopenharmony_ci struct xfs_scrub *sc, 20462306a36Sopenharmony_ci xfs_ino_t ino) 20562306a36Sopenharmony_ci{ 20662306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; 20762306a36Sopenharmony_ci trace_xchk_ino_preen(sc, ino, __return_address); 20862306a36Sopenharmony_ci} 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci/* Record something being wrong with the filesystem primary superblock. */ 21162306a36Sopenharmony_civoid 21262306a36Sopenharmony_cixchk_set_corrupt( 21362306a36Sopenharmony_ci struct xfs_scrub *sc) 21462306a36Sopenharmony_ci{ 21562306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 21662306a36Sopenharmony_ci trace_xchk_fs_error(sc, 0, __return_address); 21762306a36Sopenharmony_ci} 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci/* Record a corrupt block. */ 22062306a36Sopenharmony_civoid 22162306a36Sopenharmony_cixchk_block_set_corrupt( 22262306a36Sopenharmony_ci struct xfs_scrub *sc, 22362306a36Sopenharmony_ci struct xfs_buf *bp) 22462306a36Sopenharmony_ci{ 22562306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 22662306a36Sopenharmony_ci trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address); 22762306a36Sopenharmony_ci} 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ci/* Record a corruption while cross-referencing. */ 23062306a36Sopenharmony_civoid 23162306a36Sopenharmony_cixchk_block_xref_set_corrupt( 23262306a36Sopenharmony_ci struct xfs_scrub *sc, 23362306a36Sopenharmony_ci struct xfs_buf *bp) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 23662306a36Sopenharmony_ci trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address); 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci/* 24062306a36Sopenharmony_ci * Record a corrupt inode. The trace data will include the block given 24162306a36Sopenharmony_ci * by bp if bp is given; otherwise it will use the block location of the 24262306a36Sopenharmony_ci * inode record itself. 24362306a36Sopenharmony_ci */ 24462306a36Sopenharmony_civoid 24562306a36Sopenharmony_cixchk_ino_set_corrupt( 24662306a36Sopenharmony_ci struct xfs_scrub *sc, 24762306a36Sopenharmony_ci xfs_ino_t ino) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 25062306a36Sopenharmony_ci trace_xchk_ino_error(sc, ino, __return_address); 25162306a36Sopenharmony_ci} 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci/* Record a corruption while cross-referencing with an inode. */ 25462306a36Sopenharmony_civoid 25562306a36Sopenharmony_cixchk_ino_xref_set_corrupt( 25662306a36Sopenharmony_ci struct xfs_scrub *sc, 25762306a36Sopenharmony_ci xfs_ino_t ino) 25862306a36Sopenharmony_ci{ 25962306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 26062306a36Sopenharmony_ci trace_xchk_ino_error(sc, ino, __return_address); 26162306a36Sopenharmony_ci} 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci/* Record corruption in a block indexed by a file fork. */ 26462306a36Sopenharmony_civoid 26562306a36Sopenharmony_cixchk_fblock_set_corrupt( 26662306a36Sopenharmony_ci struct xfs_scrub *sc, 26762306a36Sopenharmony_ci int whichfork, 26862306a36Sopenharmony_ci xfs_fileoff_t offset) 26962306a36Sopenharmony_ci{ 27062306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 27162306a36Sopenharmony_ci trace_xchk_fblock_error(sc, whichfork, offset, __return_address); 27262306a36Sopenharmony_ci} 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci/* Record a corruption while cross-referencing a fork block. */ 27562306a36Sopenharmony_civoid 27662306a36Sopenharmony_cixchk_fblock_xref_set_corrupt( 27762306a36Sopenharmony_ci struct xfs_scrub *sc, 27862306a36Sopenharmony_ci int whichfork, 27962306a36Sopenharmony_ci xfs_fileoff_t offset) 28062306a36Sopenharmony_ci{ 28162306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT; 28262306a36Sopenharmony_ci trace_xchk_fblock_error(sc, whichfork, offset, __return_address); 28362306a36Sopenharmony_ci} 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci/* 28662306a36Sopenharmony_ci * Warn about inodes that need administrative review but is not 28762306a36Sopenharmony_ci * incorrect. 28862306a36Sopenharmony_ci */ 28962306a36Sopenharmony_civoid 29062306a36Sopenharmony_cixchk_ino_set_warning( 29162306a36Sopenharmony_ci struct xfs_scrub *sc, 29262306a36Sopenharmony_ci xfs_ino_t ino) 29362306a36Sopenharmony_ci{ 29462306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; 29562306a36Sopenharmony_ci trace_xchk_ino_warning(sc, ino, __return_address); 29662306a36Sopenharmony_ci} 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci/* Warn about a block indexed by a file fork that needs review. */ 29962306a36Sopenharmony_civoid 30062306a36Sopenharmony_cixchk_fblock_set_warning( 30162306a36Sopenharmony_ci struct xfs_scrub *sc, 30262306a36Sopenharmony_ci int whichfork, 30362306a36Sopenharmony_ci xfs_fileoff_t offset) 30462306a36Sopenharmony_ci{ 30562306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING; 30662306a36Sopenharmony_ci trace_xchk_fblock_warning(sc, whichfork, offset, __return_address); 30762306a36Sopenharmony_ci} 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci/* Signal an incomplete scrub. */ 31062306a36Sopenharmony_civoid 31162306a36Sopenharmony_cixchk_set_incomplete( 31262306a36Sopenharmony_ci struct xfs_scrub *sc) 31362306a36Sopenharmony_ci{ 31462306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE; 31562306a36Sopenharmony_ci trace_xchk_incomplete(sc, __return_address); 31662306a36Sopenharmony_ci} 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci/* 31962306a36Sopenharmony_ci * rmap scrubbing -- compute the number of blocks with a given owner, 32062306a36Sopenharmony_ci * at least according to the reverse mapping data. 32162306a36Sopenharmony_ci */ 32262306a36Sopenharmony_ci 32362306a36Sopenharmony_cistruct xchk_rmap_ownedby_info { 32462306a36Sopenharmony_ci const struct xfs_owner_info *oinfo; 32562306a36Sopenharmony_ci xfs_filblks_t *blocks; 32662306a36Sopenharmony_ci}; 32762306a36Sopenharmony_ci 32862306a36Sopenharmony_ciSTATIC int 32962306a36Sopenharmony_cixchk_count_rmap_ownedby_irec( 33062306a36Sopenharmony_ci struct xfs_btree_cur *cur, 33162306a36Sopenharmony_ci const struct xfs_rmap_irec *rec, 33262306a36Sopenharmony_ci void *priv) 33362306a36Sopenharmony_ci{ 33462306a36Sopenharmony_ci struct xchk_rmap_ownedby_info *sroi = priv; 33562306a36Sopenharmony_ci bool irec_attr; 33662306a36Sopenharmony_ci bool oinfo_attr; 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK; 33962306a36Sopenharmony_ci oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK; 34062306a36Sopenharmony_ci 34162306a36Sopenharmony_ci if (rec->rm_owner != sroi->oinfo->oi_owner) 34262306a36Sopenharmony_ci return 0; 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr) 34562306a36Sopenharmony_ci (*sroi->blocks) += rec->rm_blockcount; 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci return 0; 34862306a36Sopenharmony_ci} 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci/* 35162306a36Sopenharmony_ci * Calculate the number of blocks the rmap thinks are owned by something. 35262306a36Sopenharmony_ci * The caller should pass us an rmapbt cursor. 35362306a36Sopenharmony_ci */ 35462306a36Sopenharmony_ciint 35562306a36Sopenharmony_cixchk_count_rmap_ownedby_ag( 35662306a36Sopenharmony_ci struct xfs_scrub *sc, 35762306a36Sopenharmony_ci struct xfs_btree_cur *cur, 35862306a36Sopenharmony_ci const struct xfs_owner_info *oinfo, 35962306a36Sopenharmony_ci xfs_filblks_t *blocks) 36062306a36Sopenharmony_ci{ 36162306a36Sopenharmony_ci struct xchk_rmap_ownedby_info sroi = { 36262306a36Sopenharmony_ci .oinfo = oinfo, 36362306a36Sopenharmony_ci .blocks = blocks, 36462306a36Sopenharmony_ci }; 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci *blocks = 0; 36762306a36Sopenharmony_ci return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec, 36862306a36Sopenharmony_ci &sroi); 36962306a36Sopenharmony_ci} 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci/* 37262306a36Sopenharmony_ci * AG scrubbing 37362306a36Sopenharmony_ci * 37462306a36Sopenharmony_ci * These helpers facilitate locking an allocation group's header 37562306a36Sopenharmony_ci * buffers, setting up cursors for all btrees that are present, and 37662306a36Sopenharmony_ci * cleaning everything up once we're through. 37762306a36Sopenharmony_ci */ 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci/* Decide if we want to return an AG header read failure. */ 38062306a36Sopenharmony_cistatic inline bool 38162306a36Sopenharmony_ciwant_ag_read_header_failure( 38262306a36Sopenharmony_ci struct xfs_scrub *sc, 38362306a36Sopenharmony_ci unsigned int type) 38462306a36Sopenharmony_ci{ 38562306a36Sopenharmony_ci /* Return all AG header read failures when scanning btrees. */ 38662306a36Sopenharmony_ci if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF && 38762306a36Sopenharmony_ci sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL && 38862306a36Sopenharmony_ci sc->sm->sm_type != XFS_SCRUB_TYPE_AGI) 38962306a36Sopenharmony_ci return true; 39062306a36Sopenharmony_ci /* 39162306a36Sopenharmony_ci * If we're scanning a given type of AG header, we only want to 39262306a36Sopenharmony_ci * see read failures from that specific header. We'd like the 39362306a36Sopenharmony_ci * other headers to cross-check them, but this isn't required. 39462306a36Sopenharmony_ci */ 39562306a36Sopenharmony_ci if (sc->sm->sm_type == type) 39662306a36Sopenharmony_ci return true; 39762306a36Sopenharmony_ci return false; 39862306a36Sopenharmony_ci} 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci/* 40162306a36Sopenharmony_ci * Grab the AG header buffers for the attached perag structure. 40262306a36Sopenharmony_ci * 40362306a36Sopenharmony_ci * The headers should be released by xchk_ag_free, but as a fail safe we attach 40462306a36Sopenharmony_ci * all the buffers we grab to the scrub transaction so they'll all be freed 40562306a36Sopenharmony_ci * when we cancel it. 40662306a36Sopenharmony_ci */ 40762306a36Sopenharmony_cistatic inline int 40862306a36Sopenharmony_cixchk_perag_read_headers( 40962306a36Sopenharmony_ci struct xfs_scrub *sc, 41062306a36Sopenharmony_ci struct xchk_ag *sa) 41162306a36Sopenharmony_ci{ 41262306a36Sopenharmony_ci int error; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci error = xfs_ialloc_read_agi(sa->pag, sc->tp, &sa->agi_bp); 41562306a36Sopenharmony_ci if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) 41662306a36Sopenharmony_ci return error; 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci error = xfs_alloc_read_agf(sa->pag, sc->tp, 0, &sa->agf_bp); 41962306a36Sopenharmony_ci if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) 42062306a36Sopenharmony_ci return error; 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci return 0; 42362306a36Sopenharmony_ci} 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci/* 42662306a36Sopenharmony_ci * Grab the AG headers for the attached perag structure and wait for pending 42762306a36Sopenharmony_ci * intents to drain. 42862306a36Sopenharmony_ci */ 42962306a36Sopenharmony_cistatic int 43062306a36Sopenharmony_cixchk_perag_drain_and_lock( 43162306a36Sopenharmony_ci struct xfs_scrub *sc) 43262306a36Sopenharmony_ci{ 43362306a36Sopenharmony_ci struct xchk_ag *sa = &sc->sa; 43462306a36Sopenharmony_ci int error = 0; 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci ASSERT(sa->pag != NULL); 43762306a36Sopenharmony_ci ASSERT(sa->agi_bp == NULL); 43862306a36Sopenharmony_ci ASSERT(sa->agf_bp == NULL); 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci do { 44162306a36Sopenharmony_ci if (xchk_should_terminate(sc, &error)) 44262306a36Sopenharmony_ci return error; 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci error = xchk_perag_read_headers(sc, sa); 44562306a36Sopenharmony_ci if (error) 44662306a36Sopenharmony_ci return error; 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci /* 44962306a36Sopenharmony_ci * If we've grabbed an inode for scrubbing then we assume that 45062306a36Sopenharmony_ci * holding its ILOCK will suffice to coordinate with any intent 45162306a36Sopenharmony_ci * chains involving this inode. 45262306a36Sopenharmony_ci */ 45362306a36Sopenharmony_ci if (sc->ip) 45462306a36Sopenharmony_ci return 0; 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci /* 45762306a36Sopenharmony_ci * Decide if this AG is quiet enough for all metadata to be 45862306a36Sopenharmony_ci * consistent with each other. XFS allows the AG header buffer 45962306a36Sopenharmony_ci * locks to cycle across transaction rolls while processing 46062306a36Sopenharmony_ci * chains of deferred ops, which means that there could be 46162306a36Sopenharmony_ci * other threads in the middle of processing a chain of 46262306a36Sopenharmony_ci * deferred ops. For regular operations we are careful about 46362306a36Sopenharmony_ci * ordering operations to prevent collisions between threads 46462306a36Sopenharmony_ci * (which is why we don't need a per-AG lock), but scrub and 46562306a36Sopenharmony_ci * repair have to serialize against chained operations. 46662306a36Sopenharmony_ci * 46762306a36Sopenharmony_ci * We just locked all the AG headers buffers; now take a look 46862306a36Sopenharmony_ci * to see if there are any intents in progress. If there are, 46962306a36Sopenharmony_ci * drop the AG headers and wait for the intents to drain. 47062306a36Sopenharmony_ci * Since we hold all the AG header locks for the duration of 47162306a36Sopenharmony_ci * the scrub, this is the only time we have to sample the 47262306a36Sopenharmony_ci * intents counter; any threads increasing it after this point 47362306a36Sopenharmony_ci * can't possibly be in the middle of a chain of AG metadata 47462306a36Sopenharmony_ci * updates. 47562306a36Sopenharmony_ci * 47662306a36Sopenharmony_ci * Obviously, this should be slanted against scrub and in favor 47762306a36Sopenharmony_ci * of runtime threads. 47862306a36Sopenharmony_ci */ 47962306a36Sopenharmony_ci if (!xfs_perag_intent_busy(sa->pag)) 48062306a36Sopenharmony_ci return 0; 48162306a36Sopenharmony_ci 48262306a36Sopenharmony_ci if (sa->agf_bp) { 48362306a36Sopenharmony_ci xfs_trans_brelse(sc->tp, sa->agf_bp); 48462306a36Sopenharmony_ci sa->agf_bp = NULL; 48562306a36Sopenharmony_ci } 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci if (sa->agi_bp) { 48862306a36Sopenharmony_ci xfs_trans_brelse(sc->tp, sa->agi_bp); 48962306a36Sopenharmony_ci sa->agi_bp = NULL; 49062306a36Sopenharmony_ci } 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci if (!(sc->flags & XCHK_FSGATES_DRAIN)) 49362306a36Sopenharmony_ci return -ECHRNG; 49462306a36Sopenharmony_ci error = xfs_perag_intent_drain(sa->pag); 49562306a36Sopenharmony_ci if (error == -ERESTARTSYS) 49662306a36Sopenharmony_ci error = -EINTR; 49762306a36Sopenharmony_ci } while (!error); 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci return error; 50062306a36Sopenharmony_ci} 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci/* 50362306a36Sopenharmony_ci * Grab the per-AG structure, grab all AG header buffers, and wait until there 50462306a36Sopenharmony_ci * aren't any pending intents. Returns -ENOENT if we can't grab the perag 50562306a36Sopenharmony_ci * structure. 50662306a36Sopenharmony_ci */ 50762306a36Sopenharmony_ciint 50862306a36Sopenharmony_cixchk_ag_read_headers( 50962306a36Sopenharmony_ci struct xfs_scrub *sc, 51062306a36Sopenharmony_ci xfs_agnumber_t agno, 51162306a36Sopenharmony_ci struct xchk_ag *sa) 51262306a36Sopenharmony_ci{ 51362306a36Sopenharmony_ci struct xfs_mount *mp = sc->mp; 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci ASSERT(!sa->pag); 51662306a36Sopenharmony_ci sa->pag = xfs_perag_get(mp, agno); 51762306a36Sopenharmony_ci if (!sa->pag) 51862306a36Sopenharmony_ci return -ENOENT; 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci return xchk_perag_drain_and_lock(sc); 52162306a36Sopenharmony_ci} 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci/* Release all the AG btree cursors. */ 52462306a36Sopenharmony_civoid 52562306a36Sopenharmony_cixchk_ag_btcur_free( 52662306a36Sopenharmony_ci struct xchk_ag *sa) 52762306a36Sopenharmony_ci{ 52862306a36Sopenharmony_ci if (sa->refc_cur) 52962306a36Sopenharmony_ci xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR); 53062306a36Sopenharmony_ci if (sa->rmap_cur) 53162306a36Sopenharmony_ci xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR); 53262306a36Sopenharmony_ci if (sa->fino_cur) 53362306a36Sopenharmony_ci xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR); 53462306a36Sopenharmony_ci if (sa->ino_cur) 53562306a36Sopenharmony_ci xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR); 53662306a36Sopenharmony_ci if (sa->cnt_cur) 53762306a36Sopenharmony_ci xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR); 53862306a36Sopenharmony_ci if (sa->bno_cur) 53962306a36Sopenharmony_ci xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR); 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci sa->refc_cur = NULL; 54262306a36Sopenharmony_ci sa->rmap_cur = NULL; 54362306a36Sopenharmony_ci sa->fino_cur = NULL; 54462306a36Sopenharmony_ci sa->ino_cur = NULL; 54562306a36Sopenharmony_ci sa->bno_cur = NULL; 54662306a36Sopenharmony_ci sa->cnt_cur = NULL; 54762306a36Sopenharmony_ci} 54862306a36Sopenharmony_ci 54962306a36Sopenharmony_ci/* Initialize all the btree cursors for an AG. */ 55062306a36Sopenharmony_civoid 55162306a36Sopenharmony_cixchk_ag_btcur_init( 55262306a36Sopenharmony_ci struct xfs_scrub *sc, 55362306a36Sopenharmony_ci struct xchk_ag *sa) 55462306a36Sopenharmony_ci{ 55562306a36Sopenharmony_ci struct xfs_mount *mp = sc->mp; 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci if (sa->agf_bp && 55862306a36Sopenharmony_ci xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) { 55962306a36Sopenharmony_ci /* Set up a bnobt cursor for cross-referencing. */ 56062306a36Sopenharmony_ci sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, 56162306a36Sopenharmony_ci sa->pag, XFS_BTNUM_BNO); 56262306a36Sopenharmony_ci } 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci if (sa->agf_bp && 56562306a36Sopenharmony_ci xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) { 56662306a36Sopenharmony_ci /* Set up a cntbt cursor for cross-referencing. */ 56762306a36Sopenharmony_ci sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, 56862306a36Sopenharmony_ci sa->pag, XFS_BTNUM_CNT); 56962306a36Sopenharmony_ci } 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci /* Set up a inobt cursor for cross-referencing. */ 57262306a36Sopenharmony_ci if (sa->agi_bp && 57362306a36Sopenharmony_ci xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) { 57462306a36Sopenharmony_ci sa->ino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp, 57562306a36Sopenharmony_ci XFS_BTNUM_INO); 57662306a36Sopenharmony_ci } 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci /* Set up a finobt cursor for cross-referencing. */ 57962306a36Sopenharmony_ci if (sa->agi_bp && xfs_has_finobt(mp) && 58062306a36Sopenharmony_ci xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) { 58162306a36Sopenharmony_ci sa->fino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp, 58262306a36Sopenharmony_ci XFS_BTNUM_FINO); 58362306a36Sopenharmony_ci } 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_ci /* Set up a rmapbt cursor for cross-referencing. */ 58662306a36Sopenharmony_ci if (sa->agf_bp && xfs_has_rmapbt(mp) && 58762306a36Sopenharmony_ci xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) { 58862306a36Sopenharmony_ci sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp, 58962306a36Sopenharmony_ci sa->pag); 59062306a36Sopenharmony_ci } 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci /* Set up a refcountbt cursor for cross-referencing. */ 59362306a36Sopenharmony_ci if (sa->agf_bp && xfs_has_reflink(mp) && 59462306a36Sopenharmony_ci xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) { 59562306a36Sopenharmony_ci sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, 59662306a36Sopenharmony_ci sa->agf_bp, sa->pag); 59762306a36Sopenharmony_ci } 59862306a36Sopenharmony_ci} 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci/* Release the AG header context and btree cursors. */ 60162306a36Sopenharmony_civoid 60262306a36Sopenharmony_cixchk_ag_free( 60362306a36Sopenharmony_ci struct xfs_scrub *sc, 60462306a36Sopenharmony_ci struct xchk_ag *sa) 60562306a36Sopenharmony_ci{ 60662306a36Sopenharmony_ci xchk_ag_btcur_free(sa); 60762306a36Sopenharmony_ci if (sa->agf_bp) { 60862306a36Sopenharmony_ci xfs_trans_brelse(sc->tp, sa->agf_bp); 60962306a36Sopenharmony_ci sa->agf_bp = NULL; 61062306a36Sopenharmony_ci } 61162306a36Sopenharmony_ci if (sa->agi_bp) { 61262306a36Sopenharmony_ci xfs_trans_brelse(sc->tp, sa->agi_bp); 61362306a36Sopenharmony_ci sa->agi_bp = NULL; 61462306a36Sopenharmony_ci } 61562306a36Sopenharmony_ci if (sa->pag) { 61662306a36Sopenharmony_ci xfs_perag_put(sa->pag); 61762306a36Sopenharmony_ci sa->pag = NULL; 61862306a36Sopenharmony_ci } 61962306a36Sopenharmony_ci} 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci/* 62262306a36Sopenharmony_ci * For scrub, grab the perag structure, the AGI, and the AGF headers, in that 62362306a36Sopenharmony_ci * order. Locking order requires us to get the AGI before the AGF. We use the 62462306a36Sopenharmony_ci * transaction to avoid deadlocking on crosslinked metadata buffers; either the 62562306a36Sopenharmony_ci * caller passes one in (bmap scrub) or we have to create a transaction 62662306a36Sopenharmony_ci * ourselves. Returns ENOENT if the perag struct cannot be grabbed. 62762306a36Sopenharmony_ci */ 62862306a36Sopenharmony_ciint 62962306a36Sopenharmony_cixchk_ag_init( 63062306a36Sopenharmony_ci struct xfs_scrub *sc, 63162306a36Sopenharmony_ci xfs_agnumber_t agno, 63262306a36Sopenharmony_ci struct xchk_ag *sa) 63362306a36Sopenharmony_ci{ 63462306a36Sopenharmony_ci int error; 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci error = xchk_ag_read_headers(sc, agno, sa); 63762306a36Sopenharmony_ci if (error) 63862306a36Sopenharmony_ci return error; 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci xchk_ag_btcur_init(sc, sa); 64162306a36Sopenharmony_ci return 0; 64262306a36Sopenharmony_ci} 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_ci/* Per-scrubber setup functions */ 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_civoid 64762306a36Sopenharmony_cixchk_trans_cancel( 64862306a36Sopenharmony_ci struct xfs_scrub *sc) 64962306a36Sopenharmony_ci{ 65062306a36Sopenharmony_ci xfs_trans_cancel(sc->tp); 65162306a36Sopenharmony_ci sc->tp = NULL; 65262306a36Sopenharmony_ci} 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_ci/* 65562306a36Sopenharmony_ci * Grab an empty transaction so that we can re-grab locked buffers if 65662306a36Sopenharmony_ci * one of our btrees turns out to be cyclic. 65762306a36Sopenharmony_ci * 65862306a36Sopenharmony_ci * If we're going to repair something, we need to ask for the largest possible 65962306a36Sopenharmony_ci * log reservation so that we can handle the worst case scenario for metadata 66062306a36Sopenharmony_ci * updates while rebuilding a metadata item. We also need to reserve as many 66162306a36Sopenharmony_ci * blocks in the head transaction as we think we're going to need to rebuild 66262306a36Sopenharmony_ci * the metadata object. 66362306a36Sopenharmony_ci */ 66462306a36Sopenharmony_ciint 66562306a36Sopenharmony_cixchk_trans_alloc( 66662306a36Sopenharmony_ci struct xfs_scrub *sc, 66762306a36Sopenharmony_ci uint resblks) 66862306a36Sopenharmony_ci{ 66962306a36Sopenharmony_ci if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) 67062306a36Sopenharmony_ci return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate, 67162306a36Sopenharmony_ci resblks, 0, 0, &sc->tp); 67262306a36Sopenharmony_ci 67362306a36Sopenharmony_ci return xfs_trans_alloc_empty(sc->mp, &sc->tp); 67462306a36Sopenharmony_ci} 67562306a36Sopenharmony_ci 67662306a36Sopenharmony_ci/* Set us up with a transaction and an empty context. */ 67762306a36Sopenharmony_ciint 67862306a36Sopenharmony_cixchk_setup_fs( 67962306a36Sopenharmony_ci struct xfs_scrub *sc) 68062306a36Sopenharmony_ci{ 68162306a36Sopenharmony_ci uint resblks; 68262306a36Sopenharmony_ci 68362306a36Sopenharmony_ci resblks = xrep_calc_ag_resblks(sc); 68462306a36Sopenharmony_ci return xchk_trans_alloc(sc, resblks); 68562306a36Sopenharmony_ci} 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci/* Set us up with AG headers and btree cursors. */ 68862306a36Sopenharmony_ciint 68962306a36Sopenharmony_cixchk_setup_ag_btree( 69062306a36Sopenharmony_ci struct xfs_scrub *sc, 69162306a36Sopenharmony_ci bool force_log) 69262306a36Sopenharmony_ci{ 69362306a36Sopenharmony_ci struct xfs_mount *mp = sc->mp; 69462306a36Sopenharmony_ci int error; 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci /* 69762306a36Sopenharmony_ci * If the caller asks us to checkpont the log, do so. This 69862306a36Sopenharmony_ci * expensive operation should be performed infrequently and only 69962306a36Sopenharmony_ci * as a last resort. Any caller that sets force_log should 70062306a36Sopenharmony_ci * document why they need to do so. 70162306a36Sopenharmony_ci */ 70262306a36Sopenharmony_ci if (force_log) { 70362306a36Sopenharmony_ci error = xchk_checkpoint_log(mp); 70462306a36Sopenharmony_ci if (error) 70562306a36Sopenharmony_ci return error; 70662306a36Sopenharmony_ci } 70762306a36Sopenharmony_ci 70862306a36Sopenharmony_ci error = xchk_setup_fs(sc); 70962306a36Sopenharmony_ci if (error) 71062306a36Sopenharmony_ci return error; 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ci return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa); 71362306a36Sopenharmony_ci} 71462306a36Sopenharmony_ci 71562306a36Sopenharmony_ci/* Push everything out of the log onto disk. */ 71662306a36Sopenharmony_ciint 71762306a36Sopenharmony_cixchk_checkpoint_log( 71862306a36Sopenharmony_ci struct xfs_mount *mp) 71962306a36Sopenharmony_ci{ 72062306a36Sopenharmony_ci int error; 72162306a36Sopenharmony_ci 72262306a36Sopenharmony_ci error = xfs_log_force(mp, XFS_LOG_SYNC); 72362306a36Sopenharmony_ci if (error) 72462306a36Sopenharmony_ci return error; 72562306a36Sopenharmony_ci xfs_ail_push_all_sync(mp->m_ail); 72662306a36Sopenharmony_ci return 0; 72762306a36Sopenharmony_ci} 72862306a36Sopenharmony_ci 72962306a36Sopenharmony_ci/* Verify that an inode is allocated ondisk, then return its cached inode. */ 73062306a36Sopenharmony_ciint 73162306a36Sopenharmony_cixchk_iget( 73262306a36Sopenharmony_ci struct xfs_scrub *sc, 73362306a36Sopenharmony_ci xfs_ino_t inum, 73462306a36Sopenharmony_ci struct xfs_inode **ipp) 73562306a36Sopenharmony_ci{ 73662306a36Sopenharmony_ci return xfs_iget(sc->mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, ipp); 73762306a36Sopenharmony_ci} 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci/* 74062306a36Sopenharmony_ci * Try to grab an inode in a manner that avoids races with physical inode 74162306a36Sopenharmony_ci * allocation. If we can't, return the locked AGI buffer so that the caller 74262306a36Sopenharmony_ci * can single-step the loading process to see where things went wrong. 74362306a36Sopenharmony_ci * Callers must have a valid scrub transaction. 74462306a36Sopenharmony_ci * 74562306a36Sopenharmony_ci * If the iget succeeds, return 0, a NULL AGI, and the inode. 74662306a36Sopenharmony_ci * 74762306a36Sopenharmony_ci * If the iget fails, return the error, the locked AGI, and a NULL inode. This 74862306a36Sopenharmony_ci * can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are 74962306a36Sopenharmony_ci * no longer allocated; or any other corruption or runtime error. 75062306a36Sopenharmony_ci * 75162306a36Sopenharmony_ci * If the AGI read fails, return the error, a NULL AGI, and NULL inode. 75262306a36Sopenharmony_ci * 75362306a36Sopenharmony_ci * If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode. 75462306a36Sopenharmony_ci */ 75562306a36Sopenharmony_ciint 75662306a36Sopenharmony_cixchk_iget_agi( 75762306a36Sopenharmony_ci struct xfs_scrub *sc, 75862306a36Sopenharmony_ci xfs_ino_t inum, 75962306a36Sopenharmony_ci struct xfs_buf **agi_bpp, 76062306a36Sopenharmony_ci struct xfs_inode **ipp) 76162306a36Sopenharmony_ci{ 76262306a36Sopenharmony_ci struct xfs_mount *mp = sc->mp; 76362306a36Sopenharmony_ci struct xfs_trans *tp = sc->tp; 76462306a36Sopenharmony_ci struct xfs_perag *pag; 76562306a36Sopenharmony_ci int error; 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci ASSERT(sc->tp != NULL); 76862306a36Sopenharmony_ci 76962306a36Sopenharmony_ciagain: 77062306a36Sopenharmony_ci *agi_bpp = NULL; 77162306a36Sopenharmony_ci *ipp = NULL; 77262306a36Sopenharmony_ci error = 0; 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_ci if (xchk_should_terminate(sc, &error)) 77562306a36Sopenharmony_ci return error; 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci /* 77862306a36Sopenharmony_ci * Attach the AGI buffer to the scrub transaction to avoid deadlocks 77962306a36Sopenharmony_ci * in the iget cache miss path. 78062306a36Sopenharmony_ci */ 78162306a36Sopenharmony_ci pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); 78262306a36Sopenharmony_ci error = xfs_ialloc_read_agi(pag, tp, agi_bpp); 78362306a36Sopenharmony_ci xfs_perag_put(pag); 78462306a36Sopenharmony_ci if (error) 78562306a36Sopenharmony_ci return error; 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci error = xfs_iget(mp, tp, inum, 78862306a36Sopenharmony_ci XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED, 0, ipp); 78962306a36Sopenharmony_ci if (error == -EAGAIN) { 79062306a36Sopenharmony_ci /* 79162306a36Sopenharmony_ci * The inode may be in core but temporarily unavailable and may 79262306a36Sopenharmony_ci * require the AGI buffer before it can be returned. Drop the 79362306a36Sopenharmony_ci * AGI buffer and retry the lookup. 79462306a36Sopenharmony_ci * 79562306a36Sopenharmony_ci * Incore lookup will fail with EAGAIN on a cache hit if the 79662306a36Sopenharmony_ci * inode is queued to the inactivation list. The inactivation 79762306a36Sopenharmony_ci * worker may remove the inode from the unlinked list and hence 79862306a36Sopenharmony_ci * needs the AGI. 79962306a36Sopenharmony_ci * 80062306a36Sopenharmony_ci * Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN 80162306a36Sopenharmony_ci * to allow inodegc to make progress and move the inode to 80262306a36Sopenharmony_ci * IRECLAIMABLE state where xfs_iget will be able to return it 80362306a36Sopenharmony_ci * again if it can lock the inode. 80462306a36Sopenharmony_ci */ 80562306a36Sopenharmony_ci xfs_trans_brelse(tp, *agi_bpp); 80662306a36Sopenharmony_ci delay(1); 80762306a36Sopenharmony_ci goto again; 80862306a36Sopenharmony_ci } 80962306a36Sopenharmony_ci if (error) 81062306a36Sopenharmony_ci return error; 81162306a36Sopenharmony_ci 81262306a36Sopenharmony_ci /* We got the inode, so we can release the AGI. */ 81362306a36Sopenharmony_ci ASSERT(*ipp != NULL); 81462306a36Sopenharmony_ci xfs_trans_brelse(tp, *agi_bpp); 81562306a36Sopenharmony_ci *agi_bpp = NULL; 81662306a36Sopenharmony_ci return 0; 81762306a36Sopenharmony_ci} 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci/* Install an inode that we opened by handle for scrubbing. */ 82062306a36Sopenharmony_ciint 82162306a36Sopenharmony_cixchk_install_handle_inode( 82262306a36Sopenharmony_ci struct xfs_scrub *sc, 82362306a36Sopenharmony_ci struct xfs_inode *ip) 82462306a36Sopenharmony_ci{ 82562306a36Sopenharmony_ci if (VFS_I(ip)->i_generation != sc->sm->sm_gen) { 82662306a36Sopenharmony_ci xchk_irele(sc, ip); 82762306a36Sopenharmony_ci return -ENOENT; 82862306a36Sopenharmony_ci } 82962306a36Sopenharmony_ci 83062306a36Sopenharmony_ci sc->ip = ip; 83162306a36Sopenharmony_ci return 0; 83262306a36Sopenharmony_ci} 83362306a36Sopenharmony_ci 83462306a36Sopenharmony_ci/* 83562306a36Sopenharmony_ci * Install an already-referenced inode for scrubbing. Get our own reference to 83662306a36Sopenharmony_ci * the inode to make disposal simpler. The inode must not be in I_FREEING or 83762306a36Sopenharmony_ci * I_WILL_FREE state! 83862306a36Sopenharmony_ci */ 83962306a36Sopenharmony_ciint 84062306a36Sopenharmony_cixchk_install_live_inode( 84162306a36Sopenharmony_ci struct xfs_scrub *sc, 84262306a36Sopenharmony_ci struct xfs_inode *ip) 84362306a36Sopenharmony_ci{ 84462306a36Sopenharmony_ci if (!igrab(VFS_I(ip))) { 84562306a36Sopenharmony_ci xchk_ino_set_corrupt(sc, ip->i_ino); 84662306a36Sopenharmony_ci return -EFSCORRUPTED; 84762306a36Sopenharmony_ci } 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ci sc->ip = ip; 85062306a36Sopenharmony_ci return 0; 85162306a36Sopenharmony_ci} 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci/* 85462306a36Sopenharmony_ci * In preparation to scrub metadata structures that hang off of an inode, 85562306a36Sopenharmony_ci * grab either the inode referenced in the scrub control structure or the 85662306a36Sopenharmony_ci * inode passed in. If the inumber does not reference an allocated inode 85762306a36Sopenharmony_ci * record, the function returns ENOENT to end the scrub early. The inode 85862306a36Sopenharmony_ci * is not locked. 85962306a36Sopenharmony_ci */ 86062306a36Sopenharmony_ciint 86162306a36Sopenharmony_cixchk_iget_for_scrubbing( 86262306a36Sopenharmony_ci struct xfs_scrub *sc) 86362306a36Sopenharmony_ci{ 86462306a36Sopenharmony_ci struct xfs_imap imap; 86562306a36Sopenharmony_ci struct xfs_mount *mp = sc->mp; 86662306a36Sopenharmony_ci struct xfs_perag *pag; 86762306a36Sopenharmony_ci struct xfs_buf *agi_bp; 86862306a36Sopenharmony_ci struct xfs_inode *ip_in = XFS_I(file_inode(sc->file)); 86962306a36Sopenharmony_ci struct xfs_inode *ip = NULL; 87062306a36Sopenharmony_ci xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino); 87162306a36Sopenharmony_ci int error; 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_ci ASSERT(sc->tp == NULL); 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci /* We want to scan the inode we already had opened. */ 87662306a36Sopenharmony_ci if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) 87762306a36Sopenharmony_ci return xchk_install_live_inode(sc, ip_in); 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci /* Reject internal metadata files and obviously bad inode numbers. */ 88062306a36Sopenharmony_ci if (xfs_internal_inum(mp, sc->sm->sm_ino)) 88162306a36Sopenharmony_ci return -ENOENT; 88262306a36Sopenharmony_ci if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino)) 88362306a36Sopenharmony_ci return -ENOENT; 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci /* Try a regular untrusted iget. */ 88662306a36Sopenharmony_ci error = xchk_iget(sc, sc->sm->sm_ino, &ip); 88762306a36Sopenharmony_ci if (!error) 88862306a36Sopenharmony_ci return xchk_install_handle_inode(sc, ip); 88962306a36Sopenharmony_ci if (error == -ENOENT) 89062306a36Sopenharmony_ci return error; 89162306a36Sopenharmony_ci if (error != -EINVAL) 89262306a36Sopenharmony_ci goto out_error; 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci /* 89562306a36Sopenharmony_ci * EINVAL with IGET_UNTRUSTED probably means one of several things: 89662306a36Sopenharmony_ci * userspace gave us an inode number that doesn't correspond to fs 89762306a36Sopenharmony_ci * space; the inode btree lacks a record for this inode; or there is a 89862306a36Sopenharmony_ci * record, and it says this inode is free. 89962306a36Sopenharmony_ci * 90062306a36Sopenharmony_ci * We want to look up this inode in the inobt to distinguish two 90162306a36Sopenharmony_ci * scenarios: (1) the inobt says the inode is free, in which case 90262306a36Sopenharmony_ci * there's nothing to do; and (2) the inobt says the inode is 90362306a36Sopenharmony_ci * allocated, but loading it failed due to corruption. 90462306a36Sopenharmony_ci * 90562306a36Sopenharmony_ci * Allocate a transaction and grab the AGI to prevent inobt activity 90662306a36Sopenharmony_ci * in this AG. Retry the iget in case someone allocated a new inode 90762306a36Sopenharmony_ci * after the first iget failed. 90862306a36Sopenharmony_ci */ 90962306a36Sopenharmony_ci error = xchk_trans_alloc(sc, 0); 91062306a36Sopenharmony_ci if (error) 91162306a36Sopenharmony_ci goto out_error; 91262306a36Sopenharmony_ci 91362306a36Sopenharmony_ci error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip); 91462306a36Sopenharmony_ci if (error == 0) { 91562306a36Sopenharmony_ci /* Actually got the inode, so install it. */ 91662306a36Sopenharmony_ci xchk_trans_cancel(sc); 91762306a36Sopenharmony_ci return xchk_install_handle_inode(sc, ip); 91862306a36Sopenharmony_ci } 91962306a36Sopenharmony_ci if (error == -ENOENT) 92062306a36Sopenharmony_ci goto out_gone; 92162306a36Sopenharmony_ci if (error != -EINVAL) 92262306a36Sopenharmony_ci goto out_cancel; 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_ci /* Ensure that we have protected against inode allocation/freeing. */ 92562306a36Sopenharmony_ci if (agi_bp == NULL) { 92662306a36Sopenharmony_ci ASSERT(agi_bp != NULL); 92762306a36Sopenharmony_ci error = -ECANCELED; 92862306a36Sopenharmony_ci goto out_cancel; 92962306a36Sopenharmony_ci } 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_ci /* 93262306a36Sopenharmony_ci * Untrusted iget failed a second time. Let's try an inobt lookup. 93362306a36Sopenharmony_ci * If the inobt thinks this the inode neither can exist inside the 93462306a36Sopenharmony_ci * filesystem nor is allocated, return ENOENT to signal that the check 93562306a36Sopenharmony_ci * can be skipped. 93662306a36Sopenharmony_ci * 93762306a36Sopenharmony_ci * If the lookup returns corruption, we'll mark this inode corrupt and 93862306a36Sopenharmony_ci * exit to userspace. There's little chance of fixing anything until 93962306a36Sopenharmony_ci * the inobt is straightened out, but there's nothing we can do here. 94062306a36Sopenharmony_ci * 94162306a36Sopenharmony_ci * If the lookup encounters any other error, exit to userspace. 94262306a36Sopenharmony_ci * 94362306a36Sopenharmony_ci * If the lookup succeeds, something else must be very wrong in the fs 94462306a36Sopenharmony_ci * such that setting up the incore inode failed in some strange way. 94562306a36Sopenharmony_ci * Treat those as corruptions. 94662306a36Sopenharmony_ci */ 94762306a36Sopenharmony_ci pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino)); 94862306a36Sopenharmony_ci if (!pag) { 94962306a36Sopenharmony_ci error = -EFSCORRUPTED; 95062306a36Sopenharmony_ci goto out_cancel; 95162306a36Sopenharmony_ci } 95262306a36Sopenharmony_ci 95362306a36Sopenharmony_ci error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap, 95462306a36Sopenharmony_ci XFS_IGET_UNTRUSTED); 95562306a36Sopenharmony_ci xfs_perag_put(pag); 95662306a36Sopenharmony_ci if (error == -EINVAL || error == -ENOENT) 95762306a36Sopenharmony_ci goto out_gone; 95862306a36Sopenharmony_ci if (!error) 95962306a36Sopenharmony_ci error = -EFSCORRUPTED; 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ciout_cancel: 96262306a36Sopenharmony_ci xchk_trans_cancel(sc); 96362306a36Sopenharmony_ciout_error: 96462306a36Sopenharmony_ci trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), 96562306a36Sopenharmony_ci error, __return_address); 96662306a36Sopenharmony_ci return error; 96762306a36Sopenharmony_ciout_gone: 96862306a36Sopenharmony_ci /* The file is gone, so there's nothing to check. */ 96962306a36Sopenharmony_ci xchk_trans_cancel(sc); 97062306a36Sopenharmony_ci return -ENOENT; 97162306a36Sopenharmony_ci} 97262306a36Sopenharmony_ci 97362306a36Sopenharmony_ci/* Release an inode, possibly dropping it in the process. */ 97462306a36Sopenharmony_civoid 97562306a36Sopenharmony_cixchk_irele( 97662306a36Sopenharmony_ci struct xfs_scrub *sc, 97762306a36Sopenharmony_ci struct xfs_inode *ip) 97862306a36Sopenharmony_ci{ 97962306a36Sopenharmony_ci if (current->journal_info != NULL) { 98062306a36Sopenharmony_ci ASSERT(current->journal_info == sc->tp); 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_ci /* 98362306a36Sopenharmony_ci * If we are in a transaction, we /cannot/ drop the inode 98462306a36Sopenharmony_ci * ourselves, because the VFS will trigger writeback, which 98562306a36Sopenharmony_ci * can require a transaction. Clear DONTCACHE to force the 98662306a36Sopenharmony_ci * inode to the LRU, where someone else can take care of 98762306a36Sopenharmony_ci * dropping it. 98862306a36Sopenharmony_ci * 98962306a36Sopenharmony_ci * Note that when we grabbed our reference to the inode, it 99062306a36Sopenharmony_ci * could have had an active ref and DONTCACHE set if a sysadmin 99162306a36Sopenharmony_ci * is trying to coerce a change in file access mode. icache 99262306a36Sopenharmony_ci * hits do not clear DONTCACHE, so we must do it here. 99362306a36Sopenharmony_ci */ 99462306a36Sopenharmony_ci spin_lock(&VFS_I(ip)->i_lock); 99562306a36Sopenharmony_ci VFS_I(ip)->i_state &= ~I_DONTCACHE; 99662306a36Sopenharmony_ci spin_unlock(&VFS_I(ip)->i_lock); 99762306a36Sopenharmony_ci } else if (atomic_read(&VFS_I(ip)->i_count) == 1) { 99862306a36Sopenharmony_ci /* 99962306a36Sopenharmony_ci * If this is the last reference to the inode and the caller 100062306a36Sopenharmony_ci * permits it, set DONTCACHE to avoid thrashing. 100162306a36Sopenharmony_ci */ 100262306a36Sopenharmony_ci d_mark_dontcache(VFS_I(ip)); 100362306a36Sopenharmony_ci } 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci xfs_irele(ip); 100662306a36Sopenharmony_ci} 100762306a36Sopenharmony_ci 100862306a36Sopenharmony_ci/* 100962306a36Sopenharmony_ci * Set us up to scrub metadata mapped by a file's fork. Callers must not use 101062306a36Sopenharmony_ci * this to operate on user-accessible regular file data because the MMAPLOCK is 101162306a36Sopenharmony_ci * not taken. 101262306a36Sopenharmony_ci */ 101362306a36Sopenharmony_ciint 101462306a36Sopenharmony_cixchk_setup_inode_contents( 101562306a36Sopenharmony_ci struct xfs_scrub *sc, 101662306a36Sopenharmony_ci unsigned int resblks) 101762306a36Sopenharmony_ci{ 101862306a36Sopenharmony_ci int error; 101962306a36Sopenharmony_ci 102062306a36Sopenharmony_ci error = xchk_iget_for_scrubbing(sc); 102162306a36Sopenharmony_ci if (error) 102262306a36Sopenharmony_ci return error; 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci /* Lock the inode so the VFS cannot touch this file. */ 102562306a36Sopenharmony_ci xchk_ilock(sc, XFS_IOLOCK_EXCL); 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_ci error = xchk_trans_alloc(sc, resblks); 102862306a36Sopenharmony_ci if (error) 102962306a36Sopenharmony_ci goto out; 103062306a36Sopenharmony_ci xchk_ilock(sc, XFS_ILOCK_EXCL); 103162306a36Sopenharmony_ciout: 103262306a36Sopenharmony_ci /* scrub teardown will unlock and release the inode for us */ 103362306a36Sopenharmony_ci return error; 103462306a36Sopenharmony_ci} 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_civoid 103762306a36Sopenharmony_cixchk_ilock( 103862306a36Sopenharmony_ci struct xfs_scrub *sc, 103962306a36Sopenharmony_ci unsigned int ilock_flags) 104062306a36Sopenharmony_ci{ 104162306a36Sopenharmony_ci xfs_ilock(sc->ip, ilock_flags); 104262306a36Sopenharmony_ci sc->ilock_flags |= ilock_flags; 104362306a36Sopenharmony_ci} 104462306a36Sopenharmony_ci 104562306a36Sopenharmony_cibool 104662306a36Sopenharmony_cixchk_ilock_nowait( 104762306a36Sopenharmony_ci struct xfs_scrub *sc, 104862306a36Sopenharmony_ci unsigned int ilock_flags) 104962306a36Sopenharmony_ci{ 105062306a36Sopenharmony_ci if (xfs_ilock_nowait(sc->ip, ilock_flags)) { 105162306a36Sopenharmony_ci sc->ilock_flags |= ilock_flags; 105262306a36Sopenharmony_ci return true; 105362306a36Sopenharmony_ci } 105462306a36Sopenharmony_ci 105562306a36Sopenharmony_ci return false; 105662306a36Sopenharmony_ci} 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_civoid 105962306a36Sopenharmony_cixchk_iunlock( 106062306a36Sopenharmony_ci struct xfs_scrub *sc, 106162306a36Sopenharmony_ci unsigned int ilock_flags) 106262306a36Sopenharmony_ci{ 106362306a36Sopenharmony_ci sc->ilock_flags &= ~ilock_flags; 106462306a36Sopenharmony_ci xfs_iunlock(sc->ip, ilock_flags); 106562306a36Sopenharmony_ci} 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci/* 106862306a36Sopenharmony_ci * Predicate that decides if we need to evaluate the cross-reference check. 106962306a36Sopenharmony_ci * If there was an error accessing the cross-reference btree, just delete 107062306a36Sopenharmony_ci * the cursor and skip the check. 107162306a36Sopenharmony_ci */ 107262306a36Sopenharmony_cibool 107362306a36Sopenharmony_cixchk_should_check_xref( 107462306a36Sopenharmony_ci struct xfs_scrub *sc, 107562306a36Sopenharmony_ci int *error, 107662306a36Sopenharmony_ci struct xfs_btree_cur **curpp) 107762306a36Sopenharmony_ci{ 107862306a36Sopenharmony_ci /* No point in xref if we already know we're corrupt. */ 107962306a36Sopenharmony_ci if (xchk_skip_xref(sc->sm)) 108062306a36Sopenharmony_ci return false; 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci if (*error == 0) 108362306a36Sopenharmony_ci return true; 108462306a36Sopenharmony_ci 108562306a36Sopenharmony_ci if (curpp) { 108662306a36Sopenharmony_ci /* If we've already given up on xref, just bail out. */ 108762306a36Sopenharmony_ci if (!*curpp) 108862306a36Sopenharmony_ci return false; 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_ci /* xref error, delete cursor and bail out. */ 109162306a36Sopenharmony_ci xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR); 109262306a36Sopenharmony_ci *curpp = NULL; 109362306a36Sopenharmony_ci } 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; 109662306a36Sopenharmony_ci trace_xchk_xref_error(sc, *error, __return_address); 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_ci /* 109962306a36Sopenharmony_ci * Errors encountered during cross-referencing with another 110062306a36Sopenharmony_ci * data structure should not cause this scrubber to abort. 110162306a36Sopenharmony_ci */ 110262306a36Sopenharmony_ci *error = 0; 110362306a36Sopenharmony_ci return false; 110462306a36Sopenharmony_ci} 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci/* Run the structure verifiers on in-memory buffers to detect bad memory. */ 110762306a36Sopenharmony_civoid 110862306a36Sopenharmony_cixchk_buffer_recheck( 110962306a36Sopenharmony_ci struct xfs_scrub *sc, 111062306a36Sopenharmony_ci struct xfs_buf *bp) 111162306a36Sopenharmony_ci{ 111262306a36Sopenharmony_ci xfs_failaddr_t fa; 111362306a36Sopenharmony_ci 111462306a36Sopenharmony_ci if (bp->b_ops == NULL) { 111562306a36Sopenharmony_ci xchk_block_set_corrupt(sc, bp); 111662306a36Sopenharmony_ci return; 111762306a36Sopenharmony_ci } 111862306a36Sopenharmony_ci if (bp->b_ops->verify_struct == NULL) { 111962306a36Sopenharmony_ci xchk_set_incomplete(sc); 112062306a36Sopenharmony_ci return; 112162306a36Sopenharmony_ci } 112262306a36Sopenharmony_ci fa = bp->b_ops->verify_struct(bp); 112362306a36Sopenharmony_ci if (!fa) 112462306a36Sopenharmony_ci return; 112562306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 112662306a36Sopenharmony_ci trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa); 112762306a36Sopenharmony_ci} 112862306a36Sopenharmony_ci 112962306a36Sopenharmony_cistatic inline int 113062306a36Sopenharmony_cixchk_metadata_inode_subtype( 113162306a36Sopenharmony_ci struct xfs_scrub *sc, 113262306a36Sopenharmony_ci unsigned int scrub_type) 113362306a36Sopenharmony_ci{ 113462306a36Sopenharmony_ci __u32 smtype = sc->sm->sm_type; 113562306a36Sopenharmony_ci int error; 113662306a36Sopenharmony_ci 113762306a36Sopenharmony_ci sc->sm->sm_type = scrub_type; 113862306a36Sopenharmony_ci 113962306a36Sopenharmony_ci switch (scrub_type) { 114062306a36Sopenharmony_ci case XFS_SCRUB_TYPE_INODE: 114162306a36Sopenharmony_ci error = xchk_inode(sc); 114262306a36Sopenharmony_ci break; 114362306a36Sopenharmony_ci case XFS_SCRUB_TYPE_BMBTD: 114462306a36Sopenharmony_ci error = xchk_bmap_data(sc); 114562306a36Sopenharmony_ci break; 114662306a36Sopenharmony_ci default: 114762306a36Sopenharmony_ci ASSERT(0); 114862306a36Sopenharmony_ci error = -EFSCORRUPTED; 114962306a36Sopenharmony_ci break; 115062306a36Sopenharmony_ci } 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci sc->sm->sm_type = smtype; 115362306a36Sopenharmony_ci return error; 115462306a36Sopenharmony_ci} 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_ci/* 115762306a36Sopenharmony_ci * Scrub the attr/data forks of a metadata inode. The metadata inode must be 115862306a36Sopenharmony_ci * pointed to by sc->ip and the ILOCK must be held. 115962306a36Sopenharmony_ci */ 116062306a36Sopenharmony_ciint 116162306a36Sopenharmony_cixchk_metadata_inode_forks( 116262306a36Sopenharmony_ci struct xfs_scrub *sc) 116362306a36Sopenharmony_ci{ 116462306a36Sopenharmony_ci bool shared; 116562306a36Sopenharmony_ci int error; 116662306a36Sopenharmony_ci 116762306a36Sopenharmony_ci if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 116862306a36Sopenharmony_ci return 0; 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci /* Check the inode record. */ 117162306a36Sopenharmony_ci error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE); 117262306a36Sopenharmony_ci if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 117362306a36Sopenharmony_ci return error; 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_ci /* Metadata inodes don't live on the rt device. */ 117662306a36Sopenharmony_ci if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) { 117762306a36Sopenharmony_ci xchk_ino_set_corrupt(sc, sc->ip->i_ino); 117862306a36Sopenharmony_ci return 0; 117962306a36Sopenharmony_ci } 118062306a36Sopenharmony_ci 118162306a36Sopenharmony_ci /* They should never participate in reflink. */ 118262306a36Sopenharmony_ci if (xfs_is_reflink_inode(sc->ip)) { 118362306a36Sopenharmony_ci xchk_ino_set_corrupt(sc, sc->ip->i_ino); 118462306a36Sopenharmony_ci return 0; 118562306a36Sopenharmony_ci } 118662306a36Sopenharmony_ci 118762306a36Sopenharmony_ci /* They also should never have extended attributes. */ 118862306a36Sopenharmony_ci if (xfs_inode_hasattr(sc->ip)) { 118962306a36Sopenharmony_ci xchk_ino_set_corrupt(sc, sc->ip->i_ino); 119062306a36Sopenharmony_ci return 0; 119162306a36Sopenharmony_ci } 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_ci /* Invoke the data fork scrubber. */ 119462306a36Sopenharmony_ci error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD); 119562306a36Sopenharmony_ci if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 119662306a36Sopenharmony_ci return error; 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ci /* Look for incorrect shared blocks. */ 119962306a36Sopenharmony_ci if (xfs_has_reflink(sc->mp)) { 120062306a36Sopenharmony_ci error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 120162306a36Sopenharmony_ci &shared); 120262306a36Sopenharmony_ci if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, 120362306a36Sopenharmony_ci &error)) 120462306a36Sopenharmony_ci return error; 120562306a36Sopenharmony_ci if (shared) 120662306a36Sopenharmony_ci xchk_ino_set_corrupt(sc, sc->ip->i_ino); 120762306a36Sopenharmony_ci } 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_ci return 0; 121062306a36Sopenharmony_ci} 121162306a36Sopenharmony_ci 121262306a36Sopenharmony_ci/* 121362306a36Sopenharmony_ci * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub 121462306a36Sopenharmony_ci * operation. Callers must not hold any locks that intersect with the CPU 121562306a36Sopenharmony_ci * hotplug lock (e.g. writeback locks) because code patching must halt the CPUs 121662306a36Sopenharmony_ci * to change kernel code. 121762306a36Sopenharmony_ci */ 121862306a36Sopenharmony_civoid 121962306a36Sopenharmony_cixchk_fsgates_enable( 122062306a36Sopenharmony_ci struct xfs_scrub *sc, 122162306a36Sopenharmony_ci unsigned int scrub_fsgates) 122262306a36Sopenharmony_ci{ 122362306a36Sopenharmony_ci ASSERT(!(scrub_fsgates & ~XCHK_FSGATES_ALL)); 122462306a36Sopenharmony_ci ASSERT(!(sc->flags & scrub_fsgates)); 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ci trace_xchk_fsgates_enable(sc, scrub_fsgates); 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_ci if (scrub_fsgates & XCHK_FSGATES_DRAIN) 122962306a36Sopenharmony_ci xfs_drain_wait_enable(); 123062306a36Sopenharmony_ci 123162306a36Sopenharmony_ci sc->flags |= scrub_fsgates; 123262306a36Sopenharmony_ci} 123362306a36Sopenharmony_ci 123462306a36Sopenharmony_ci/* 123562306a36Sopenharmony_ci * Decide if this is this a cached inode that's also allocated. The caller 123662306a36Sopenharmony_ci * must hold a reference to an AG and the AGI buffer lock to prevent inodes 123762306a36Sopenharmony_ci * from being allocated or freed. 123862306a36Sopenharmony_ci * 123962306a36Sopenharmony_ci * Look up an inode by number in the given file system. If the inode number 124062306a36Sopenharmony_ci * is invalid, return -EINVAL. If the inode is not in cache, return -ENODATA. 124162306a36Sopenharmony_ci * If the inode is being reclaimed, return -ENODATA because we know the inode 124262306a36Sopenharmony_ci * cache cannot be updating the ondisk metadata. 124362306a36Sopenharmony_ci * 124462306a36Sopenharmony_ci * Otherwise, the incore inode is the one we want, and it is either live, 124562306a36Sopenharmony_ci * somewhere in the inactivation machinery, or reclaimable. The inode is 124662306a36Sopenharmony_ci * allocated if i_mode is nonzero. In all three cases, the cached inode will 124762306a36Sopenharmony_ci * be more up to date than the ondisk inode buffer, so we must use the incore 124862306a36Sopenharmony_ci * i_mode. 124962306a36Sopenharmony_ci */ 125062306a36Sopenharmony_ciint 125162306a36Sopenharmony_cixchk_inode_is_allocated( 125262306a36Sopenharmony_ci struct xfs_scrub *sc, 125362306a36Sopenharmony_ci xfs_agino_t agino, 125462306a36Sopenharmony_ci bool *inuse) 125562306a36Sopenharmony_ci{ 125662306a36Sopenharmony_ci struct xfs_mount *mp = sc->mp; 125762306a36Sopenharmony_ci struct xfs_perag *pag = sc->sa.pag; 125862306a36Sopenharmony_ci xfs_ino_t ino; 125962306a36Sopenharmony_ci struct xfs_inode *ip; 126062306a36Sopenharmony_ci int error; 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci /* caller must hold perag reference */ 126362306a36Sopenharmony_ci if (pag == NULL) { 126462306a36Sopenharmony_ci ASSERT(pag != NULL); 126562306a36Sopenharmony_ci return -EINVAL; 126662306a36Sopenharmony_ci } 126762306a36Sopenharmony_ci 126862306a36Sopenharmony_ci /* caller must have AGI buffer */ 126962306a36Sopenharmony_ci if (sc->sa.agi_bp == NULL) { 127062306a36Sopenharmony_ci ASSERT(sc->sa.agi_bp != NULL); 127162306a36Sopenharmony_ci return -EINVAL; 127262306a36Sopenharmony_ci } 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_ci /* reject inode numbers outside existing AGs */ 127562306a36Sopenharmony_ci ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino); 127662306a36Sopenharmony_ci if (!xfs_verify_ino(mp, ino)) 127762306a36Sopenharmony_ci return -EINVAL; 127862306a36Sopenharmony_ci 127962306a36Sopenharmony_ci error = -ENODATA; 128062306a36Sopenharmony_ci rcu_read_lock(); 128162306a36Sopenharmony_ci ip = radix_tree_lookup(&pag->pag_ici_root, agino); 128262306a36Sopenharmony_ci if (!ip) { 128362306a36Sopenharmony_ci /* cache miss */ 128462306a36Sopenharmony_ci goto out_rcu; 128562306a36Sopenharmony_ci } 128662306a36Sopenharmony_ci 128762306a36Sopenharmony_ci /* 128862306a36Sopenharmony_ci * If the inode number doesn't match, the incore inode got reused 128962306a36Sopenharmony_ci * during an RCU grace period and the radix tree hasn't been updated. 129062306a36Sopenharmony_ci * This isn't the inode we want. 129162306a36Sopenharmony_ci */ 129262306a36Sopenharmony_ci spin_lock(&ip->i_flags_lock); 129362306a36Sopenharmony_ci if (ip->i_ino != ino) 129462306a36Sopenharmony_ci goto out_skip; 129562306a36Sopenharmony_ci 129662306a36Sopenharmony_ci trace_xchk_inode_is_allocated(ip); 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_ci /* 129962306a36Sopenharmony_ci * We have an incore inode that matches the inode we want, and the 130062306a36Sopenharmony_ci * caller holds the perag structure and the AGI buffer. Let's check 130162306a36Sopenharmony_ci * our assumptions below: 130262306a36Sopenharmony_ci */ 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci#ifdef DEBUG 130562306a36Sopenharmony_ci /* 130662306a36Sopenharmony_ci * (1) If the incore inode is live (i.e. referenced from the dcache), 130762306a36Sopenharmony_ci * it will not be INEW, nor will it be in the inactivation or reclaim 130862306a36Sopenharmony_ci * machinery. The ondisk inode had better be allocated. This is the 130962306a36Sopenharmony_ci * most trivial case. 131062306a36Sopenharmony_ci */ 131162306a36Sopenharmony_ci if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE | 131262306a36Sopenharmony_ci XFS_INACTIVATING))) { 131362306a36Sopenharmony_ci /* live inode */ 131462306a36Sopenharmony_ci ASSERT(VFS_I(ip)->i_mode != 0); 131562306a36Sopenharmony_ci } 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci /* 131862306a36Sopenharmony_ci * If the incore inode is INEW, there are several possibilities: 131962306a36Sopenharmony_ci * 132062306a36Sopenharmony_ci * (2) For a file that is being created, note that we allocate the 132162306a36Sopenharmony_ci * ondisk inode before allocating, initializing, and adding the incore 132262306a36Sopenharmony_ci * inode to the radix tree. 132362306a36Sopenharmony_ci * 132462306a36Sopenharmony_ci * (3) If the incore inode is being recycled, the inode has to be 132562306a36Sopenharmony_ci * allocated because we don't allow freed inodes to be recycled. 132662306a36Sopenharmony_ci * Recycling doesn't touch i_mode. 132762306a36Sopenharmony_ci */ 132862306a36Sopenharmony_ci if (ip->i_flags & XFS_INEW) { 132962306a36Sopenharmony_ci /* created on disk already or recycling */ 133062306a36Sopenharmony_ci ASSERT(VFS_I(ip)->i_mode != 0); 133162306a36Sopenharmony_ci } 133262306a36Sopenharmony_ci 133362306a36Sopenharmony_ci /* 133462306a36Sopenharmony_ci * (4) If the inode is queued for inactivation (NEED_INACTIVE) but 133562306a36Sopenharmony_ci * inactivation has not started (!INACTIVATING), it is still allocated. 133662306a36Sopenharmony_ci */ 133762306a36Sopenharmony_ci if ((ip->i_flags & XFS_NEED_INACTIVE) && 133862306a36Sopenharmony_ci !(ip->i_flags & XFS_INACTIVATING)) { 133962306a36Sopenharmony_ci /* definitely before difree */ 134062306a36Sopenharmony_ci ASSERT(VFS_I(ip)->i_mode != 0); 134162306a36Sopenharmony_ci } 134262306a36Sopenharmony_ci#endif 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_ci /* 134562306a36Sopenharmony_ci * If the incore inode is undergoing inactivation (INACTIVATING), there 134662306a36Sopenharmony_ci * are two possibilities: 134762306a36Sopenharmony_ci * 134862306a36Sopenharmony_ci * (5) It is before the point where it would get freed ondisk, in which 134962306a36Sopenharmony_ci * case i_mode is still nonzero. 135062306a36Sopenharmony_ci * 135162306a36Sopenharmony_ci * (6) It has already been freed, in which case i_mode is zero. 135262306a36Sopenharmony_ci * 135362306a36Sopenharmony_ci * We don't take the ILOCK here, but difree and dialloc update the AGI, 135462306a36Sopenharmony_ci * and we've taken the AGI buffer lock, which prevents that from 135562306a36Sopenharmony_ci * happening. 135662306a36Sopenharmony_ci */ 135762306a36Sopenharmony_ci 135862306a36Sopenharmony_ci /* 135962306a36Sopenharmony_ci * (7) Inodes undergoing inactivation (INACTIVATING) or queued for 136062306a36Sopenharmony_ci * reclaim (IRECLAIMABLE) could be allocated or free. i_mode still 136162306a36Sopenharmony_ci * reflects the ondisk state. 136262306a36Sopenharmony_ci */ 136362306a36Sopenharmony_ci 136462306a36Sopenharmony_ci /* 136562306a36Sopenharmony_ci * (8) If the inode is in IFLUSHING, it's safe to query i_mode because 136662306a36Sopenharmony_ci * the flush code uses i_mode to format the ondisk inode. 136762306a36Sopenharmony_ci */ 136862306a36Sopenharmony_ci 136962306a36Sopenharmony_ci /* 137062306a36Sopenharmony_ci * (9) If the inode is in IRECLAIM and was reachable via the radix 137162306a36Sopenharmony_ci * tree, it still has the same i_mode as it did before it entered 137262306a36Sopenharmony_ci * reclaim. The inode object is still alive because we hold the RCU 137362306a36Sopenharmony_ci * read lock. 137462306a36Sopenharmony_ci */ 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci *inuse = VFS_I(ip)->i_mode != 0; 137762306a36Sopenharmony_ci error = 0; 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_ciout_skip: 138062306a36Sopenharmony_ci spin_unlock(&ip->i_flags_lock); 138162306a36Sopenharmony_ciout_rcu: 138262306a36Sopenharmony_ci rcu_read_unlock(); 138362306a36Sopenharmony_ci return error; 138462306a36Sopenharmony_ci} 1385