162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2019-2023 Oracle. All Rights Reserved. 462306a36Sopenharmony_ci * Author: Darrick J. Wong <djwong@kernel.org> 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#include "xfs.h" 762306a36Sopenharmony_ci#include "xfs_fs.h" 862306a36Sopenharmony_ci#include "xfs_shared.h" 962306a36Sopenharmony_ci#include "xfs_format.h" 1062306a36Sopenharmony_ci#include "xfs_trans_resv.h" 1162306a36Sopenharmony_ci#include "xfs_mount.h" 1262306a36Sopenharmony_ci#include "xfs_btree.h" 1362306a36Sopenharmony_ci#include "xfs_trans_resv.h" 1462306a36Sopenharmony_ci#include "xfs_mount.h" 1562306a36Sopenharmony_ci#include "xfs_ag.h" 1662306a36Sopenharmony_ci#include "xfs_health.h" 1762306a36Sopenharmony_ci#include "scrub/scrub.h" 1862306a36Sopenharmony_ci#include "scrub/health.h" 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_ci/* 2162306a36Sopenharmony_ci * Scrub and In-Core Filesystem Health Assessments 2262306a36Sopenharmony_ci * =============================================== 2362306a36Sopenharmony_ci * 2462306a36Sopenharmony_ci * Online scrub and repair have the time and the ability to perform stronger 2562306a36Sopenharmony_ci * checks than we can do from the metadata verifiers, because they can 2662306a36Sopenharmony_ci * cross-reference records between data structures. Therefore, scrub is in a 2762306a36Sopenharmony_ci * good position to update the online filesystem health assessments to reflect 2862306a36Sopenharmony_ci * the good/bad state of the data structure. 2962306a36Sopenharmony_ci * 3062306a36Sopenharmony_ci * We therefore extend scrub in the following ways to achieve this: 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci * 1. Create a "sick_mask" field in the scrub context. When we're setting up a 3362306a36Sopenharmony_ci * scrub call, set this to the default XFS_SICK_* flag(s) for the selected 3462306a36Sopenharmony_ci * scrub type (call it A). Scrub and repair functions can override the default 3562306a36Sopenharmony_ci * sick_mask value if they choose. 3662306a36Sopenharmony_ci * 3762306a36Sopenharmony_ci * 2. If the scrubber returns a runtime error code, we exit making no changes 3862306a36Sopenharmony_ci * to the incore sick state. 3962306a36Sopenharmony_ci * 4062306a36Sopenharmony_ci * 3. If the scrubber finds that A is clean, use sick_mask to clear the incore 4162306a36Sopenharmony_ci * sick flags before exiting. 4262306a36Sopenharmony_ci * 4362306a36Sopenharmony_ci * 4. If the scrubber finds that A is corrupt, use sick_mask to set the incore 4462306a36Sopenharmony_ci * sick flags. If the user didn't want to repair then we exit, leaving the 4562306a36Sopenharmony_ci * metadata structure unfixed and the sick flag set. 4662306a36Sopenharmony_ci * 4762306a36Sopenharmony_ci * 5. Now we know that A is corrupt and the user wants to repair, so run the 4862306a36Sopenharmony_ci * repairer. If the repairer returns an error code, we exit with that error 4962306a36Sopenharmony_ci * code, having made no further changes to the incore sick state. 5062306a36Sopenharmony_ci * 5162306a36Sopenharmony_ci * 6. If repair rebuilds A correctly and the subsequent re-scrub of A is clean, 5262306a36Sopenharmony_ci * use sick_mask to clear the incore sick flags. This should have the effect 5362306a36Sopenharmony_ci * that A is no longer marked sick. 5462306a36Sopenharmony_ci * 5562306a36Sopenharmony_ci * 7. If repair rebuilds A incorrectly, the re-scrub will find it corrupt and 5662306a36Sopenharmony_ci * use sick_mask to set the incore sick flags. This should have no externally 5762306a36Sopenharmony_ci * visible effect since we already set them in step (4). 5862306a36Sopenharmony_ci * 5962306a36Sopenharmony_ci * There are some complications to this story, however. For certain types of 6062306a36Sopenharmony_ci * complementary metadata indices (e.g. inobt/finobt), it is easier to rebuild 6162306a36Sopenharmony_ci * both structures at the same time. The following principles apply to this 6262306a36Sopenharmony_ci * type of repair strategy: 6362306a36Sopenharmony_ci * 6462306a36Sopenharmony_ci * 8. Any repair function that rebuilds multiple structures should update 6562306a36Sopenharmony_ci * sick_mask_visible to reflect whatever other structures are rebuilt, and 6662306a36Sopenharmony_ci * verify that all the rebuilt structures can pass a scrub check. The outcomes 6762306a36Sopenharmony_ci * of 5-7 still apply, but with a sick_mask that covers everything being 6862306a36Sopenharmony_ci * rebuilt. 6962306a36Sopenharmony_ci */ 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci/* Map our scrub type to a sick mask and a set of health update functions. */ 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_cienum xchk_health_group { 7462306a36Sopenharmony_ci XHG_FS = 1, 7562306a36Sopenharmony_ci XHG_RT, 7662306a36Sopenharmony_ci XHG_AG, 7762306a36Sopenharmony_ci XHG_INO, 7862306a36Sopenharmony_ci}; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_cistruct xchk_health_map { 8162306a36Sopenharmony_ci enum xchk_health_group group; 8262306a36Sopenharmony_ci unsigned int sick_mask; 8362306a36Sopenharmony_ci}; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_cistatic const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { 8662306a36Sopenharmony_ci [XFS_SCRUB_TYPE_SB] = { XHG_AG, XFS_SICK_AG_SB }, 8762306a36Sopenharmony_ci [XFS_SCRUB_TYPE_AGF] = { XHG_AG, XFS_SICK_AG_AGF }, 8862306a36Sopenharmony_ci [XFS_SCRUB_TYPE_AGFL] = { XHG_AG, XFS_SICK_AG_AGFL }, 8962306a36Sopenharmony_ci [XFS_SCRUB_TYPE_AGI] = { XHG_AG, XFS_SICK_AG_AGI }, 9062306a36Sopenharmony_ci [XFS_SCRUB_TYPE_BNOBT] = { XHG_AG, XFS_SICK_AG_BNOBT }, 9162306a36Sopenharmony_ci [XFS_SCRUB_TYPE_CNTBT] = { XHG_AG, XFS_SICK_AG_CNTBT }, 9262306a36Sopenharmony_ci [XFS_SCRUB_TYPE_INOBT] = { XHG_AG, XFS_SICK_AG_INOBT }, 9362306a36Sopenharmony_ci [XFS_SCRUB_TYPE_FINOBT] = { XHG_AG, XFS_SICK_AG_FINOBT }, 9462306a36Sopenharmony_ci [XFS_SCRUB_TYPE_RMAPBT] = { XHG_AG, XFS_SICK_AG_RMAPBT }, 9562306a36Sopenharmony_ci [XFS_SCRUB_TYPE_REFCNTBT] = { XHG_AG, XFS_SICK_AG_REFCNTBT }, 9662306a36Sopenharmony_ci [XFS_SCRUB_TYPE_INODE] = { XHG_INO, XFS_SICK_INO_CORE }, 9762306a36Sopenharmony_ci [XFS_SCRUB_TYPE_BMBTD] = { XHG_INO, XFS_SICK_INO_BMBTD }, 9862306a36Sopenharmony_ci [XFS_SCRUB_TYPE_BMBTA] = { XHG_INO, XFS_SICK_INO_BMBTA }, 9962306a36Sopenharmony_ci [XFS_SCRUB_TYPE_BMBTC] = { XHG_INO, XFS_SICK_INO_BMBTC }, 10062306a36Sopenharmony_ci [XFS_SCRUB_TYPE_DIR] = { XHG_INO, XFS_SICK_INO_DIR }, 10162306a36Sopenharmony_ci [XFS_SCRUB_TYPE_XATTR] = { XHG_INO, XFS_SICK_INO_XATTR }, 10262306a36Sopenharmony_ci [XFS_SCRUB_TYPE_SYMLINK] = { XHG_INO, XFS_SICK_INO_SYMLINK }, 10362306a36Sopenharmony_ci [XFS_SCRUB_TYPE_PARENT] = { XHG_INO, XFS_SICK_INO_PARENT }, 10462306a36Sopenharmony_ci [XFS_SCRUB_TYPE_RTBITMAP] = { XHG_RT, XFS_SICK_RT_BITMAP }, 10562306a36Sopenharmony_ci [XFS_SCRUB_TYPE_RTSUM] = { XHG_RT, XFS_SICK_RT_SUMMARY }, 10662306a36Sopenharmony_ci [XFS_SCRUB_TYPE_UQUOTA] = { XHG_FS, XFS_SICK_FS_UQUOTA }, 10762306a36Sopenharmony_ci [XFS_SCRUB_TYPE_GQUOTA] = { XHG_FS, XFS_SICK_FS_GQUOTA }, 10862306a36Sopenharmony_ci [XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA }, 10962306a36Sopenharmony_ci [XFS_SCRUB_TYPE_FSCOUNTERS] = { XHG_FS, XFS_SICK_FS_COUNTERS }, 11062306a36Sopenharmony_ci}; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci/* Return the health status mask for this scrub type. */ 11362306a36Sopenharmony_ciunsigned int 11462306a36Sopenharmony_cixchk_health_mask_for_scrub_type( 11562306a36Sopenharmony_ci __u32 scrub_type) 11662306a36Sopenharmony_ci{ 11762306a36Sopenharmony_ci return type_to_health_flag[scrub_type].sick_mask; 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci/* 12162306a36Sopenharmony_ci * Update filesystem health assessments based on what we found and did. 12262306a36Sopenharmony_ci * 12362306a36Sopenharmony_ci * If the scrubber finds errors, we mark sick whatever's mentioned in 12462306a36Sopenharmony_ci * sick_mask, no matter whether this is a first scan or an 12562306a36Sopenharmony_ci * evaluation of repair effectiveness. 12662306a36Sopenharmony_ci * 12762306a36Sopenharmony_ci * Otherwise, no direct corruption was found, so mark whatever's in 12862306a36Sopenharmony_ci * sick_mask as healthy. 12962306a36Sopenharmony_ci */ 13062306a36Sopenharmony_civoid 13162306a36Sopenharmony_cixchk_update_health( 13262306a36Sopenharmony_ci struct xfs_scrub *sc) 13362306a36Sopenharmony_ci{ 13462306a36Sopenharmony_ci struct xfs_perag *pag; 13562306a36Sopenharmony_ci bool bad; 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci if (!sc->sick_mask) 13862306a36Sopenharmony_ci return; 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci bad = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT | 14162306a36Sopenharmony_ci XFS_SCRUB_OFLAG_XCORRUPT)); 14262306a36Sopenharmony_ci switch (type_to_health_flag[sc->sm->sm_type].group) { 14362306a36Sopenharmony_ci case XHG_AG: 14462306a36Sopenharmony_ci pag = xfs_perag_get(sc->mp, sc->sm->sm_agno); 14562306a36Sopenharmony_ci if (bad) 14662306a36Sopenharmony_ci xfs_ag_mark_sick(pag, sc->sick_mask); 14762306a36Sopenharmony_ci else 14862306a36Sopenharmony_ci xfs_ag_mark_healthy(pag, sc->sick_mask); 14962306a36Sopenharmony_ci xfs_perag_put(pag); 15062306a36Sopenharmony_ci break; 15162306a36Sopenharmony_ci case XHG_INO: 15262306a36Sopenharmony_ci if (!sc->ip) 15362306a36Sopenharmony_ci return; 15462306a36Sopenharmony_ci if (bad) 15562306a36Sopenharmony_ci xfs_inode_mark_sick(sc->ip, sc->sick_mask); 15662306a36Sopenharmony_ci else 15762306a36Sopenharmony_ci xfs_inode_mark_healthy(sc->ip, sc->sick_mask); 15862306a36Sopenharmony_ci break; 15962306a36Sopenharmony_ci case XHG_FS: 16062306a36Sopenharmony_ci if (bad) 16162306a36Sopenharmony_ci xfs_fs_mark_sick(sc->mp, sc->sick_mask); 16262306a36Sopenharmony_ci else 16362306a36Sopenharmony_ci xfs_fs_mark_healthy(sc->mp, sc->sick_mask); 16462306a36Sopenharmony_ci break; 16562306a36Sopenharmony_ci case XHG_RT: 16662306a36Sopenharmony_ci if (bad) 16762306a36Sopenharmony_ci xfs_rt_mark_sick(sc->mp, sc->sick_mask); 16862306a36Sopenharmony_ci else 16962306a36Sopenharmony_ci xfs_rt_mark_healthy(sc->mp, sc->sick_mask); 17062306a36Sopenharmony_ci break; 17162306a36Sopenharmony_ci default: 17262306a36Sopenharmony_ci ASSERT(0); 17362306a36Sopenharmony_ci break; 17462306a36Sopenharmony_ci } 17562306a36Sopenharmony_ci} 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci/* Is the given per-AG btree healthy enough for scanning? */ 17862306a36Sopenharmony_cibool 17962306a36Sopenharmony_cixchk_ag_btree_healthy_enough( 18062306a36Sopenharmony_ci struct xfs_scrub *sc, 18162306a36Sopenharmony_ci struct xfs_perag *pag, 18262306a36Sopenharmony_ci xfs_btnum_t btnum) 18362306a36Sopenharmony_ci{ 18462306a36Sopenharmony_ci unsigned int mask = 0; 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci /* 18762306a36Sopenharmony_ci * We always want the cursor if it's the same type as whatever we're 18862306a36Sopenharmony_ci * scrubbing, even if we already know the structure is corrupt. 18962306a36Sopenharmony_ci * 19062306a36Sopenharmony_ci * Otherwise, we're only interested in the btree for cross-referencing. 19162306a36Sopenharmony_ci * If we know the btree is bad then don't bother, just set XFAIL. 19262306a36Sopenharmony_ci */ 19362306a36Sopenharmony_ci switch (btnum) { 19462306a36Sopenharmony_ci case XFS_BTNUM_BNO: 19562306a36Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT) 19662306a36Sopenharmony_ci return true; 19762306a36Sopenharmony_ci mask = XFS_SICK_AG_BNOBT; 19862306a36Sopenharmony_ci break; 19962306a36Sopenharmony_ci case XFS_BTNUM_CNT: 20062306a36Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_CNTBT) 20162306a36Sopenharmony_ci return true; 20262306a36Sopenharmony_ci mask = XFS_SICK_AG_CNTBT; 20362306a36Sopenharmony_ci break; 20462306a36Sopenharmony_ci case XFS_BTNUM_INO: 20562306a36Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT) 20662306a36Sopenharmony_ci return true; 20762306a36Sopenharmony_ci mask = XFS_SICK_AG_INOBT; 20862306a36Sopenharmony_ci break; 20962306a36Sopenharmony_ci case XFS_BTNUM_FINO: 21062306a36Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT) 21162306a36Sopenharmony_ci return true; 21262306a36Sopenharmony_ci mask = XFS_SICK_AG_FINOBT; 21362306a36Sopenharmony_ci break; 21462306a36Sopenharmony_ci case XFS_BTNUM_RMAP: 21562306a36Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_RMAPBT) 21662306a36Sopenharmony_ci return true; 21762306a36Sopenharmony_ci mask = XFS_SICK_AG_RMAPBT; 21862306a36Sopenharmony_ci break; 21962306a36Sopenharmony_ci case XFS_BTNUM_REFC: 22062306a36Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_REFCNTBT) 22162306a36Sopenharmony_ci return true; 22262306a36Sopenharmony_ci mask = XFS_SICK_AG_REFCNTBT; 22362306a36Sopenharmony_ci break; 22462306a36Sopenharmony_ci default: 22562306a36Sopenharmony_ci ASSERT(0); 22662306a36Sopenharmony_ci return true; 22762306a36Sopenharmony_ci } 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ci /* 23062306a36Sopenharmony_ci * If we just repaired some AG metadata, sc->sick_mask will reflect all 23162306a36Sopenharmony_ci * the per-AG metadata types that were repaired. Exclude these from 23262306a36Sopenharmony_ci * the filesystem health query because we have not yet updated the 23362306a36Sopenharmony_ci * health status and we want everything to be scanned. 23462306a36Sopenharmony_ci */ 23562306a36Sopenharmony_ci if ((sc->flags & XREP_ALREADY_FIXED) && 23662306a36Sopenharmony_ci type_to_health_flag[sc->sm->sm_type].group == XHG_AG) 23762306a36Sopenharmony_ci mask &= ~sc->sick_mask; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci if (xfs_ag_has_sickness(pag, mask)) { 24062306a36Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; 24162306a36Sopenharmony_ci return false; 24262306a36Sopenharmony_ci } 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci return true; 24562306a36Sopenharmony_ci} 246