18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0+ 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2019 Oracle. All Rights Reserved. 48c2ecf20Sopenharmony_ci * Author: Darrick J. Wong <darrick.wong@oracle.com> 58c2ecf20Sopenharmony_ci */ 68c2ecf20Sopenharmony_ci#include "xfs.h" 78c2ecf20Sopenharmony_ci#include "xfs_fs.h" 88c2ecf20Sopenharmony_ci#include "xfs_shared.h" 98c2ecf20Sopenharmony_ci#include "xfs_format.h" 108c2ecf20Sopenharmony_ci#include "xfs_btree.h" 118c2ecf20Sopenharmony_ci#include "xfs_sb.h" 128c2ecf20Sopenharmony_ci#include "xfs_health.h" 138c2ecf20Sopenharmony_ci#include "scrub/scrub.h" 148c2ecf20Sopenharmony_ci#include "scrub/health.h" 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_ci/* 178c2ecf20Sopenharmony_ci * Scrub and In-Core Filesystem Health Assessments 188c2ecf20Sopenharmony_ci * =============================================== 198c2ecf20Sopenharmony_ci * 208c2ecf20Sopenharmony_ci * Online scrub and repair have the time and the ability to perform stronger 218c2ecf20Sopenharmony_ci * checks than we can do from the metadata verifiers, because they can 228c2ecf20Sopenharmony_ci * cross-reference records between data structures. Therefore, scrub is in a 238c2ecf20Sopenharmony_ci * good position to update the online filesystem health assessments to reflect 248c2ecf20Sopenharmony_ci * the good/bad state of the data structure. 258c2ecf20Sopenharmony_ci * 268c2ecf20Sopenharmony_ci * We therefore extend scrub in the following ways to achieve this: 278c2ecf20Sopenharmony_ci * 288c2ecf20Sopenharmony_ci * 1. Create a "sick_mask" field in the scrub context. When we're setting up a 298c2ecf20Sopenharmony_ci * scrub call, set this to the default XFS_SICK_* flag(s) for the selected 308c2ecf20Sopenharmony_ci * scrub type (call it A). Scrub and repair functions can override the default 318c2ecf20Sopenharmony_ci * sick_mask value if they choose. 328c2ecf20Sopenharmony_ci * 338c2ecf20Sopenharmony_ci * 2. If the scrubber returns a runtime error code, we exit making no changes 348c2ecf20Sopenharmony_ci * to the incore sick state. 358c2ecf20Sopenharmony_ci * 368c2ecf20Sopenharmony_ci * 3. If the scrubber finds that A is clean, use sick_mask to clear the incore 378c2ecf20Sopenharmony_ci * sick flags before exiting. 388c2ecf20Sopenharmony_ci * 398c2ecf20Sopenharmony_ci * 4. If the scrubber finds that A is corrupt, use sick_mask to set the incore 408c2ecf20Sopenharmony_ci * sick flags. If the user didn't want to repair then we exit, leaving the 418c2ecf20Sopenharmony_ci * metadata structure unfixed and the sick flag set. 428c2ecf20Sopenharmony_ci * 438c2ecf20Sopenharmony_ci * 5. Now we know that A is corrupt and the user wants to repair, so run the 448c2ecf20Sopenharmony_ci * repairer. If the repairer returns an error code, we exit with that error 458c2ecf20Sopenharmony_ci * code, having made no further changes to the incore sick state. 468c2ecf20Sopenharmony_ci * 478c2ecf20Sopenharmony_ci * 6. If repair rebuilds A correctly and the subsequent re-scrub of A is clean, 488c2ecf20Sopenharmony_ci * use sick_mask to clear the incore sick flags. This should have the effect 498c2ecf20Sopenharmony_ci * that A is no longer marked sick. 508c2ecf20Sopenharmony_ci * 518c2ecf20Sopenharmony_ci * 7. If repair rebuilds A incorrectly, the re-scrub will find it corrupt and 528c2ecf20Sopenharmony_ci * use sick_mask to set the incore sick flags. This should have no externally 538c2ecf20Sopenharmony_ci * visible effect since we already set them in step (4). 548c2ecf20Sopenharmony_ci * 558c2ecf20Sopenharmony_ci * There are some complications to this story, however. For certain types of 568c2ecf20Sopenharmony_ci * complementary metadata indices (e.g. inobt/finobt), it is easier to rebuild 578c2ecf20Sopenharmony_ci * both structures at the same time. The following principles apply to this 588c2ecf20Sopenharmony_ci * type of repair strategy: 598c2ecf20Sopenharmony_ci * 608c2ecf20Sopenharmony_ci * 8. Any repair function that rebuilds multiple structures should update 618c2ecf20Sopenharmony_ci * sick_mask_visible to reflect whatever other structures are rebuilt, and 628c2ecf20Sopenharmony_ci * verify that all the rebuilt structures can pass a scrub check. The outcomes 638c2ecf20Sopenharmony_ci * of 5-7 still apply, but with a sick_mask that covers everything being 648c2ecf20Sopenharmony_ci * rebuilt. 658c2ecf20Sopenharmony_ci */ 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci/* Map our scrub type to a sick mask and a set of health update functions. */ 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_cienum xchk_health_group { 708c2ecf20Sopenharmony_ci XHG_FS = 1, 718c2ecf20Sopenharmony_ci XHG_RT, 728c2ecf20Sopenharmony_ci XHG_AG, 738c2ecf20Sopenharmony_ci XHG_INO, 748c2ecf20Sopenharmony_ci}; 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_cistruct xchk_health_map { 778c2ecf20Sopenharmony_ci enum xchk_health_group group; 788c2ecf20Sopenharmony_ci unsigned int sick_mask; 798c2ecf20Sopenharmony_ci}; 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_cistatic const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { 828c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_SB] = { XHG_AG, XFS_SICK_AG_SB }, 838c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_AGF] = { XHG_AG, XFS_SICK_AG_AGF }, 848c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_AGFL] = { XHG_AG, XFS_SICK_AG_AGFL }, 858c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_AGI] = { XHG_AG, XFS_SICK_AG_AGI }, 868c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_BNOBT] = { XHG_AG, XFS_SICK_AG_BNOBT }, 878c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_CNTBT] = { XHG_AG, XFS_SICK_AG_CNTBT }, 888c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_INOBT] = { XHG_AG, XFS_SICK_AG_INOBT }, 898c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_FINOBT] = { XHG_AG, XFS_SICK_AG_FINOBT }, 908c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_RMAPBT] = { XHG_AG, XFS_SICK_AG_RMAPBT }, 918c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_REFCNTBT] = { XHG_AG, XFS_SICK_AG_REFCNTBT }, 928c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_INODE] = { XHG_INO, XFS_SICK_INO_CORE }, 938c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_BMBTD] = { XHG_INO, XFS_SICK_INO_BMBTD }, 948c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_BMBTA] = { XHG_INO, XFS_SICK_INO_BMBTA }, 958c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_BMBTC] = { XHG_INO, XFS_SICK_INO_BMBTC }, 968c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_DIR] = { XHG_INO, XFS_SICK_INO_DIR }, 978c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_XATTR] = { XHG_INO, XFS_SICK_INO_XATTR }, 988c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_SYMLINK] = { XHG_INO, XFS_SICK_INO_SYMLINK }, 998c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_PARENT] = { XHG_INO, XFS_SICK_INO_PARENT }, 1008c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_RTBITMAP] = { XHG_RT, XFS_SICK_RT_BITMAP }, 1018c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_RTSUM] = { XHG_RT, XFS_SICK_RT_SUMMARY }, 1028c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_UQUOTA] = { XHG_FS, XFS_SICK_FS_UQUOTA }, 1038c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_GQUOTA] = { XHG_FS, XFS_SICK_FS_GQUOTA }, 1048c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA }, 1058c2ecf20Sopenharmony_ci [XFS_SCRUB_TYPE_FSCOUNTERS] = { XHG_FS, XFS_SICK_FS_COUNTERS }, 1068c2ecf20Sopenharmony_ci}; 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci/* Return the health status mask for this scrub type. */ 1098c2ecf20Sopenharmony_ciunsigned int 1108c2ecf20Sopenharmony_cixchk_health_mask_for_scrub_type( 1118c2ecf20Sopenharmony_ci __u32 scrub_type) 1128c2ecf20Sopenharmony_ci{ 1138c2ecf20Sopenharmony_ci return type_to_health_flag[scrub_type].sick_mask; 1148c2ecf20Sopenharmony_ci} 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci/* 1178c2ecf20Sopenharmony_ci * Update filesystem health assessments based on what we found and did. 1188c2ecf20Sopenharmony_ci * 1198c2ecf20Sopenharmony_ci * If the scrubber finds errors, we mark sick whatever's mentioned in 1208c2ecf20Sopenharmony_ci * sick_mask, no matter whether this is a first scan or an 1218c2ecf20Sopenharmony_ci * evaluation of repair effectiveness. 1228c2ecf20Sopenharmony_ci * 1238c2ecf20Sopenharmony_ci * Otherwise, no direct corruption was found, so mark whatever's in 1248c2ecf20Sopenharmony_ci * sick_mask as healthy. 1258c2ecf20Sopenharmony_ci */ 1268c2ecf20Sopenharmony_civoid 1278c2ecf20Sopenharmony_cixchk_update_health( 1288c2ecf20Sopenharmony_ci struct xfs_scrub *sc) 1298c2ecf20Sopenharmony_ci{ 1308c2ecf20Sopenharmony_ci struct xfs_perag *pag; 1318c2ecf20Sopenharmony_ci bool bad; 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci if (!sc->sick_mask) 1348c2ecf20Sopenharmony_ci return; 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci bad = (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT); 1378c2ecf20Sopenharmony_ci switch (type_to_health_flag[sc->sm->sm_type].group) { 1388c2ecf20Sopenharmony_ci case XHG_AG: 1398c2ecf20Sopenharmony_ci pag = xfs_perag_get(sc->mp, sc->sm->sm_agno); 1408c2ecf20Sopenharmony_ci if (bad) 1418c2ecf20Sopenharmony_ci xfs_ag_mark_sick(pag, sc->sick_mask); 1428c2ecf20Sopenharmony_ci else 1438c2ecf20Sopenharmony_ci xfs_ag_mark_healthy(pag, sc->sick_mask); 1448c2ecf20Sopenharmony_ci xfs_perag_put(pag); 1458c2ecf20Sopenharmony_ci break; 1468c2ecf20Sopenharmony_ci case XHG_INO: 1478c2ecf20Sopenharmony_ci if (!sc->ip) 1488c2ecf20Sopenharmony_ci return; 1498c2ecf20Sopenharmony_ci if (bad) 1508c2ecf20Sopenharmony_ci xfs_inode_mark_sick(sc->ip, sc->sick_mask); 1518c2ecf20Sopenharmony_ci else 1528c2ecf20Sopenharmony_ci xfs_inode_mark_healthy(sc->ip, sc->sick_mask); 1538c2ecf20Sopenharmony_ci break; 1548c2ecf20Sopenharmony_ci case XHG_FS: 1558c2ecf20Sopenharmony_ci if (bad) 1568c2ecf20Sopenharmony_ci xfs_fs_mark_sick(sc->mp, sc->sick_mask); 1578c2ecf20Sopenharmony_ci else 1588c2ecf20Sopenharmony_ci xfs_fs_mark_healthy(sc->mp, sc->sick_mask); 1598c2ecf20Sopenharmony_ci break; 1608c2ecf20Sopenharmony_ci case XHG_RT: 1618c2ecf20Sopenharmony_ci if (bad) 1628c2ecf20Sopenharmony_ci xfs_rt_mark_sick(sc->mp, sc->sick_mask); 1638c2ecf20Sopenharmony_ci else 1648c2ecf20Sopenharmony_ci xfs_rt_mark_healthy(sc->mp, sc->sick_mask); 1658c2ecf20Sopenharmony_ci break; 1668c2ecf20Sopenharmony_ci default: 1678c2ecf20Sopenharmony_ci ASSERT(0); 1688c2ecf20Sopenharmony_ci break; 1698c2ecf20Sopenharmony_ci } 1708c2ecf20Sopenharmony_ci} 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci/* Is the given per-AG btree healthy enough for scanning? */ 1738c2ecf20Sopenharmony_cibool 1748c2ecf20Sopenharmony_cixchk_ag_btree_healthy_enough( 1758c2ecf20Sopenharmony_ci struct xfs_scrub *sc, 1768c2ecf20Sopenharmony_ci struct xfs_perag *pag, 1778c2ecf20Sopenharmony_ci xfs_btnum_t btnum) 1788c2ecf20Sopenharmony_ci{ 1798c2ecf20Sopenharmony_ci unsigned int mask = 0; 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_ci /* 1828c2ecf20Sopenharmony_ci * We always want the cursor if it's the same type as whatever we're 1838c2ecf20Sopenharmony_ci * scrubbing, even if we already know the structure is corrupt. 1848c2ecf20Sopenharmony_ci * 1858c2ecf20Sopenharmony_ci * Otherwise, we're only interested in the btree for cross-referencing. 1868c2ecf20Sopenharmony_ci * If we know the btree is bad then don't bother, just set XFAIL. 1878c2ecf20Sopenharmony_ci */ 1888c2ecf20Sopenharmony_ci switch (btnum) { 1898c2ecf20Sopenharmony_ci case XFS_BTNUM_BNO: 1908c2ecf20Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT) 1918c2ecf20Sopenharmony_ci return true; 1928c2ecf20Sopenharmony_ci mask = XFS_SICK_AG_BNOBT; 1938c2ecf20Sopenharmony_ci break; 1948c2ecf20Sopenharmony_ci case XFS_BTNUM_CNT: 1958c2ecf20Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_CNTBT) 1968c2ecf20Sopenharmony_ci return true; 1978c2ecf20Sopenharmony_ci mask = XFS_SICK_AG_CNTBT; 1988c2ecf20Sopenharmony_ci break; 1998c2ecf20Sopenharmony_ci case XFS_BTNUM_INO: 2008c2ecf20Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT) 2018c2ecf20Sopenharmony_ci return true; 2028c2ecf20Sopenharmony_ci mask = XFS_SICK_AG_INOBT; 2038c2ecf20Sopenharmony_ci break; 2048c2ecf20Sopenharmony_ci case XFS_BTNUM_FINO: 2058c2ecf20Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT) 2068c2ecf20Sopenharmony_ci return true; 2078c2ecf20Sopenharmony_ci mask = XFS_SICK_AG_FINOBT; 2088c2ecf20Sopenharmony_ci break; 2098c2ecf20Sopenharmony_ci case XFS_BTNUM_RMAP: 2108c2ecf20Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_RMAPBT) 2118c2ecf20Sopenharmony_ci return true; 2128c2ecf20Sopenharmony_ci mask = XFS_SICK_AG_RMAPBT; 2138c2ecf20Sopenharmony_ci break; 2148c2ecf20Sopenharmony_ci case XFS_BTNUM_REFC: 2158c2ecf20Sopenharmony_ci if (sc->sm->sm_type == XFS_SCRUB_TYPE_REFCNTBT) 2168c2ecf20Sopenharmony_ci return true; 2178c2ecf20Sopenharmony_ci mask = XFS_SICK_AG_REFCNTBT; 2188c2ecf20Sopenharmony_ci break; 2198c2ecf20Sopenharmony_ci default: 2208c2ecf20Sopenharmony_ci ASSERT(0); 2218c2ecf20Sopenharmony_ci return true; 2228c2ecf20Sopenharmony_ci } 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci if (xfs_ag_has_sickness(pag, mask)) { 2258c2ecf20Sopenharmony_ci sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL; 2268c2ecf20Sopenharmony_ci return false; 2278c2ecf20Sopenharmony_ci } 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ci return true; 2308c2ecf20Sopenharmony_ci} 231