162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
462306a36Sopenharmony_ci * Author: Darrick J. Wong <djwong@kernel.org>
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci#include "xfs.h"
762306a36Sopenharmony_ci#include "xfs_fs.h"
862306a36Sopenharmony_ci#include "xfs_shared.h"
962306a36Sopenharmony_ci#include "xfs_format.h"
1062306a36Sopenharmony_ci#include "xfs_trans_resv.h"
1162306a36Sopenharmony_ci#include "xfs_mount.h"
1262306a36Sopenharmony_ci#include "xfs_log_format.h"
1362306a36Sopenharmony_ci#include "xfs_trans.h"
1462306a36Sopenharmony_ci#include "xfs_inode.h"
1562306a36Sopenharmony_ci#include "xfs_quota.h"
1662306a36Sopenharmony_ci#include "xfs_qm.h"
1762306a36Sopenharmony_ci#include "xfs_errortag.h"
1862306a36Sopenharmony_ci#include "xfs_error.h"
1962306a36Sopenharmony_ci#include "xfs_scrub.h"
2062306a36Sopenharmony_ci#include "scrub/scrub.h"
2162306a36Sopenharmony_ci#include "scrub/common.h"
2262306a36Sopenharmony_ci#include "scrub/trace.h"
2362306a36Sopenharmony_ci#include "scrub/repair.h"
2462306a36Sopenharmony_ci#include "scrub/health.h"
2562306a36Sopenharmony_ci#include "scrub/stats.h"
2662306a36Sopenharmony_ci#include "scrub/xfile.h"
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci/*
2962306a36Sopenharmony_ci * Online Scrub and Repair
3062306a36Sopenharmony_ci *
3162306a36Sopenharmony_ci * Traditionally, XFS (the kernel driver) did not know how to check or
3262306a36Sopenharmony_ci * repair on-disk data structures.  That task was left to the xfs_check
3362306a36Sopenharmony_ci * and xfs_repair tools, both of which require taking the filesystem
3462306a36Sopenharmony_ci * offline for a thorough but time consuming examination.  Online
3562306a36Sopenharmony_ci * scrub & repair, on the other hand, enables us to check the metadata
3662306a36Sopenharmony_ci * for obvious errors while carefully stepping around the filesystem's
3762306a36Sopenharmony_ci * ongoing operations, locking rules, etc.
3862306a36Sopenharmony_ci *
3962306a36Sopenharmony_ci * Given that most XFS metadata consist of records stored in a btree,
4062306a36Sopenharmony_ci * most of the checking functions iterate the btree blocks themselves
4162306a36Sopenharmony_ci * looking for irregularities.  When a record block is encountered, each
4262306a36Sopenharmony_ci * record can be checked for obviously bad values.  Record values can
4362306a36Sopenharmony_ci * also be cross-referenced against other btrees to look for potential
4462306a36Sopenharmony_ci * misunderstandings between pieces of metadata.
4562306a36Sopenharmony_ci *
4662306a36Sopenharmony_ci * It is expected that the checkers responsible for per-AG metadata
4762306a36Sopenharmony_ci * structures will lock the AG headers (AGI, AGF, AGFL), iterate the
4862306a36Sopenharmony_ci * metadata structure, and perform any relevant cross-referencing before
4962306a36Sopenharmony_ci * unlocking the AG and returning the results to userspace.  These
5062306a36Sopenharmony_ci * scrubbers must not keep an AG locked for too long to avoid tying up
5162306a36Sopenharmony_ci * the block and inode allocators.
5262306a36Sopenharmony_ci *
5362306a36Sopenharmony_ci * Block maps and b-trees rooted in an inode present a special challenge
5462306a36Sopenharmony_ci * because they can involve extents from any AG.  The general scrubber
5562306a36Sopenharmony_ci * structure of lock -> check -> xref -> unlock still holds, but AG
5662306a36Sopenharmony_ci * locking order rules /must/ be obeyed to avoid deadlocks.  The
5762306a36Sopenharmony_ci * ordering rule, of course, is that we must lock in increasing AG
5862306a36Sopenharmony_ci * order.  Helper functions are provided to track which AG headers we've
5962306a36Sopenharmony_ci * already locked.  If we detect an imminent locking order violation, we
6062306a36Sopenharmony_ci * can signal a potential deadlock, in which case the scrubber can jump
6162306a36Sopenharmony_ci * out to the top level, lock all the AGs in order, and retry the scrub.
6262306a36Sopenharmony_ci *
6362306a36Sopenharmony_ci * For file data (directories, extended attributes, symlinks) scrub, we
6462306a36Sopenharmony_ci * can simply lock the inode and walk the data.  For btree data
6562306a36Sopenharmony_ci * (directories and attributes) we follow the same btree-scrubbing
6662306a36Sopenharmony_ci * strategy outlined previously to check the records.
6762306a36Sopenharmony_ci *
6862306a36Sopenharmony_ci * We use a bit of trickery with transactions to avoid buffer deadlocks
6962306a36Sopenharmony_ci * if there is a cycle in the metadata.  The basic problem is that
7062306a36Sopenharmony_ci * travelling down a btree involves locking the current buffer at each
7162306a36Sopenharmony_ci * tree level.  If a pointer should somehow point back to a buffer that
7262306a36Sopenharmony_ci * we've already examined, we will deadlock due to the second buffer
7362306a36Sopenharmony_ci * locking attempt.  Note however that grabbing a buffer in transaction
7462306a36Sopenharmony_ci * context links the locked buffer to the transaction.  If we try to
7562306a36Sopenharmony_ci * re-grab the buffer in the context of the same transaction, we avoid
7662306a36Sopenharmony_ci * the second lock attempt and continue.  Between the verifier and the
7762306a36Sopenharmony_ci * scrubber, something will notice that something is amiss and report
7862306a36Sopenharmony_ci * the corruption.  Therefore, each scrubber will allocate an empty
7962306a36Sopenharmony_ci * transaction, attach buffers to it, and cancel the transaction at the
8062306a36Sopenharmony_ci * end of the scrub run.  Cancelling a non-dirty transaction simply
8162306a36Sopenharmony_ci * unlocks the buffers.
8262306a36Sopenharmony_ci *
 * Scrub communicates its findings to userspace through the error code
 * and through output flags set in the scrub context.  The error code
 * (errno) is used to report operational errors in performing the
 * scrub.  If the data structure itself is corrupt, the CORRUPT flag
 * will be set.  If the metadata is correct but otherwise suboptimal,
 * the PREEN flag will be set.  Further flags that report the outcome
 * of cross-referencing are described below.
9062306a36Sopenharmony_ci *
9162306a36Sopenharmony_ci * We perform secondary validation of filesystem metadata by
9262306a36Sopenharmony_ci * cross-referencing every record with all other available metadata.
9362306a36Sopenharmony_ci * For example, for block mapping extents, we verify that there are no
9462306a36Sopenharmony_ci * records in the free space and inode btrees corresponding to that
9562306a36Sopenharmony_ci * space extent and that there is a corresponding entry in the reverse
9662306a36Sopenharmony_ci * mapping btree.  Inconsistent metadata is noted by setting the
9762306a36Sopenharmony_ci * XCORRUPT flag; btree query function errors are noted by setting the
9862306a36Sopenharmony_ci * XFAIL flag and deleting the cursor to prevent further attempts to
9962306a36Sopenharmony_ci * cross-reference with a defective btree.
10062306a36Sopenharmony_ci *
10162306a36Sopenharmony_ci * If a piece of metadata proves corrupt or suboptimal, the userspace
10262306a36Sopenharmony_ci * program can ask the kernel to apply some tender loving care (TLC) to
10362306a36Sopenharmony_ci * the metadata object by setting the REPAIR flag and re-calling the
10462306a36Sopenharmony_ci * scrub ioctl.  "Corruption" is defined by metadata violating the
10562306a36Sopenharmony_ci * on-disk specification; operations cannot continue if the violation is
10662306a36Sopenharmony_ci * left untreated.  It is possible for XFS to continue if an object is
10762306a36Sopenharmony_ci * "suboptimal", however performance may be degraded.  Repairs are
10862306a36Sopenharmony_ci * usually performed by rebuilding the metadata entirely out of
10962306a36Sopenharmony_ci * redundant metadata.  Optimizing, on the other hand, can sometimes be
11062306a36Sopenharmony_ci * done without rebuilding entire structures.
11162306a36Sopenharmony_ci *
11262306a36Sopenharmony_ci * Generally speaking, the repair code has the following code structure:
11362306a36Sopenharmony_ci * Lock -> scrub -> repair -> commit -> re-lock -> re-scrub -> unlock.
11462306a36Sopenharmony_ci * The first check helps us figure out if we need to rebuild or simply
11562306a36Sopenharmony_ci * optimize the structure so that the rebuild knows what to do.  The
11662306a36Sopenharmony_ci * second check evaluates the completeness of the repair; that is what
11762306a36Sopenharmony_ci * is reported to userspace.
11862306a36Sopenharmony_ci *
11962306a36Sopenharmony_ci * A quick note on symbol prefixes:
12062306a36Sopenharmony_ci * - "xfs_" are general XFS symbols.
12162306a36Sopenharmony_ci * - "xchk_" are symbols related to metadata checking.
12262306a36Sopenharmony_ci * - "xrep_" are symbols related to metadata repair.
12362306a36Sopenharmony_ci * - "xfs_scrub_" are symbols that tie online fsck to the rest of XFS.
12462306a36Sopenharmony_ci */
12562306a36Sopenharmony_ci
/*
 * Scrub probe.
 *
 * Userspace (xfs_scrub) calls this to find out whether the running
 * kernel is willing to scrub or repair a given mountpoint.  By the time
 * we get here the ioctl inputs have been validated and the filesystem
 * has been prepared for a scrub (or repair) operation, so there is no
 * metadata to examine; we return straight away and let userspace judge
 * kernel support from the errno and the returned structure state.
 *
 * Returns the error reported through xchk_should_terminate() if that
 * helper says the scrub must stop, or zero otherwise.
 */
static int
xchk_probe(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	/* The errno is only consulted when termination was requested. */
	return xchk_should_terminate(sc, &error) ? error : 0;
}
14762306a36Sopenharmony_ci
14862306a36Sopenharmony_ci/* Scrub setup and teardown */
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_cistatic inline void
15162306a36Sopenharmony_cixchk_fsgates_disable(
15262306a36Sopenharmony_ci	struct xfs_scrub	*sc)
15362306a36Sopenharmony_ci{
15462306a36Sopenharmony_ci	if (!(sc->flags & XCHK_FSGATES_ALL))
15562306a36Sopenharmony_ci		return;
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	trace_xchk_fsgates_disable(sc, sc->flags & XCHK_FSGATES_ALL);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci	if (sc->flags & XCHK_FSGATES_DRAIN)
16062306a36Sopenharmony_ci		xfs_drain_wait_disable();
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	sc->flags &= ~XCHK_FSGATES_ALL;
16362306a36Sopenharmony_ci}
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci/* Free all the resources and finish the transactions. */
16662306a36Sopenharmony_ciSTATIC int
16762306a36Sopenharmony_cixchk_teardown(
16862306a36Sopenharmony_ci	struct xfs_scrub	*sc,
16962306a36Sopenharmony_ci	int			error)
17062306a36Sopenharmony_ci{
17162306a36Sopenharmony_ci	xchk_ag_free(sc, &sc->sa);
17262306a36Sopenharmony_ci	if (sc->tp) {
17362306a36Sopenharmony_ci		if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
17462306a36Sopenharmony_ci			error = xfs_trans_commit(sc->tp);
17562306a36Sopenharmony_ci		else
17662306a36Sopenharmony_ci			xfs_trans_cancel(sc->tp);
17762306a36Sopenharmony_ci		sc->tp = NULL;
17862306a36Sopenharmony_ci	}
17962306a36Sopenharmony_ci	if (sc->ip) {
18062306a36Sopenharmony_ci		if (sc->ilock_flags)
18162306a36Sopenharmony_ci			xchk_iunlock(sc, sc->ilock_flags);
18262306a36Sopenharmony_ci		xchk_irele(sc, sc->ip);
18362306a36Sopenharmony_ci		sc->ip = NULL;
18462306a36Sopenharmony_ci	}
18562306a36Sopenharmony_ci	if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
18662306a36Sopenharmony_ci		sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
18762306a36Sopenharmony_ci		mnt_drop_write_file(sc->file);
18862306a36Sopenharmony_ci	}
18962306a36Sopenharmony_ci	if (sc->xfile) {
19062306a36Sopenharmony_ci		xfile_destroy(sc->xfile);
19162306a36Sopenharmony_ci		sc->xfile = NULL;
19262306a36Sopenharmony_ci	}
19362306a36Sopenharmony_ci	if (sc->buf) {
19462306a36Sopenharmony_ci		if (sc->buf_cleanup)
19562306a36Sopenharmony_ci			sc->buf_cleanup(sc->buf);
19662306a36Sopenharmony_ci		kvfree(sc->buf);
19762306a36Sopenharmony_ci		sc->buf_cleanup = NULL;
19862306a36Sopenharmony_ci		sc->buf = NULL;
19962306a36Sopenharmony_ci	}
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	xchk_fsgates_disable(sc);
20262306a36Sopenharmony_ci	return error;
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci/* Scrubbing dispatch. */
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_cistatic const struct xchk_meta_ops meta_scrub_ops[] = {
20862306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_PROBE] = {	/* ioctl presence test */
20962306a36Sopenharmony_ci		.type	= ST_NONE,
21062306a36Sopenharmony_ci		.setup	= xchk_setup_fs,
21162306a36Sopenharmony_ci		.scrub	= xchk_probe,
21262306a36Sopenharmony_ci		.repair = xrep_probe,
21362306a36Sopenharmony_ci	},
21462306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_SB] = {		/* superblock */
21562306a36Sopenharmony_ci		.type	= ST_PERAG,
21662306a36Sopenharmony_ci		.setup	= xchk_setup_agheader,
21762306a36Sopenharmony_ci		.scrub	= xchk_superblock,
21862306a36Sopenharmony_ci		.repair	= xrep_superblock,
21962306a36Sopenharmony_ci	},
22062306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_AGF] = {	/* agf */
22162306a36Sopenharmony_ci		.type	= ST_PERAG,
22262306a36Sopenharmony_ci		.setup	= xchk_setup_agheader,
22362306a36Sopenharmony_ci		.scrub	= xchk_agf,
22462306a36Sopenharmony_ci		.repair	= xrep_agf,
22562306a36Sopenharmony_ci	},
22662306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_AGFL]= {	/* agfl */
22762306a36Sopenharmony_ci		.type	= ST_PERAG,
22862306a36Sopenharmony_ci		.setup	= xchk_setup_agheader,
22962306a36Sopenharmony_ci		.scrub	= xchk_agfl,
23062306a36Sopenharmony_ci		.repair	= xrep_agfl,
23162306a36Sopenharmony_ci	},
23262306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_AGI] = {	/* agi */
23362306a36Sopenharmony_ci		.type	= ST_PERAG,
23462306a36Sopenharmony_ci		.setup	= xchk_setup_agheader,
23562306a36Sopenharmony_ci		.scrub	= xchk_agi,
23662306a36Sopenharmony_ci		.repair	= xrep_agi,
23762306a36Sopenharmony_ci	},
23862306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_BNOBT] = {	/* bnobt */
23962306a36Sopenharmony_ci		.type	= ST_PERAG,
24062306a36Sopenharmony_ci		.setup	= xchk_setup_ag_allocbt,
24162306a36Sopenharmony_ci		.scrub	= xchk_bnobt,
24262306a36Sopenharmony_ci		.repair	= xrep_notsupported,
24362306a36Sopenharmony_ci	},
24462306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_CNTBT] = {	/* cntbt */
24562306a36Sopenharmony_ci		.type	= ST_PERAG,
24662306a36Sopenharmony_ci		.setup	= xchk_setup_ag_allocbt,
24762306a36Sopenharmony_ci		.scrub	= xchk_cntbt,
24862306a36Sopenharmony_ci		.repair	= xrep_notsupported,
24962306a36Sopenharmony_ci	},
25062306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_INOBT] = {	/* inobt */
25162306a36Sopenharmony_ci		.type	= ST_PERAG,
25262306a36Sopenharmony_ci		.setup	= xchk_setup_ag_iallocbt,
25362306a36Sopenharmony_ci		.scrub	= xchk_inobt,
25462306a36Sopenharmony_ci		.repair	= xrep_notsupported,
25562306a36Sopenharmony_ci	},
25662306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_FINOBT] = {	/* finobt */
25762306a36Sopenharmony_ci		.type	= ST_PERAG,
25862306a36Sopenharmony_ci		.setup	= xchk_setup_ag_iallocbt,
25962306a36Sopenharmony_ci		.scrub	= xchk_finobt,
26062306a36Sopenharmony_ci		.has	= xfs_has_finobt,
26162306a36Sopenharmony_ci		.repair	= xrep_notsupported,
26262306a36Sopenharmony_ci	},
26362306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_RMAPBT] = {	/* rmapbt */
26462306a36Sopenharmony_ci		.type	= ST_PERAG,
26562306a36Sopenharmony_ci		.setup	= xchk_setup_ag_rmapbt,
26662306a36Sopenharmony_ci		.scrub	= xchk_rmapbt,
26762306a36Sopenharmony_ci		.has	= xfs_has_rmapbt,
26862306a36Sopenharmony_ci		.repair	= xrep_notsupported,
26962306a36Sopenharmony_ci	},
27062306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_REFCNTBT] = {	/* refcountbt */
27162306a36Sopenharmony_ci		.type	= ST_PERAG,
27262306a36Sopenharmony_ci		.setup	= xchk_setup_ag_refcountbt,
27362306a36Sopenharmony_ci		.scrub	= xchk_refcountbt,
27462306a36Sopenharmony_ci		.has	= xfs_has_reflink,
27562306a36Sopenharmony_ci		.repair	= xrep_notsupported,
27662306a36Sopenharmony_ci	},
27762306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_INODE] = {	/* inode record */
27862306a36Sopenharmony_ci		.type	= ST_INODE,
27962306a36Sopenharmony_ci		.setup	= xchk_setup_inode,
28062306a36Sopenharmony_ci		.scrub	= xchk_inode,
28162306a36Sopenharmony_ci		.repair	= xrep_notsupported,
28262306a36Sopenharmony_ci	},
28362306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_BMBTD] = {	/* inode data fork */
28462306a36Sopenharmony_ci		.type	= ST_INODE,
28562306a36Sopenharmony_ci		.setup	= xchk_setup_inode_bmap,
28662306a36Sopenharmony_ci		.scrub	= xchk_bmap_data,
28762306a36Sopenharmony_ci		.repair	= xrep_notsupported,
28862306a36Sopenharmony_ci	},
28962306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_BMBTA] = {	/* inode attr fork */
29062306a36Sopenharmony_ci		.type	= ST_INODE,
29162306a36Sopenharmony_ci		.setup	= xchk_setup_inode_bmap,
29262306a36Sopenharmony_ci		.scrub	= xchk_bmap_attr,
29362306a36Sopenharmony_ci		.repair	= xrep_notsupported,
29462306a36Sopenharmony_ci	},
29562306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_BMBTC] = {	/* inode CoW fork */
29662306a36Sopenharmony_ci		.type	= ST_INODE,
29762306a36Sopenharmony_ci		.setup	= xchk_setup_inode_bmap,
29862306a36Sopenharmony_ci		.scrub	= xchk_bmap_cow,
29962306a36Sopenharmony_ci		.repair	= xrep_notsupported,
30062306a36Sopenharmony_ci	},
30162306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_DIR] = {	/* directory */
30262306a36Sopenharmony_ci		.type	= ST_INODE,
30362306a36Sopenharmony_ci		.setup	= xchk_setup_directory,
30462306a36Sopenharmony_ci		.scrub	= xchk_directory,
30562306a36Sopenharmony_ci		.repair	= xrep_notsupported,
30662306a36Sopenharmony_ci	},
30762306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_XATTR] = {	/* extended attributes */
30862306a36Sopenharmony_ci		.type	= ST_INODE,
30962306a36Sopenharmony_ci		.setup	= xchk_setup_xattr,
31062306a36Sopenharmony_ci		.scrub	= xchk_xattr,
31162306a36Sopenharmony_ci		.repair	= xrep_notsupported,
31262306a36Sopenharmony_ci	},
31362306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_SYMLINK] = {	/* symbolic link */
31462306a36Sopenharmony_ci		.type	= ST_INODE,
31562306a36Sopenharmony_ci		.setup	= xchk_setup_symlink,
31662306a36Sopenharmony_ci		.scrub	= xchk_symlink,
31762306a36Sopenharmony_ci		.repair	= xrep_notsupported,
31862306a36Sopenharmony_ci	},
31962306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_PARENT] = {	/* parent pointers */
32062306a36Sopenharmony_ci		.type	= ST_INODE,
32162306a36Sopenharmony_ci		.setup	= xchk_setup_parent,
32262306a36Sopenharmony_ci		.scrub	= xchk_parent,
32362306a36Sopenharmony_ci		.repair	= xrep_notsupported,
32462306a36Sopenharmony_ci	},
32562306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_RTBITMAP] = {	/* realtime bitmap */
32662306a36Sopenharmony_ci		.type	= ST_FS,
32762306a36Sopenharmony_ci		.setup	= xchk_setup_rtbitmap,
32862306a36Sopenharmony_ci		.scrub	= xchk_rtbitmap,
32962306a36Sopenharmony_ci		.has	= xfs_has_realtime,
33062306a36Sopenharmony_ci		.repair	= xrep_notsupported,
33162306a36Sopenharmony_ci	},
33262306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_RTSUM] = {	/* realtime summary */
33362306a36Sopenharmony_ci		.type	= ST_FS,
33462306a36Sopenharmony_ci		.setup	= xchk_setup_rtsummary,
33562306a36Sopenharmony_ci		.scrub	= xchk_rtsummary,
33662306a36Sopenharmony_ci		.has	= xfs_has_realtime,
33762306a36Sopenharmony_ci		.repair	= xrep_notsupported,
33862306a36Sopenharmony_ci	},
33962306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_UQUOTA] = {	/* user quota */
34062306a36Sopenharmony_ci		.type	= ST_FS,
34162306a36Sopenharmony_ci		.setup	= xchk_setup_quota,
34262306a36Sopenharmony_ci		.scrub	= xchk_quota,
34362306a36Sopenharmony_ci		.repair	= xrep_notsupported,
34462306a36Sopenharmony_ci	},
34562306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_GQUOTA] = {	/* group quota */
34662306a36Sopenharmony_ci		.type	= ST_FS,
34762306a36Sopenharmony_ci		.setup	= xchk_setup_quota,
34862306a36Sopenharmony_ci		.scrub	= xchk_quota,
34962306a36Sopenharmony_ci		.repair	= xrep_notsupported,
35062306a36Sopenharmony_ci	},
35162306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_PQUOTA] = {	/* project quota */
35262306a36Sopenharmony_ci		.type	= ST_FS,
35362306a36Sopenharmony_ci		.setup	= xchk_setup_quota,
35462306a36Sopenharmony_ci		.scrub	= xchk_quota,
35562306a36Sopenharmony_ci		.repair	= xrep_notsupported,
35662306a36Sopenharmony_ci	},
35762306a36Sopenharmony_ci	[XFS_SCRUB_TYPE_FSCOUNTERS] = {	/* fs summary counters */
35862306a36Sopenharmony_ci		.type	= ST_FS,
35962306a36Sopenharmony_ci		.setup	= xchk_setup_fscounters,
36062306a36Sopenharmony_ci		.scrub	= xchk_fscounters,
36162306a36Sopenharmony_ci		.repair	= xrep_notsupported,
36262306a36Sopenharmony_ci	},
36362306a36Sopenharmony_ci};
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_cistatic int
36662306a36Sopenharmony_cixchk_validate_inputs(
36762306a36Sopenharmony_ci	struct xfs_mount		*mp,
36862306a36Sopenharmony_ci	struct xfs_scrub_metadata	*sm)
36962306a36Sopenharmony_ci{
37062306a36Sopenharmony_ci	int				error;
37162306a36Sopenharmony_ci	const struct xchk_meta_ops	*ops;
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	error = -EINVAL;
37462306a36Sopenharmony_ci	/* Check our inputs. */
37562306a36Sopenharmony_ci	sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
37662306a36Sopenharmony_ci	if (sm->sm_flags & ~XFS_SCRUB_FLAGS_IN)
37762306a36Sopenharmony_ci		goto out;
37862306a36Sopenharmony_ci	/* sm_reserved[] must be zero */
37962306a36Sopenharmony_ci	if (memchr_inv(sm->sm_reserved, 0, sizeof(sm->sm_reserved)))
38062306a36Sopenharmony_ci		goto out;
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci	error = -ENOENT;
38362306a36Sopenharmony_ci	/* Do we know about this type of metadata? */
38462306a36Sopenharmony_ci	if (sm->sm_type >= XFS_SCRUB_TYPE_NR)
38562306a36Sopenharmony_ci		goto out;
38662306a36Sopenharmony_ci	ops = &meta_scrub_ops[sm->sm_type];
38762306a36Sopenharmony_ci	if (ops->setup == NULL || ops->scrub == NULL)
38862306a36Sopenharmony_ci		goto out;
38962306a36Sopenharmony_ci	/* Does this fs even support this type of metadata? */
39062306a36Sopenharmony_ci	if (ops->has && !ops->has(mp))
39162306a36Sopenharmony_ci		goto out;
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_ci	error = -EINVAL;
39462306a36Sopenharmony_ci	/* restricting fields must be appropriate for type */
39562306a36Sopenharmony_ci	switch (ops->type) {
39662306a36Sopenharmony_ci	case ST_NONE:
39762306a36Sopenharmony_ci	case ST_FS:
39862306a36Sopenharmony_ci		if (sm->sm_ino || sm->sm_gen || sm->sm_agno)
39962306a36Sopenharmony_ci			goto out;
40062306a36Sopenharmony_ci		break;
40162306a36Sopenharmony_ci	case ST_PERAG:
40262306a36Sopenharmony_ci		if (sm->sm_ino || sm->sm_gen ||
40362306a36Sopenharmony_ci		    sm->sm_agno >= mp->m_sb.sb_agcount)
40462306a36Sopenharmony_ci			goto out;
40562306a36Sopenharmony_ci		break;
40662306a36Sopenharmony_ci	case ST_INODE:
40762306a36Sopenharmony_ci		if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
40862306a36Sopenharmony_ci			goto out;
40962306a36Sopenharmony_ci		break;
41062306a36Sopenharmony_ci	default:
41162306a36Sopenharmony_ci		goto out;
41262306a36Sopenharmony_ci	}
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci	/* No rebuild without repair. */
41562306a36Sopenharmony_ci	if ((sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) &&
41662306a36Sopenharmony_ci	    !(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
41762306a36Sopenharmony_ci		return -EINVAL;
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_ci	/*
42062306a36Sopenharmony_ci	 * We only want to repair read-write v5+ filesystems.  Defer the check
42162306a36Sopenharmony_ci	 * for ops->repair until after our scrub confirms that we need to
42262306a36Sopenharmony_ci	 * perform repairs so that we avoid failing due to not supporting
42362306a36Sopenharmony_ci	 * repairing an object that doesn't need repairs.
42462306a36Sopenharmony_ci	 */
42562306a36Sopenharmony_ci	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
42662306a36Sopenharmony_ci		error = -EOPNOTSUPP;
42762306a36Sopenharmony_ci		if (!xfs_has_crc(mp))
42862306a36Sopenharmony_ci			goto out;
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci		error = -EROFS;
43162306a36Sopenharmony_ci		if (xfs_is_readonly(mp))
43262306a36Sopenharmony_ci			goto out;
43362306a36Sopenharmony_ci	}
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_ci	error = 0;
43662306a36Sopenharmony_ciout:
43762306a36Sopenharmony_ci	return error;
43862306a36Sopenharmony_ci}
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci#ifdef CONFIG_XFS_ONLINE_REPAIR
44162306a36Sopenharmony_cistatic inline void xchk_postmortem(struct xfs_scrub *sc)
44262306a36Sopenharmony_ci{
44362306a36Sopenharmony_ci	/*
44462306a36Sopenharmony_ci	 * Userspace asked us to repair something, we repaired it, rescanned
44562306a36Sopenharmony_ci	 * it, and the rescan says it's still broken.  Scream about this in
44662306a36Sopenharmony_ci	 * the system logs.
44762306a36Sopenharmony_ci	 */
44862306a36Sopenharmony_ci	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
44962306a36Sopenharmony_ci	    (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
45062306a36Sopenharmony_ci				 XFS_SCRUB_OFLAG_XCORRUPT)))
45162306a36Sopenharmony_ci		xrep_failure(sc->mp);
45262306a36Sopenharmony_ci}
45362306a36Sopenharmony_ci#else
45462306a36Sopenharmony_cistatic inline void xchk_postmortem(struct xfs_scrub *sc)
45562306a36Sopenharmony_ci{
45662306a36Sopenharmony_ci	/*
45762306a36Sopenharmony_ci	 * Userspace asked us to scrub something, it's broken, and we have no
45862306a36Sopenharmony_ci	 * way of fixing it.  Scream in the logs.
45962306a36Sopenharmony_ci	 */
46062306a36Sopenharmony_ci	if (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
46162306a36Sopenharmony_ci				XFS_SCRUB_OFLAG_XCORRUPT))
46262306a36Sopenharmony_ci		xfs_alert_ratelimited(sc->mp,
46362306a36Sopenharmony_ci				"Corruption detected during scrub.");
46462306a36Sopenharmony_ci}
46562306a36Sopenharmony_ci#endif /* CONFIG_XFS_ONLINE_REPAIR */
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci/* Dispatch metadata scrubbing. */
46862306a36Sopenharmony_ciint
46962306a36Sopenharmony_cixfs_scrub_metadata(
47062306a36Sopenharmony_ci	struct file			*file,
47162306a36Sopenharmony_ci	struct xfs_scrub_metadata	*sm)
47262306a36Sopenharmony_ci{
47362306a36Sopenharmony_ci	struct xchk_stats_run		run = { };
47462306a36Sopenharmony_ci	struct xfs_scrub		*sc;
47562306a36Sopenharmony_ci	struct xfs_mount		*mp = XFS_I(file_inode(file))->i_mount;
47662306a36Sopenharmony_ci	u64				check_start;
47762306a36Sopenharmony_ci	int				error = 0;
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
48062306a36Sopenharmony_ci		(sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR));
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci	trace_xchk_start(XFS_I(file_inode(file)), sm, error);
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci	/* Forbidden if we are shut down or mounted norecovery. */
48562306a36Sopenharmony_ci	error = -ESHUTDOWN;
48662306a36Sopenharmony_ci	if (xfs_is_shutdown(mp))
48762306a36Sopenharmony_ci		goto out;
48862306a36Sopenharmony_ci	error = -ENOTRECOVERABLE;
48962306a36Sopenharmony_ci	if (xfs_has_norecovery(mp))
49062306a36Sopenharmony_ci		goto out;
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci	error = xchk_validate_inputs(mp, sm);
49362306a36Sopenharmony_ci	if (error)
49462306a36Sopenharmony_ci		goto out;
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci	xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
49762306a36Sopenharmony_ci "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci	sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
50062306a36Sopenharmony_ci	if (!sc) {
50162306a36Sopenharmony_ci		error = -ENOMEM;
50262306a36Sopenharmony_ci		goto out;
50362306a36Sopenharmony_ci	}
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci	sc->mp = mp;
50662306a36Sopenharmony_ci	sc->file = file;
50762306a36Sopenharmony_ci	sc->sm = sm;
50862306a36Sopenharmony_ci	sc->ops = &meta_scrub_ops[sm->sm_type];
50962306a36Sopenharmony_ci	sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
51062306a36Sopenharmony_ciretry_op:
51162306a36Sopenharmony_ci	/*
51262306a36Sopenharmony_ci	 * When repairs are allowed, prevent freezing or readonly remount while
51362306a36Sopenharmony_ci	 * scrub is running with a real transaction.
51462306a36Sopenharmony_ci	 */
51562306a36Sopenharmony_ci	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
51662306a36Sopenharmony_ci		error = mnt_want_write_file(sc->file);
51762306a36Sopenharmony_ci		if (error)
51862306a36Sopenharmony_ci			goto out_sc;
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci		sc->flags |= XCHK_HAVE_FREEZE_PROT;
52162306a36Sopenharmony_ci	}
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci	/* Set up for the operation. */
52462306a36Sopenharmony_ci	error = sc->ops->setup(sc);
52562306a36Sopenharmony_ci	if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
52662306a36Sopenharmony_ci		goto try_harder;
52762306a36Sopenharmony_ci	if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
52862306a36Sopenharmony_ci		goto need_drain;
52962306a36Sopenharmony_ci	if (error)
53062306a36Sopenharmony_ci		goto out_teardown;
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci	/* Scrub for errors. */
53362306a36Sopenharmony_ci	check_start = xchk_stats_now();
53462306a36Sopenharmony_ci	error = sc->ops->scrub(sc);
53562306a36Sopenharmony_ci	run.scrub_ns += xchk_stats_elapsed_ns(check_start);
53662306a36Sopenharmony_ci	if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
53762306a36Sopenharmony_ci		goto try_harder;
53862306a36Sopenharmony_ci	if (error == -ECHRNG && !(sc->flags & XCHK_NEED_DRAIN))
53962306a36Sopenharmony_ci		goto need_drain;
54062306a36Sopenharmony_ci	if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
54162306a36Sopenharmony_ci		goto out_teardown;
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci	xchk_update_health(sc);
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
54662306a36Sopenharmony_ci	    !(sc->flags & XREP_ALREADY_FIXED)) {
54762306a36Sopenharmony_ci		bool needs_fix = xchk_needs_repair(sc->sm);
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci		/* Userspace asked us to rebuild the structure regardless. */
55062306a36Sopenharmony_ci		if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD)
55162306a36Sopenharmony_ci			needs_fix = true;
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci		/* Let debug users force us into the repair routines. */
55462306a36Sopenharmony_ci		if (XFS_TEST_ERROR(needs_fix, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
55562306a36Sopenharmony_ci			needs_fix = true;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci		/*
55862306a36Sopenharmony_ci		 * If userspace asked for a repair but it wasn't necessary,
55962306a36Sopenharmony_ci		 * report that back to userspace.
56062306a36Sopenharmony_ci		 */
56162306a36Sopenharmony_ci		if (!needs_fix) {
56262306a36Sopenharmony_ci			sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
56362306a36Sopenharmony_ci			goto out_nofix;
56462306a36Sopenharmony_ci		}
56562306a36Sopenharmony_ci
56662306a36Sopenharmony_ci		/*
56762306a36Sopenharmony_ci		 * If it's broken, userspace wants us to fix it, and we haven't
56862306a36Sopenharmony_ci		 * already tried to fix it, then attempt a repair.
56962306a36Sopenharmony_ci		 */
57062306a36Sopenharmony_ci		error = xrep_attempt(sc, &run);
57162306a36Sopenharmony_ci		if (error == -EAGAIN) {
57262306a36Sopenharmony_ci			/*
57362306a36Sopenharmony_ci			 * Either the repair function succeeded or it couldn't
57462306a36Sopenharmony_ci			 * get all the resources it needs; either way, we go
57562306a36Sopenharmony_ci			 * back to the beginning and call the scrub function.
57662306a36Sopenharmony_ci			 */
57762306a36Sopenharmony_ci			error = xchk_teardown(sc, 0);
57862306a36Sopenharmony_ci			if (error) {
57962306a36Sopenharmony_ci				xrep_failure(mp);
58062306a36Sopenharmony_ci				goto out_sc;
58162306a36Sopenharmony_ci			}
58262306a36Sopenharmony_ci			goto retry_op;
58362306a36Sopenharmony_ci		}
58462306a36Sopenharmony_ci	}
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ciout_nofix:
58762306a36Sopenharmony_ci	xchk_postmortem(sc);
58862306a36Sopenharmony_ciout_teardown:
58962306a36Sopenharmony_ci	error = xchk_teardown(sc, error);
59062306a36Sopenharmony_ciout_sc:
59162306a36Sopenharmony_ci	if (error != -ENOENT)
59262306a36Sopenharmony_ci		xchk_stats_merge(mp, sm, &run);
59362306a36Sopenharmony_ci	kfree(sc);
59462306a36Sopenharmony_ciout:
59562306a36Sopenharmony_ci	trace_xchk_done(XFS_I(file_inode(file)), sm, error);
59662306a36Sopenharmony_ci	if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
59762306a36Sopenharmony_ci		sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
59862306a36Sopenharmony_ci		error = 0;
59962306a36Sopenharmony_ci	}
60062306a36Sopenharmony_ci	return error;
60162306a36Sopenharmony_cineed_drain:
60262306a36Sopenharmony_ci	error = xchk_teardown(sc, 0);
60362306a36Sopenharmony_ci	if (error)
60462306a36Sopenharmony_ci		goto out_sc;
60562306a36Sopenharmony_ci	sc->flags |= XCHK_NEED_DRAIN;
60662306a36Sopenharmony_ci	run.retries++;
60762306a36Sopenharmony_ci	goto retry_op;
60862306a36Sopenharmony_citry_harder:
60962306a36Sopenharmony_ci	/*
61062306a36Sopenharmony_ci	 * Scrubbers return -EDEADLOCK to mean 'try harder'.  Tear down
61162306a36Sopenharmony_ci	 * everything we hold, then set up again with preparation for
61262306a36Sopenharmony_ci	 * worst-case scenarios.
61362306a36Sopenharmony_ci	 */
61462306a36Sopenharmony_ci	error = xchk_teardown(sc, 0);
61562306a36Sopenharmony_ci	if (error)
61662306a36Sopenharmony_ci		goto out_sc;
61762306a36Sopenharmony_ci	sc->flags |= XCHK_TRY_HARDER;
61862306a36Sopenharmony_ci	run.retries++;
61962306a36Sopenharmony_ci	goto retry_op;
62062306a36Sopenharmony_ci}
621