162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
462306a36Sopenharmony_ci * Author: Darrick J. Wong <djwong@kernel.org>
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci#include "xfs.h"
762306a36Sopenharmony_ci#include "xfs_fs.h"
862306a36Sopenharmony_ci#include "xfs_shared.h"
962306a36Sopenharmony_ci#include "xfs_format.h"
1062306a36Sopenharmony_ci#include "xfs_trans_resv.h"
1162306a36Sopenharmony_ci#include "xfs_mount.h"
1262306a36Sopenharmony_ci#include "xfs_btree.h"
1362306a36Sopenharmony_ci#include "xfs_log_format.h"
1462306a36Sopenharmony_ci#include "xfs_trans.h"
1562306a36Sopenharmony_ci#include "xfs_inode.h"
1662306a36Sopenharmony_ci#include "xfs_icache.h"
1762306a36Sopenharmony_ci#include "xfs_alloc.h"
1862306a36Sopenharmony_ci#include "xfs_alloc_btree.h"
1962306a36Sopenharmony_ci#include "xfs_ialloc.h"
2062306a36Sopenharmony_ci#include "xfs_ialloc_btree.h"
2162306a36Sopenharmony_ci#include "xfs_refcount_btree.h"
2262306a36Sopenharmony_ci#include "xfs_rmap.h"
2362306a36Sopenharmony_ci#include "xfs_rmap_btree.h"
2462306a36Sopenharmony_ci#include "xfs_log.h"
2562306a36Sopenharmony_ci#include "xfs_trans_priv.h"
2662306a36Sopenharmony_ci#include "xfs_da_format.h"
2762306a36Sopenharmony_ci#include "xfs_da_btree.h"
2862306a36Sopenharmony_ci#include "xfs_attr.h"
2962306a36Sopenharmony_ci#include "xfs_reflink.h"
3062306a36Sopenharmony_ci#include "xfs_ag.h"
3162306a36Sopenharmony_ci#include "scrub/scrub.h"
3262306a36Sopenharmony_ci#include "scrub/common.h"
3362306a36Sopenharmony_ci#include "scrub/trace.h"
3462306a36Sopenharmony_ci#include "scrub/repair.h"
3562306a36Sopenharmony_ci#include "scrub/health.h"
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci/* Common code for the metadata scrubbers. */
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_ci/*
4062306a36Sopenharmony_ci * Handling operational errors.
4162306a36Sopenharmony_ci *
4262306a36Sopenharmony_ci * The *_process_error() family of functions are used to process error return
4362306a36Sopenharmony_ci * codes from functions called as part of a scrub operation.
4462306a36Sopenharmony_ci *
4562306a36Sopenharmony_ci * If there's no error, we return true to tell the caller that it's ok
4662306a36Sopenharmony_ci * to move on to the next check in its list.
4762306a36Sopenharmony_ci *
4862306a36Sopenharmony_ci * For non-verifier errors (e.g. ENOMEM) we return false to tell the
4962306a36Sopenharmony_ci * caller that something bad happened, and we preserve *error so that
5062306a36Sopenharmony_ci * the caller can return the *error up the stack to userspace.
5162306a36Sopenharmony_ci *
5262306a36Sopenharmony_ci * Verifier errors (EFSBADCRC/EFSCORRUPTED) are recorded by setting
5362306a36Sopenharmony_ci * OFLAG_CORRUPT in sm_flags and the *error is cleared.  In other words,
5462306a36Sopenharmony_ci * we track verifier errors (and failed scrub checks) via OFLAG_CORRUPT,
5562306a36Sopenharmony_ci * not via return codes.  We return false to tell the caller that
5662306a36Sopenharmony_ci * something bad happened.  Since the error has been cleared, the caller
5762306a36Sopenharmony_ci * will (presumably) return that zero and scrubbing will move on to
5862306a36Sopenharmony_ci * whatever's next.
5962306a36Sopenharmony_ci *
6062306a36Sopenharmony_ci * ftrace can be used to record the precise metadata location and the
6162306a36Sopenharmony_ci * approximate code location of the failed operation.
6262306a36Sopenharmony_ci */
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci/* Check for operational errors. */
6562306a36Sopenharmony_cistatic bool
6662306a36Sopenharmony_ci__xchk_process_error(
6762306a36Sopenharmony_ci	struct xfs_scrub	*sc,
6862306a36Sopenharmony_ci	xfs_agnumber_t		agno,
6962306a36Sopenharmony_ci	xfs_agblock_t		bno,
7062306a36Sopenharmony_ci	int			*error,
7162306a36Sopenharmony_ci	__u32			errflag,
7262306a36Sopenharmony_ci	void			*ret_ip)
7362306a36Sopenharmony_ci{
7462306a36Sopenharmony_ci	switch (*error) {
7562306a36Sopenharmony_ci	case 0:
7662306a36Sopenharmony_ci		return true;
7762306a36Sopenharmony_ci	case -EDEADLOCK:
7862306a36Sopenharmony_ci	case -ECHRNG:
7962306a36Sopenharmony_ci		/* Used to restart an op with deadlock avoidance. */
8062306a36Sopenharmony_ci		trace_xchk_deadlock_retry(
8162306a36Sopenharmony_ci				sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
8262306a36Sopenharmony_ci				sc->sm, *error);
8362306a36Sopenharmony_ci		break;
8462306a36Sopenharmony_ci	case -EFSBADCRC:
8562306a36Sopenharmony_ci	case -EFSCORRUPTED:
8662306a36Sopenharmony_ci		/* Note the badness but don't abort. */
8762306a36Sopenharmony_ci		sc->sm->sm_flags |= errflag;
8862306a36Sopenharmony_ci		*error = 0;
8962306a36Sopenharmony_ci		fallthrough;
9062306a36Sopenharmony_ci	default:
9162306a36Sopenharmony_ci		trace_xchk_op_error(sc, agno, bno, *error,
9262306a36Sopenharmony_ci				ret_ip);
9362306a36Sopenharmony_ci		break;
9462306a36Sopenharmony_ci	}
9562306a36Sopenharmony_ci	return false;
9662306a36Sopenharmony_ci}
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_cibool
9962306a36Sopenharmony_cixchk_process_error(
10062306a36Sopenharmony_ci	struct xfs_scrub	*sc,
10162306a36Sopenharmony_ci	xfs_agnumber_t		agno,
10262306a36Sopenharmony_ci	xfs_agblock_t		bno,
10362306a36Sopenharmony_ci	int			*error)
10462306a36Sopenharmony_ci{
10562306a36Sopenharmony_ci	return __xchk_process_error(sc, agno, bno, error,
10662306a36Sopenharmony_ci			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
10762306a36Sopenharmony_ci}
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_cibool
11062306a36Sopenharmony_cixchk_xref_process_error(
11162306a36Sopenharmony_ci	struct xfs_scrub	*sc,
11262306a36Sopenharmony_ci	xfs_agnumber_t		agno,
11362306a36Sopenharmony_ci	xfs_agblock_t		bno,
11462306a36Sopenharmony_ci	int			*error)
11562306a36Sopenharmony_ci{
11662306a36Sopenharmony_ci	return __xchk_process_error(sc, agno, bno, error,
11762306a36Sopenharmony_ci			XFS_SCRUB_OFLAG_XFAIL, __return_address);
11862306a36Sopenharmony_ci}
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci/* Check for operational errors for a file offset. */
12162306a36Sopenharmony_cistatic bool
12262306a36Sopenharmony_ci__xchk_fblock_process_error(
12362306a36Sopenharmony_ci	struct xfs_scrub	*sc,
12462306a36Sopenharmony_ci	int			whichfork,
12562306a36Sopenharmony_ci	xfs_fileoff_t		offset,
12662306a36Sopenharmony_ci	int			*error,
12762306a36Sopenharmony_ci	__u32			errflag,
12862306a36Sopenharmony_ci	void			*ret_ip)
12962306a36Sopenharmony_ci{
13062306a36Sopenharmony_ci	switch (*error) {
13162306a36Sopenharmony_ci	case 0:
13262306a36Sopenharmony_ci		return true;
13362306a36Sopenharmony_ci	case -EDEADLOCK:
13462306a36Sopenharmony_ci	case -ECHRNG:
13562306a36Sopenharmony_ci		/* Used to restart an op with deadlock avoidance. */
13662306a36Sopenharmony_ci		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
13762306a36Sopenharmony_ci		break;
13862306a36Sopenharmony_ci	case -EFSBADCRC:
13962306a36Sopenharmony_ci	case -EFSCORRUPTED:
14062306a36Sopenharmony_ci		/* Note the badness but don't abort. */
14162306a36Sopenharmony_ci		sc->sm->sm_flags |= errflag;
14262306a36Sopenharmony_ci		*error = 0;
14362306a36Sopenharmony_ci		fallthrough;
14462306a36Sopenharmony_ci	default:
14562306a36Sopenharmony_ci		trace_xchk_file_op_error(sc, whichfork, offset, *error,
14662306a36Sopenharmony_ci				ret_ip);
14762306a36Sopenharmony_ci		break;
14862306a36Sopenharmony_ci	}
14962306a36Sopenharmony_ci	return false;
15062306a36Sopenharmony_ci}
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_cibool
15362306a36Sopenharmony_cixchk_fblock_process_error(
15462306a36Sopenharmony_ci	struct xfs_scrub	*sc,
15562306a36Sopenharmony_ci	int			whichfork,
15662306a36Sopenharmony_ci	xfs_fileoff_t		offset,
15762306a36Sopenharmony_ci	int			*error)
15862306a36Sopenharmony_ci{
15962306a36Sopenharmony_ci	return __xchk_fblock_process_error(sc, whichfork, offset, error,
16062306a36Sopenharmony_ci			XFS_SCRUB_OFLAG_CORRUPT, __return_address);
16162306a36Sopenharmony_ci}
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_cibool
16462306a36Sopenharmony_cixchk_fblock_xref_process_error(
16562306a36Sopenharmony_ci	struct xfs_scrub	*sc,
16662306a36Sopenharmony_ci	int			whichfork,
16762306a36Sopenharmony_ci	xfs_fileoff_t		offset,
16862306a36Sopenharmony_ci	int			*error)
16962306a36Sopenharmony_ci{
17062306a36Sopenharmony_ci	return __xchk_fblock_process_error(sc, whichfork, offset, error,
17162306a36Sopenharmony_ci			XFS_SCRUB_OFLAG_XFAIL, __return_address);
17262306a36Sopenharmony_ci}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci/*
17562306a36Sopenharmony_ci * Handling scrub corruption/optimization/warning checks.
17662306a36Sopenharmony_ci *
17762306a36Sopenharmony_ci * The *_set_{corrupt,preen,warning}() family of functions are used to
17862306a36Sopenharmony_ci * record the presence of metadata that is incorrect (corrupt), could be
17962306a36Sopenharmony_ci * optimized somehow (preen), or should be flagged for administrative
18062306a36Sopenharmony_ci * review but is not incorrect (warn).
18162306a36Sopenharmony_ci *
18262306a36Sopenharmony_ci * ftrace can be used to record the precise metadata location and
18362306a36Sopenharmony_ci * approximate code location of the failed check.
18462306a36Sopenharmony_ci */
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci/* Record a block which could be optimized. */
18762306a36Sopenharmony_civoid
18862306a36Sopenharmony_cixchk_block_set_preen(
18962306a36Sopenharmony_ci	struct xfs_scrub	*sc,
19062306a36Sopenharmony_ci	struct xfs_buf		*bp)
19162306a36Sopenharmony_ci{
19262306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
19362306a36Sopenharmony_ci	trace_xchk_block_preen(sc, xfs_buf_daddr(bp), __return_address);
19462306a36Sopenharmony_ci}
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci/*
19762306a36Sopenharmony_ci * Record an inode which could be optimized.  The trace data will
19862306a36Sopenharmony_ci * include the block given by bp if bp is given; otherwise it will use
19962306a36Sopenharmony_ci * the block location of the inode record itself.
20062306a36Sopenharmony_ci */
20162306a36Sopenharmony_civoid
20262306a36Sopenharmony_cixchk_ino_set_preen(
20362306a36Sopenharmony_ci	struct xfs_scrub	*sc,
20462306a36Sopenharmony_ci	xfs_ino_t		ino)
20562306a36Sopenharmony_ci{
20662306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
20762306a36Sopenharmony_ci	trace_xchk_ino_preen(sc, ino, __return_address);
20862306a36Sopenharmony_ci}
20962306a36Sopenharmony_ci
21062306a36Sopenharmony_ci/* Record something being wrong with the filesystem primary superblock. */
21162306a36Sopenharmony_civoid
21262306a36Sopenharmony_cixchk_set_corrupt(
21362306a36Sopenharmony_ci	struct xfs_scrub	*sc)
21462306a36Sopenharmony_ci{
21562306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
21662306a36Sopenharmony_ci	trace_xchk_fs_error(sc, 0, __return_address);
21762306a36Sopenharmony_ci}
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci/* Record a corrupt block. */
22062306a36Sopenharmony_civoid
22162306a36Sopenharmony_cixchk_block_set_corrupt(
22262306a36Sopenharmony_ci	struct xfs_scrub	*sc,
22362306a36Sopenharmony_ci	struct xfs_buf		*bp)
22462306a36Sopenharmony_ci{
22562306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
22662306a36Sopenharmony_ci	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
22762306a36Sopenharmony_ci}
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci/* Record a corruption while cross-referencing. */
23062306a36Sopenharmony_civoid
23162306a36Sopenharmony_cixchk_block_xref_set_corrupt(
23262306a36Sopenharmony_ci	struct xfs_scrub	*sc,
23362306a36Sopenharmony_ci	struct xfs_buf		*bp)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
23662306a36Sopenharmony_ci	trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
23762306a36Sopenharmony_ci}
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci/*
24062306a36Sopenharmony_ci * Record a corrupt inode.  The trace data will include the block given
24162306a36Sopenharmony_ci * by bp if bp is given; otherwise it will use the block location of the
24262306a36Sopenharmony_ci * inode record itself.
24362306a36Sopenharmony_ci */
24462306a36Sopenharmony_civoid
24562306a36Sopenharmony_cixchk_ino_set_corrupt(
24662306a36Sopenharmony_ci	struct xfs_scrub	*sc,
24762306a36Sopenharmony_ci	xfs_ino_t		ino)
24862306a36Sopenharmony_ci{
24962306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
25062306a36Sopenharmony_ci	trace_xchk_ino_error(sc, ino, __return_address);
25162306a36Sopenharmony_ci}
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_ci/* Record a corruption while cross-referencing with an inode. */
25462306a36Sopenharmony_civoid
25562306a36Sopenharmony_cixchk_ino_xref_set_corrupt(
25662306a36Sopenharmony_ci	struct xfs_scrub	*sc,
25762306a36Sopenharmony_ci	xfs_ino_t		ino)
25862306a36Sopenharmony_ci{
25962306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
26062306a36Sopenharmony_ci	trace_xchk_ino_error(sc, ino, __return_address);
26162306a36Sopenharmony_ci}
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci/* Record corruption in a block indexed by a file fork. */
26462306a36Sopenharmony_civoid
26562306a36Sopenharmony_cixchk_fblock_set_corrupt(
26662306a36Sopenharmony_ci	struct xfs_scrub	*sc,
26762306a36Sopenharmony_ci	int			whichfork,
26862306a36Sopenharmony_ci	xfs_fileoff_t		offset)
26962306a36Sopenharmony_ci{
27062306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
27162306a36Sopenharmony_ci	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
27262306a36Sopenharmony_ci}
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci/* Record a corruption while cross-referencing a fork block. */
27562306a36Sopenharmony_civoid
27662306a36Sopenharmony_cixchk_fblock_xref_set_corrupt(
27762306a36Sopenharmony_ci	struct xfs_scrub	*sc,
27862306a36Sopenharmony_ci	int			whichfork,
27962306a36Sopenharmony_ci	xfs_fileoff_t		offset)
28062306a36Sopenharmony_ci{
28162306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XCORRUPT;
28262306a36Sopenharmony_ci	trace_xchk_fblock_error(sc, whichfork, offset, __return_address);
28362306a36Sopenharmony_ci}
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci/*
28662306a36Sopenharmony_ci * Warn about inodes that need administrative review but is not
28762306a36Sopenharmony_ci * incorrect.
28862306a36Sopenharmony_ci */
28962306a36Sopenharmony_civoid
29062306a36Sopenharmony_cixchk_ino_set_warning(
29162306a36Sopenharmony_ci	struct xfs_scrub	*sc,
29262306a36Sopenharmony_ci	xfs_ino_t		ino)
29362306a36Sopenharmony_ci{
29462306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
29562306a36Sopenharmony_ci	trace_xchk_ino_warning(sc, ino, __return_address);
29662306a36Sopenharmony_ci}
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci/* Warn about a block indexed by a file fork that needs review. */
29962306a36Sopenharmony_civoid
30062306a36Sopenharmony_cixchk_fblock_set_warning(
30162306a36Sopenharmony_ci	struct xfs_scrub	*sc,
30262306a36Sopenharmony_ci	int			whichfork,
30362306a36Sopenharmony_ci	xfs_fileoff_t		offset)
30462306a36Sopenharmony_ci{
30562306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_WARNING;
30662306a36Sopenharmony_ci	trace_xchk_fblock_warning(sc, whichfork, offset, __return_address);
30762306a36Sopenharmony_ci}
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_ci/* Signal an incomplete scrub. */
31062306a36Sopenharmony_civoid
31162306a36Sopenharmony_cixchk_set_incomplete(
31262306a36Sopenharmony_ci	struct xfs_scrub	*sc)
31362306a36Sopenharmony_ci{
31462306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_INCOMPLETE;
31562306a36Sopenharmony_ci	trace_xchk_incomplete(sc, __return_address);
31662306a36Sopenharmony_ci}
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci/*
31962306a36Sopenharmony_ci * rmap scrubbing -- compute the number of blocks with a given owner,
32062306a36Sopenharmony_ci * at least according to the reverse mapping data.
32162306a36Sopenharmony_ci */
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_cistruct xchk_rmap_ownedby_info {
32462306a36Sopenharmony_ci	const struct xfs_owner_info	*oinfo;
32562306a36Sopenharmony_ci	xfs_filblks_t			*blocks;
32662306a36Sopenharmony_ci};
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ciSTATIC int
32962306a36Sopenharmony_cixchk_count_rmap_ownedby_irec(
33062306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
33162306a36Sopenharmony_ci	const struct xfs_rmap_irec	*rec,
33262306a36Sopenharmony_ci	void				*priv)
33362306a36Sopenharmony_ci{
33462306a36Sopenharmony_ci	struct xchk_rmap_ownedby_info	*sroi = priv;
33562306a36Sopenharmony_ci	bool				irec_attr;
33662306a36Sopenharmony_ci	bool				oinfo_attr;
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci	irec_attr = rec->rm_flags & XFS_RMAP_ATTR_FORK;
33962306a36Sopenharmony_ci	oinfo_attr = sroi->oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK;
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci	if (rec->rm_owner != sroi->oinfo->oi_owner)
34262306a36Sopenharmony_ci		return 0;
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci	if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) || irec_attr == oinfo_attr)
34562306a36Sopenharmony_ci		(*sroi->blocks) += rec->rm_blockcount;
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci	return 0;
34862306a36Sopenharmony_ci}
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci/*
35162306a36Sopenharmony_ci * Calculate the number of blocks the rmap thinks are owned by something.
35262306a36Sopenharmony_ci * The caller should pass us an rmapbt cursor.
35362306a36Sopenharmony_ci */
35462306a36Sopenharmony_ciint
35562306a36Sopenharmony_cixchk_count_rmap_ownedby_ag(
35662306a36Sopenharmony_ci	struct xfs_scrub		*sc,
35762306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
35862306a36Sopenharmony_ci	const struct xfs_owner_info	*oinfo,
35962306a36Sopenharmony_ci	xfs_filblks_t			*blocks)
36062306a36Sopenharmony_ci{
36162306a36Sopenharmony_ci	struct xchk_rmap_ownedby_info	sroi = {
36262306a36Sopenharmony_ci		.oinfo			= oinfo,
36362306a36Sopenharmony_ci		.blocks			= blocks,
36462306a36Sopenharmony_ci	};
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	*blocks = 0;
36762306a36Sopenharmony_ci	return xfs_rmap_query_all(cur, xchk_count_rmap_ownedby_irec,
36862306a36Sopenharmony_ci			&sroi);
36962306a36Sopenharmony_ci}
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci/*
37262306a36Sopenharmony_ci * AG scrubbing
37362306a36Sopenharmony_ci *
37462306a36Sopenharmony_ci * These helpers facilitate locking an allocation group's header
37562306a36Sopenharmony_ci * buffers, setting up cursors for all btrees that are present, and
37662306a36Sopenharmony_ci * cleaning everything up once we're through.
37762306a36Sopenharmony_ci */
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_ci/* Decide if we want to return an AG header read failure. */
38062306a36Sopenharmony_cistatic inline bool
38162306a36Sopenharmony_ciwant_ag_read_header_failure(
38262306a36Sopenharmony_ci	struct xfs_scrub	*sc,
38362306a36Sopenharmony_ci	unsigned int		type)
38462306a36Sopenharmony_ci{
38562306a36Sopenharmony_ci	/* Return all AG header read failures when scanning btrees. */
38662306a36Sopenharmony_ci	if (sc->sm->sm_type != XFS_SCRUB_TYPE_AGF &&
38762306a36Sopenharmony_ci	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGFL &&
38862306a36Sopenharmony_ci	    sc->sm->sm_type != XFS_SCRUB_TYPE_AGI)
38962306a36Sopenharmony_ci		return true;
39062306a36Sopenharmony_ci	/*
39162306a36Sopenharmony_ci	 * If we're scanning a given type of AG header, we only want to
39262306a36Sopenharmony_ci	 * see read failures from that specific header.  We'd like the
39362306a36Sopenharmony_ci	 * other headers to cross-check them, but this isn't required.
39462306a36Sopenharmony_ci	 */
39562306a36Sopenharmony_ci	if (sc->sm->sm_type == type)
39662306a36Sopenharmony_ci		return true;
39762306a36Sopenharmony_ci	return false;
39862306a36Sopenharmony_ci}
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci/*
40162306a36Sopenharmony_ci * Grab the AG header buffers for the attached perag structure.
40262306a36Sopenharmony_ci *
40362306a36Sopenharmony_ci * The headers should be released by xchk_ag_free, but as a fail safe we attach
40462306a36Sopenharmony_ci * all the buffers we grab to the scrub transaction so they'll all be freed
40562306a36Sopenharmony_ci * when we cancel it.
40662306a36Sopenharmony_ci */
40762306a36Sopenharmony_cistatic inline int
40862306a36Sopenharmony_cixchk_perag_read_headers(
40962306a36Sopenharmony_ci	struct xfs_scrub	*sc,
41062306a36Sopenharmony_ci	struct xchk_ag		*sa)
41162306a36Sopenharmony_ci{
41262306a36Sopenharmony_ci	int			error;
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci	error = xfs_ialloc_read_agi(sa->pag, sc->tp, &sa->agi_bp);
41562306a36Sopenharmony_ci	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI))
41662306a36Sopenharmony_ci		return error;
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci	error = xfs_alloc_read_agf(sa->pag, sc->tp, 0, &sa->agf_bp);
41962306a36Sopenharmony_ci	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
42062306a36Sopenharmony_ci		return error;
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci	return 0;
42362306a36Sopenharmony_ci}
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci/*
42662306a36Sopenharmony_ci * Grab the AG headers for the attached perag structure and wait for pending
42762306a36Sopenharmony_ci * intents to drain.
42862306a36Sopenharmony_ci */
42962306a36Sopenharmony_cistatic int
43062306a36Sopenharmony_cixchk_perag_drain_and_lock(
43162306a36Sopenharmony_ci	struct xfs_scrub	*sc)
43262306a36Sopenharmony_ci{
43362306a36Sopenharmony_ci	struct xchk_ag		*sa = &sc->sa;
43462306a36Sopenharmony_ci	int			error = 0;
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	ASSERT(sa->pag != NULL);
43762306a36Sopenharmony_ci	ASSERT(sa->agi_bp == NULL);
43862306a36Sopenharmony_ci	ASSERT(sa->agf_bp == NULL);
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci	do {
44162306a36Sopenharmony_ci		if (xchk_should_terminate(sc, &error))
44262306a36Sopenharmony_ci			return error;
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci		error = xchk_perag_read_headers(sc, sa);
44562306a36Sopenharmony_ci		if (error)
44662306a36Sopenharmony_ci			return error;
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci		/*
44962306a36Sopenharmony_ci		 * If we've grabbed an inode for scrubbing then we assume that
45062306a36Sopenharmony_ci		 * holding its ILOCK will suffice to coordinate with any intent
45162306a36Sopenharmony_ci		 * chains involving this inode.
45262306a36Sopenharmony_ci		 */
45362306a36Sopenharmony_ci		if (sc->ip)
45462306a36Sopenharmony_ci			return 0;
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci		/*
45762306a36Sopenharmony_ci		 * Decide if this AG is quiet enough for all metadata to be
45862306a36Sopenharmony_ci		 * consistent with each other.  XFS allows the AG header buffer
45962306a36Sopenharmony_ci		 * locks to cycle across transaction rolls while processing
46062306a36Sopenharmony_ci		 * chains of deferred ops, which means that there could be
46162306a36Sopenharmony_ci		 * other threads in the middle of processing a chain of
46262306a36Sopenharmony_ci		 * deferred ops.  For regular operations we are careful about
46362306a36Sopenharmony_ci		 * ordering operations to prevent collisions between threads
46462306a36Sopenharmony_ci		 * (which is why we don't need a per-AG lock), but scrub and
46562306a36Sopenharmony_ci		 * repair have to serialize against chained operations.
46662306a36Sopenharmony_ci		 *
46762306a36Sopenharmony_ci		 * We just locked all the AG headers buffers; now take a look
46862306a36Sopenharmony_ci		 * to see if there are any intents in progress.  If there are,
46962306a36Sopenharmony_ci		 * drop the AG headers and wait for the intents to drain.
47062306a36Sopenharmony_ci		 * Since we hold all the AG header locks for the duration of
47162306a36Sopenharmony_ci		 * the scrub, this is the only time we have to sample the
47262306a36Sopenharmony_ci		 * intents counter; any threads increasing it after this point
47362306a36Sopenharmony_ci		 * can't possibly be in the middle of a chain of AG metadata
47462306a36Sopenharmony_ci		 * updates.
47562306a36Sopenharmony_ci		 *
47662306a36Sopenharmony_ci		 * Obviously, this should be slanted against scrub and in favor
47762306a36Sopenharmony_ci		 * of runtime threads.
47862306a36Sopenharmony_ci		 */
47962306a36Sopenharmony_ci		if (!xfs_perag_intent_busy(sa->pag))
48062306a36Sopenharmony_ci			return 0;
48162306a36Sopenharmony_ci
48262306a36Sopenharmony_ci		if (sa->agf_bp) {
48362306a36Sopenharmony_ci			xfs_trans_brelse(sc->tp, sa->agf_bp);
48462306a36Sopenharmony_ci			sa->agf_bp = NULL;
48562306a36Sopenharmony_ci		}
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_ci		if (sa->agi_bp) {
48862306a36Sopenharmony_ci			xfs_trans_brelse(sc->tp, sa->agi_bp);
48962306a36Sopenharmony_ci			sa->agi_bp = NULL;
49062306a36Sopenharmony_ci		}
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci		if (!(sc->flags & XCHK_FSGATES_DRAIN))
49362306a36Sopenharmony_ci			return -ECHRNG;
49462306a36Sopenharmony_ci		error = xfs_perag_intent_drain(sa->pag);
49562306a36Sopenharmony_ci		if (error == -ERESTARTSYS)
49662306a36Sopenharmony_ci			error = -EINTR;
49762306a36Sopenharmony_ci	} while (!error);
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci	return error;
50062306a36Sopenharmony_ci}
50162306a36Sopenharmony_ci
50262306a36Sopenharmony_ci/*
50362306a36Sopenharmony_ci * Grab the per-AG structure, grab all AG header buffers, and wait until there
50462306a36Sopenharmony_ci * aren't any pending intents.  Returns -ENOENT if we can't grab the perag
50562306a36Sopenharmony_ci * structure.
50662306a36Sopenharmony_ci */
50762306a36Sopenharmony_ciint
50862306a36Sopenharmony_cixchk_ag_read_headers(
50962306a36Sopenharmony_ci	struct xfs_scrub	*sc,
51062306a36Sopenharmony_ci	xfs_agnumber_t		agno,
51162306a36Sopenharmony_ci	struct xchk_ag		*sa)
51262306a36Sopenharmony_ci{
51362306a36Sopenharmony_ci	struct xfs_mount	*mp = sc->mp;
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	ASSERT(!sa->pag);
51662306a36Sopenharmony_ci	sa->pag = xfs_perag_get(mp, agno);
51762306a36Sopenharmony_ci	if (!sa->pag)
51862306a36Sopenharmony_ci		return -ENOENT;
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci	return xchk_perag_drain_and_lock(sc);
52162306a36Sopenharmony_ci}
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci/* Release all the AG btree cursors. */
52462306a36Sopenharmony_civoid
52562306a36Sopenharmony_cixchk_ag_btcur_free(
52662306a36Sopenharmony_ci	struct xchk_ag		*sa)
52762306a36Sopenharmony_ci{
52862306a36Sopenharmony_ci	if (sa->refc_cur)
52962306a36Sopenharmony_ci		xfs_btree_del_cursor(sa->refc_cur, XFS_BTREE_ERROR);
53062306a36Sopenharmony_ci	if (sa->rmap_cur)
53162306a36Sopenharmony_ci		xfs_btree_del_cursor(sa->rmap_cur, XFS_BTREE_ERROR);
53262306a36Sopenharmony_ci	if (sa->fino_cur)
53362306a36Sopenharmony_ci		xfs_btree_del_cursor(sa->fino_cur, XFS_BTREE_ERROR);
53462306a36Sopenharmony_ci	if (sa->ino_cur)
53562306a36Sopenharmony_ci		xfs_btree_del_cursor(sa->ino_cur, XFS_BTREE_ERROR);
53662306a36Sopenharmony_ci	if (sa->cnt_cur)
53762306a36Sopenharmony_ci		xfs_btree_del_cursor(sa->cnt_cur, XFS_BTREE_ERROR);
53862306a36Sopenharmony_ci	if (sa->bno_cur)
53962306a36Sopenharmony_ci		xfs_btree_del_cursor(sa->bno_cur, XFS_BTREE_ERROR);
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_ci	sa->refc_cur = NULL;
54262306a36Sopenharmony_ci	sa->rmap_cur = NULL;
54362306a36Sopenharmony_ci	sa->fino_cur = NULL;
54462306a36Sopenharmony_ci	sa->ino_cur = NULL;
54562306a36Sopenharmony_ci	sa->bno_cur = NULL;
54662306a36Sopenharmony_ci	sa->cnt_cur = NULL;
54762306a36Sopenharmony_ci}
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci/* Initialize all the btree cursors for an AG. */
55062306a36Sopenharmony_civoid
55162306a36Sopenharmony_cixchk_ag_btcur_init(
55262306a36Sopenharmony_ci	struct xfs_scrub	*sc,
55362306a36Sopenharmony_ci	struct xchk_ag		*sa)
55462306a36Sopenharmony_ci{
55562306a36Sopenharmony_ci	struct xfs_mount	*mp = sc->mp;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	if (sa->agf_bp &&
55862306a36Sopenharmony_ci	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_BNO)) {
55962306a36Sopenharmony_ci		/* Set up a bnobt cursor for cross-referencing. */
56062306a36Sopenharmony_ci		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
56162306a36Sopenharmony_ci				sa->pag, XFS_BTNUM_BNO);
56262306a36Sopenharmony_ci	}
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	if (sa->agf_bp &&
56562306a36Sopenharmony_ci	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_CNT)) {
56662306a36Sopenharmony_ci		/* Set up a cntbt cursor for cross-referencing. */
56762306a36Sopenharmony_ci		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
56862306a36Sopenharmony_ci				sa->pag, XFS_BTNUM_CNT);
56962306a36Sopenharmony_ci	}
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci	/* Set up a inobt cursor for cross-referencing. */
57262306a36Sopenharmony_ci	if (sa->agi_bp &&
57362306a36Sopenharmony_ci	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
57462306a36Sopenharmony_ci		sa->ino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
57562306a36Sopenharmony_ci				XFS_BTNUM_INO);
57662306a36Sopenharmony_ci	}
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	/* Set up a finobt cursor for cross-referencing. */
57962306a36Sopenharmony_ci	if (sa->agi_bp && xfs_has_finobt(mp) &&
58062306a36Sopenharmony_ci	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
58162306a36Sopenharmony_ci		sa->fino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp,
58262306a36Sopenharmony_ci				XFS_BTNUM_FINO);
58362306a36Sopenharmony_ci	}
58462306a36Sopenharmony_ci
58562306a36Sopenharmony_ci	/* Set up a rmapbt cursor for cross-referencing. */
58662306a36Sopenharmony_ci	if (sa->agf_bp && xfs_has_rmapbt(mp) &&
58762306a36Sopenharmony_ci	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
58862306a36Sopenharmony_ci		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
58962306a36Sopenharmony_ci				sa->pag);
59062306a36Sopenharmony_ci	}
59162306a36Sopenharmony_ci
59262306a36Sopenharmony_ci	/* Set up a refcountbt cursor for cross-referencing. */
59362306a36Sopenharmony_ci	if (sa->agf_bp && xfs_has_reflink(mp) &&
59462306a36Sopenharmony_ci	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
59562306a36Sopenharmony_ci		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
59662306a36Sopenharmony_ci				sa->agf_bp, sa->pag);
59762306a36Sopenharmony_ci	}
59862306a36Sopenharmony_ci}
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci/* Release the AG header context and btree cursors. */
60162306a36Sopenharmony_civoid
60262306a36Sopenharmony_cixchk_ag_free(
60362306a36Sopenharmony_ci	struct xfs_scrub	*sc,
60462306a36Sopenharmony_ci	struct xchk_ag		*sa)
60562306a36Sopenharmony_ci{
60662306a36Sopenharmony_ci	xchk_ag_btcur_free(sa);
60762306a36Sopenharmony_ci	if (sa->agf_bp) {
60862306a36Sopenharmony_ci		xfs_trans_brelse(sc->tp, sa->agf_bp);
60962306a36Sopenharmony_ci		sa->agf_bp = NULL;
61062306a36Sopenharmony_ci	}
61162306a36Sopenharmony_ci	if (sa->agi_bp) {
61262306a36Sopenharmony_ci		xfs_trans_brelse(sc->tp, sa->agi_bp);
61362306a36Sopenharmony_ci		sa->agi_bp = NULL;
61462306a36Sopenharmony_ci	}
61562306a36Sopenharmony_ci	if (sa->pag) {
61662306a36Sopenharmony_ci		xfs_perag_put(sa->pag);
61762306a36Sopenharmony_ci		sa->pag = NULL;
61862306a36Sopenharmony_ci	}
61962306a36Sopenharmony_ci}
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci/*
62262306a36Sopenharmony_ci * For scrub, grab the perag structure, the AGI, and the AGF headers, in that
62362306a36Sopenharmony_ci * order.  Locking order requires us to get the AGI before the AGF.  We use the
62462306a36Sopenharmony_ci * transaction to avoid deadlocking on crosslinked metadata buffers; either the
62562306a36Sopenharmony_ci * caller passes one in (bmap scrub) or we have to create a transaction
62662306a36Sopenharmony_ci * ourselves.  Returns ENOENT if the perag struct cannot be grabbed.
62762306a36Sopenharmony_ci */
62862306a36Sopenharmony_ciint
62962306a36Sopenharmony_cixchk_ag_init(
63062306a36Sopenharmony_ci	struct xfs_scrub	*sc,
63162306a36Sopenharmony_ci	xfs_agnumber_t		agno,
63262306a36Sopenharmony_ci	struct xchk_ag		*sa)
63362306a36Sopenharmony_ci{
63462306a36Sopenharmony_ci	int			error;
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci	error = xchk_ag_read_headers(sc, agno, sa);
63762306a36Sopenharmony_ci	if (error)
63862306a36Sopenharmony_ci		return error;
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ci	xchk_ag_btcur_init(sc, sa);
64162306a36Sopenharmony_ci	return 0;
64262306a36Sopenharmony_ci}
64362306a36Sopenharmony_ci
64462306a36Sopenharmony_ci/* Per-scrubber setup functions */
64562306a36Sopenharmony_ci
/*
 * Cancel the scrub transaction held in sc->tp and clear the pointer so that
 * the scrub context no longer refers to it.
 */
void
xchk_trans_cancel(
	struct xfs_scrub	*sc)
{
	xfs_trans_cancel(sc->tp);
	sc->tp = NULL;
}
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci/*
65562306a36Sopenharmony_ci * Grab an empty transaction so that we can re-grab locked buffers if
65662306a36Sopenharmony_ci * one of our btrees turns out to be cyclic.
65762306a36Sopenharmony_ci *
65862306a36Sopenharmony_ci * If we're going to repair something, we need to ask for the largest possible
65962306a36Sopenharmony_ci * log reservation so that we can handle the worst case scenario for metadata
66062306a36Sopenharmony_ci * updates while rebuilding a metadata item.  We also need to reserve as many
66162306a36Sopenharmony_ci * blocks in the head transaction as we think we're going to need to rebuild
66262306a36Sopenharmony_ci * the metadata object.
66362306a36Sopenharmony_ci */
66462306a36Sopenharmony_ciint
66562306a36Sopenharmony_cixchk_trans_alloc(
66662306a36Sopenharmony_ci	struct xfs_scrub	*sc,
66762306a36Sopenharmony_ci	uint			resblks)
66862306a36Sopenharmony_ci{
66962306a36Sopenharmony_ci	if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
67062306a36Sopenharmony_ci		return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
67162306a36Sopenharmony_ci				resblks, 0, 0, &sc->tp);
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_ci	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
67462306a36Sopenharmony_ci}
67562306a36Sopenharmony_ci
/*
 * Set us up with a transaction and an empty context.  The block reservation
 * is whatever xrep_calc_ag_resblks() computes for this scrub request.
 */
int
xchk_setup_fs(
	struct xfs_scrub	*sc)
{
	return xchk_trans_alloc(sc, xrep_calc_ag_resblks(sc));
}
68662306a36Sopenharmony_ci
68762306a36Sopenharmony_ci/* Set us up with AG headers and btree cursors. */
68862306a36Sopenharmony_ciint
68962306a36Sopenharmony_cixchk_setup_ag_btree(
69062306a36Sopenharmony_ci	struct xfs_scrub	*sc,
69162306a36Sopenharmony_ci	bool			force_log)
69262306a36Sopenharmony_ci{
69362306a36Sopenharmony_ci	struct xfs_mount	*mp = sc->mp;
69462306a36Sopenharmony_ci	int			error;
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_ci	/*
69762306a36Sopenharmony_ci	 * If the caller asks us to checkpont the log, do so.  This
69862306a36Sopenharmony_ci	 * expensive operation should be performed infrequently and only
69962306a36Sopenharmony_ci	 * as a last resort.  Any caller that sets force_log should
70062306a36Sopenharmony_ci	 * document why they need to do so.
70162306a36Sopenharmony_ci	 */
70262306a36Sopenharmony_ci	if (force_log) {
70362306a36Sopenharmony_ci		error = xchk_checkpoint_log(mp);
70462306a36Sopenharmony_ci		if (error)
70562306a36Sopenharmony_ci			return error;
70662306a36Sopenharmony_ci	}
70762306a36Sopenharmony_ci
70862306a36Sopenharmony_ci	error = xchk_setup_fs(sc);
70962306a36Sopenharmony_ci	if (error)
71062306a36Sopenharmony_ci		return error;
71162306a36Sopenharmony_ci
71262306a36Sopenharmony_ci	return xchk_ag_init(sc, sc->sm->sm_agno, &sc->sa);
71362306a36Sopenharmony_ci}
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci/* Push everything out of the log onto disk. */
71662306a36Sopenharmony_ciint
71762306a36Sopenharmony_cixchk_checkpoint_log(
71862306a36Sopenharmony_ci	struct xfs_mount	*mp)
71962306a36Sopenharmony_ci{
72062306a36Sopenharmony_ci	int			error;
72162306a36Sopenharmony_ci
72262306a36Sopenharmony_ci	error = xfs_log_force(mp, XFS_LOG_SYNC);
72362306a36Sopenharmony_ci	if (error)
72462306a36Sopenharmony_ci		return error;
72562306a36Sopenharmony_ci	xfs_ail_push_all_sync(mp->m_ail);
72662306a36Sopenharmony_ci	return 0;
72762306a36Sopenharmony_ci}
72862306a36Sopenharmony_ci
/*
 * Verify that an inode is allocated ondisk, then return its cached inode.
 *
 * This is xfs_iget() with the scrub transaction, XFS_IGET_UNTRUSTED, and no
 * lock flags.  The inode is returned through @ipp; the return value is
 * whatever xfs_iget() returns.
 */
int
xchk_iget(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_inode	**ipp)
{
	return xfs_iget(sc->mp, sc->tp, inum, XFS_IGET_UNTRUSTED, 0, ipp);
}
73862306a36Sopenharmony_ci
/*
 * Try to grab an inode in a manner that avoids races with physical inode
 * allocation.  If we can't, return the locked AGI buffer so that the caller
 * can single-step the loading process to see where things went wrong.
 * Callers must have a valid scrub transaction.
 *
 * If the iget succeeds, return 0, a NULL AGI, and the inode.
 *
 * If the iget fails, return the error, the locked AGI, and a NULL inode.  This
 * can include -EINVAL and -ENOENT for invalid inode numbers or inodes that are
 * no longer allocated; or any other corruption or runtime error.
 *
 * If the AGI read fails, return the error, a NULL AGI, and NULL inode.
 *
 * If a fatal signal is pending, return -EINTR, a NULL AGI, and a NULL inode.
 *
 * An -EAGAIN from the iget is never returned to the caller; the AGI is
 * dropped and the whole sequence is retried instead.
 */
int
xchk_iget_agi(
	struct xfs_scrub	*sc,
	xfs_ino_t		inum,
	struct xfs_buf		**agi_bpp,
	struct xfs_inode	**ipp)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;
	struct xfs_perag	*pag;
	int			error;

	ASSERT(sc->tp != NULL);

again:
	/* Reset the outputs on every pass, including retries. */
	*agi_bpp = NULL;
	*ipp = NULL;
	error = 0;

	/* Give up (with both outputs NULL) if the scrub should stop. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Attach the AGI buffer to the scrub transaction to avoid deadlocks
	 * in the iget cache miss path.
	 *
	 * The perag reference is only held across the AGI read.
	 *
	 * NOTE(review): pag is not checked for NULL before use here, unlike
	 * the lookup in xchk_iget_for_scrubbing -- presumably callers have
	 * already validated inum; confirm.
	 */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
	error = xfs_ialloc_read_agi(pag, tp, agi_bpp);
	xfs_perag_put(pag);
	if (error)
		return error;

	error = xfs_iget(mp, tp, inum,
			XFS_IGET_NORETRY | XFS_IGET_UNTRUSTED, 0, ipp);
	if (error == -EAGAIN) {
		/*
		 * The inode may be in core but temporarily unavailable and may
		 * require the AGI buffer before it can be returned.  Drop the
		 * AGI buffer and retry the lookup.
		 *
		 * Incore lookup will fail with EAGAIN on a cache hit if the
		 * inode is queued to the inactivation list.  The inactivation
		 * worker may remove the inode from the unlinked list and hence
		 * needs the AGI.
		 *
		 * Hence xchk_iget_agi() needs to drop the AGI lock on EAGAIN
		 * to allow inodegc to make progress and move the inode to
		 * IRECLAIMABLE state where xfs_iget will be able to return it
		 * again if it can lock the inode.
		 */
		xfs_trans_brelse(tp, *agi_bpp);
		/* Pause briefly before starting over from the top. */
		delay(1);
		goto again;
	}
	/* Any other failure returns with the AGI still held in *agi_bpp. */
	if (error)
		return error;

	/* We got the inode, so we can release the AGI. */
	ASSERT(*ipp != NULL);
	xfs_trans_brelse(tp, *agi_bpp);
	*agi_bpp = NULL;
	return 0;
}
81862306a36Sopenharmony_ci
81962306a36Sopenharmony_ci/* Install an inode that we opened by handle for scrubbing. */
82062306a36Sopenharmony_ciint
82162306a36Sopenharmony_cixchk_install_handle_inode(
82262306a36Sopenharmony_ci	struct xfs_scrub	*sc,
82362306a36Sopenharmony_ci	struct xfs_inode	*ip)
82462306a36Sopenharmony_ci{
82562306a36Sopenharmony_ci	if (VFS_I(ip)->i_generation != sc->sm->sm_gen) {
82662306a36Sopenharmony_ci		xchk_irele(sc, ip);
82762306a36Sopenharmony_ci		return -ENOENT;
82862306a36Sopenharmony_ci	}
82962306a36Sopenharmony_ci
83062306a36Sopenharmony_ci	sc->ip = ip;
83162306a36Sopenharmony_ci	return 0;
83262306a36Sopenharmony_ci}
83362306a36Sopenharmony_ci
83462306a36Sopenharmony_ci/*
83562306a36Sopenharmony_ci * Install an already-referenced inode for scrubbing.  Get our own reference to
83662306a36Sopenharmony_ci * the inode to make disposal simpler.  The inode must not be in I_FREEING or
83762306a36Sopenharmony_ci * I_WILL_FREE state!
83862306a36Sopenharmony_ci */
83962306a36Sopenharmony_ciint
84062306a36Sopenharmony_cixchk_install_live_inode(
84162306a36Sopenharmony_ci	struct xfs_scrub	*sc,
84262306a36Sopenharmony_ci	struct xfs_inode	*ip)
84362306a36Sopenharmony_ci{
84462306a36Sopenharmony_ci	if (!igrab(VFS_I(ip))) {
84562306a36Sopenharmony_ci		xchk_ino_set_corrupt(sc, ip->i_ino);
84662306a36Sopenharmony_ci		return -EFSCORRUPTED;
84762306a36Sopenharmony_ci	}
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	sc->ip = ip;
85062306a36Sopenharmony_ci	return 0;
85162306a36Sopenharmony_ci}
85262306a36Sopenharmony_ci
/*
 * In preparation to scrub metadata structures that hang off of an inode,
 * grab either the inode referenced in the scrub control structure or the
 * inode passed in.  If the inumber does not reference an allocated inode
 * record, the function returns ENOENT to end the scrub early.  The inode
 * is not locked.
 *
 * On success the inode is installed in sc->ip and 0 is returned.  The caller
 * must not have a scrub transaction yet; any transaction allocated here is
 * cancelled again before returning.
 */
int
xchk_iget_for_scrubbing(
	struct xfs_scrub	*sc)
{
	struct xfs_imap		imap;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_perag	*pag;
	struct xfs_buf		*agi_bp;
	struct xfs_inode	*ip_in = XFS_I(file_inode(sc->file));
	struct xfs_inode	*ip = NULL;
	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino);
	int			error;

	ASSERT(sc->tp == NULL);

	/* We want to scan the inode we already had opened. */
	if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino)
		return xchk_install_live_inode(sc, ip_in);

	/* Reject internal metadata files and obviously bad inode numbers. */
	if (xfs_internal_inum(mp, sc->sm->sm_ino))
		return -ENOENT;
	if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
		return -ENOENT;

	/* Try a regular untrusted iget. */
	error = xchk_iget(sc, sc->sm->sm_ino, &ip);
	if (!error)
		return xchk_install_handle_inode(sc, ip);
	if (error == -ENOENT)
		return error;
	/* Only EINVAL gets the second, AGI-protected attempt below. */
	if (error != -EINVAL)
		goto out_error;

	/*
	 * EINVAL with IGET_UNTRUSTED probably means one of several things:
	 * userspace gave us an inode number that doesn't correspond to fs
	 * space; the inode btree lacks a record for this inode; or there is a
	 * record, and it says this inode is free.
	 *
	 * We want to look up this inode in the inobt to distinguish two
	 * scenarios: (1) the inobt says the inode is free, in which case
	 * there's nothing to do; and (2) the inobt says the inode is
	 * allocated, but loading it failed due to corruption.
	 *
	 * Allocate a transaction and grab the AGI to prevent inobt activity
	 * in this AG.  Retry the iget in case someone allocated a new inode
	 * after the first iget failed.
	 */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out_error;

	error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip);
	if (error == 0) {
		/* Actually got the inode, so install it. */
		xchk_trans_cancel(sc);
		return xchk_install_handle_inode(sc, ip);
	}
	if (error == -ENOENT)
		goto out_gone;
	if (error != -EINVAL)
		goto out_cancel;

	/* Ensure that we have protected against inode allocation/freeing. */
	if (agi_bp == NULL) {
		ASSERT(agi_bp != NULL);
		error = -ECANCELED;
		goto out_cancel;
	}

	/*
	 * Untrusted iget failed a second time.  Let's try an inobt lookup.
	 * If the inobt thinks that the inode neither can exist inside the
	 * filesystem nor is allocated, return ENOENT to signal that the check
	 * can be skipped.
	 *
	 * If the lookup returns corruption, we'll mark this inode corrupt and
	 * exit to userspace.  There's little chance of fixing anything until
	 * the inobt is straightened out, but there's nothing we can do here.
	 *
	 * If the lookup encounters any other error, exit to userspace.
	 *
	 * If the lookup succeeds, something else must be very wrong in the fs
	 * such that setting up the incore inode failed in some strange way.
	 * Treat those as corruptions.
	 */
	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino));
	if (!pag) {
		error = -EFSCORRUPTED;
		goto out_cancel;
	}

	error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap,
			XFS_IGET_UNTRUSTED);
	xfs_perag_put(pag);
	if (error == -EINVAL || error == -ENOENT)
		goto out_gone;
	if (!error)
		error = -EFSCORRUPTED;

	/* Error exit for paths that hold a scrub transaction. */
out_cancel:
	xchk_trans_cancel(sc);
	/* Error exit for paths without a transaction; trace, then return. */
out_error:
	trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino),
			error, __return_address);
	return error;
out_gone:
	/* The file is gone, so there's nothing to check. */
	xchk_trans_cancel(sc);
	return -ENOENT;
}
97262306a36Sopenharmony_ci
97362306a36Sopenharmony_ci/* Release an inode, possibly dropping it in the process. */
97462306a36Sopenharmony_civoid
97562306a36Sopenharmony_cixchk_irele(
97662306a36Sopenharmony_ci	struct xfs_scrub	*sc,
97762306a36Sopenharmony_ci	struct xfs_inode	*ip)
97862306a36Sopenharmony_ci{
97962306a36Sopenharmony_ci	if (current->journal_info != NULL) {
98062306a36Sopenharmony_ci		ASSERT(current->journal_info == sc->tp);
98162306a36Sopenharmony_ci
98262306a36Sopenharmony_ci		/*
98362306a36Sopenharmony_ci		 * If we are in a transaction, we /cannot/ drop the inode
98462306a36Sopenharmony_ci		 * ourselves, because the VFS will trigger writeback, which
98562306a36Sopenharmony_ci		 * can require a transaction.  Clear DONTCACHE to force the
98662306a36Sopenharmony_ci		 * inode to the LRU, where someone else can take care of
98762306a36Sopenharmony_ci		 * dropping it.
98862306a36Sopenharmony_ci		 *
98962306a36Sopenharmony_ci		 * Note that when we grabbed our reference to the inode, it
99062306a36Sopenharmony_ci		 * could have had an active ref and DONTCACHE set if a sysadmin
99162306a36Sopenharmony_ci		 * is trying to coerce a change in file access mode.  icache
99262306a36Sopenharmony_ci		 * hits do not clear DONTCACHE, so we must do it here.
99362306a36Sopenharmony_ci		 */
99462306a36Sopenharmony_ci		spin_lock(&VFS_I(ip)->i_lock);
99562306a36Sopenharmony_ci		VFS_I(ip)->i_state &= ~I_DONTCACHE;
99662306a36Sopenharmony_ci		spin_unlock(&VFS_I(ip)->i_lock);
99762306a36Sopenharmony_ci	} else if (atomic_read(&VFS_I(ip)->i_count) == 1) {
99862306a36Sopenharmony_ci		/*
99962306a36Sopenharmony_ci		 * If this is the last reference to the inode and the caller
100062306a36Sopenharmony_ci		 * permits it, set DONTCACHE to avoid thrashing.
100162306a36Sopenharmony_ci		 */
100262306a36Sopenharmony_ci		d_mark_dontcache(VFS_I(ip));
100362306a36Sopenharmony_ci	}
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ci	xfs_irele(ip);
100662306a36Sopenharmony_ci}
100762306a36Sopenharmony_ci
100862306a36Sopenharmony_ci/*
100962306a36Sopenharmony_ci * Set us up to scrub metadata mapped by a file's fork.  Callers must not use
101062306a36Sopenharmony_ci * this to operate on user-accessible regular file data because the MMAPLOCK is
101162306a36Sopenharmony_ci * not taken.
101262306a36Sopenharmony_ci */
101362306a36Sopenharmony_ciint
101462306a36Sopenharmony_cixchk_setup_inode_contents(
101562306a36Sopenharmony_ci	struct xfs_scrub	*sc,
101662306a36Sopenharmony_ci	unsigned int		resblks)
101762306a36Sopenharmony_ci{
101862306a36Sopenharmony_ci	int			error;
101962306a36Sopenharmony_ci
102062306a36Sopenharmony_ci	error = xchk_iget_for_scrubbing(sc);
102162306a36Sopenharmony_ci	if (error)
102262306a36Sopenharmony_ci		return error;
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci	/* Lock the inode so the VFS cannot touch this file. */
102562306a36Sopenharmony_ci	xchk_ilock(sc, XFS_IOLOCK_EXCL);
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_ci	error = xchk_trans_alloc(sc, resblks);
102862306a36Sopenharmony_ci	if (error)
102962306a36Sopenharmony_ci		goto out;
103062306a36Sopenharmony_ci	xchk_ilock(sc, XFS_ILOCK_EXCL);
103162306a36Sopenharmony_ciout:
103262306a36Sopenharmony_ci	/* scrub teardown will unlock and release the inode for us */
103362306a36Sopenharmony_ci	return error;
103462306a36Sopenharmony_ci}
103562306a36Sopenharmony_ci
/*
 * Take the given inode lock(s) on sc->ip and record them in sc->ilock_flags
 * so that the scrub context knows which locks it holds.
 */
void
xchk_ilock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	xfs_ilock(sc->ip, ilock_flags);
	sc->ilock_flags |= ilock_flags;
}
104462306a36Sopenharmony_ci
104562306a36Sopenharmony_cibool
104662306a36Sopenharmony_cixchk_ilock_nowait(
104762306a36Sopenharmony_ci	struct xfs_scrub	*sc,
104862306a36Sopenharmony_ci	unsigned int		ilock_flags)
104962306a36Sopenharmony_ci{
105062306a36Sopenharmony_ci	if (xfs_ilock_nowait(sc->ip, ilock_flags)) {
105162306a36Sopenharmony_ci		sc->ilock_flags |= ilock_flags;
105262306a36Sopenharmony_ci		return true;
105362306a36Sopenharmony_ci	}
105462306a36Sopenharmony_ci
105562306a36Sopenharmony_ci	return false;
105662306a36Sopenharmony_ci}
105762306a36Sopenharmony_ci
/*
 * Drop the given inode lock(s) on sc->ip.  The flags are cleared from
 * sc->ilock_flags before the locks are actually released.
 */
void
xchk_iunlock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	sc->ilock_flags &= ~ilock_flags;
	xfs_iunlock(sc->ip, ilock_flags);
}
106662306a36Sopenharmony_ci
106762306a36Sopenharmony_ci/*
106862306a36Sopenharmony_ci * Predicate that decides if we need to evaluate the cross-reference check.
106962306a36Sopenharmony_ci * If there was an error accessing the cross-reference btree, just delete
107062306a36Sopenharmony_ci * the cursor and skip the check.
107162306a36Sopenharmony_ci */
107262306a36Sopenharmony_cibool
107362306a36Sopenharmony_cixchk_should_check_xref(
107462306a36Sopenharmony_ci	struct xfs_scrub	*sc,
107562306a36Sopenharmony_ci	int			*error,
107662306a36Sopenharmony_ci	struct xfs_btree_cur	**curpp)
107762306a36Sopenharmony_ci{
107862306a36Sopenharmony_ci	/* No point in xref if we already know we're corrupt. */
107962306a36Sopenharmony_ci	if (xchk_skip_xref(sc->sm))
108062306a36Sopenharmony_ci		return false;
108162306a36Sopenharmony_ci
108262306a36Sopenharmony_ci	if (*error == 0)
108362306a36Sopenharmony_ci		return true;
108462306a36Sopenharmony_ci
108562306a36Sopenharmony_ci	if (curpp) {
108662306a36Sopenharmony_ci		/* If we've already given up on xref, just bail out. */
108762306a36Sopenharmony_ci		if (!*curpp)
108862306a36Sopenharmony_ci			return false;
108962306a36Sopenharmony_ci
109062306a36Sopenharmony_ci		/* xref error, delete cursor and bail out. */
109162306a36Sopenharmony_ci		xfs_btree_del_cursor(*curpp, XFS_BTREE_ERROR);
109262306a36Sopenharmony_ci		*curpp = NULL;
109362306a36Sopenharmony_ci	}
109462306a36Sopenharmony_ci
109562306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
109662306a36Sopenharmony_ci	trace_xchk_xref_error(sc, *error, __return_address);
109762306a36Sopenharmony_ci
109862306a36Sopenharmony_ci	/*
109962306a36Sopenharmony_ci	 * Errors encountered during cross-referencing with another
110062306a36Sopenharmony_ci	 * data structure should not cause this scrubber to abort.
110162306a36Sopenharmony_ci	 */
110262306a36Sopenharmony_ci	*error = 0;
110362306a36Sopenharmony_ci	return false;
110462306a36Sopenharmony_ci}
110562306a36Sopenharmony_ci
110662306a36Sopenharmony_ci/* Run the structure verifiers on in-memory buffers to detect bad memory. */
110762306a36Sopenharmony_civoid
110862306a36Sopenharmony_cixchk_buffer_recheck(
110962306a36Sopenharmony_ci	struct xfs_scrub	*sc,
111062306a36Sopenharmony_ci	struct xfs_buf		*bp)
111162306a36Sopenharmony_ci{
111262306a36Sopenharmony_ci	xfs_failaddr_t		fa;
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci	if (bp->b_ops == NULL) {
111562306a36Sopenharmony_ci		xchk_block_set_corrupt(sc, bp);
111662306a36Sopenharmony_ci		return;
111762306a36Sopenharmony_ci	}
111862306a36Sopenharmony_ci	if (bp->b_ops->verify_struct == NULL) {
111962306a36Sopenharmony_ci		xchk_set_incomplete(sc);
112062306a36Sopenharmony_ci		return;
112162306a36Sopenharmony_ci	}
112262306a36Sopenharmony_ci	fa = bp->b_ops->verify_struct(bp);
112362306a36Sopenharmony_ci	if (!fa)
112462306a36Sopenharmony_ci		return;
112562306a36Sopenharmony_ci	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
112662306a36Sopenharmony_ci	trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
112762306a36Sopenharmony_ci}
112862306a36Sopenharmony_ci
112962306a36Sopenharmony_cistatic inline int
113062306a36Sopenharmony_cixchk_metadata_inode_subtype(
113162306a36Sopenharmony_ci	struct xfs_scrub	*sc,
113262306a36Sopenharmony_ci	unsigned int		scrub_type)
113362306a36Sopenharmony_ci{
113462306a36Sopenharmony_ci	__u32			smtype = sc->sm->sm_type;
113562306a36Sopenharmony_ci	int			error;
113662306a36Sopenharmony_ci
113762306a36Sopenharmony_ci	sc->sm->sm_type = scrub_type;
113862306a36Sopenharmony_ci
113962306a36Sopenharmony_ci	switch (scrub_type) {
114062306a36Sopenharmony_ci	case XFS_SCRUB_TYPE_INODE:
114162306a36Sopenharmony_ci		error = xchk_inode(sc);
114262306a36Sopenharmony_ci		break;
114362306a36Sopenharmony_ci	case XFS_SCRUB_TYPE_BMBTD:
114462306a36Sopenharmony_ci		error = xchk_bmap_data(sc);
114562306a36Sopenharmony_ci		break;
114662306a36Sopenharmony_ci	default:
114762306a36Sopenharmony_ci		ASSERT(0);
114862306a36Sopenharmony_ci		error = -EFSCORRUPTED;
114962306a36Sopenharmony_ci		break;
115062306a36Sopenharmony_ci	}
115162306a36Sopenharmony_ci
115262306a36Sopenharmony_ci	sc->sm->sm_type = smtype;
115362306a36Sopenharmony_ci	return error;
115462306a36Sopenharmony_ci}
115562306a36Sopenharmony_ci
115662306a36Sopenharmony_ci/*
115762306a36Sopenharmony_ci * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
115862306a36Sopenharmony_ci * pointed to by sc->ip and the ILOCK must be held.
115962306a36Sopenharmony_ci */
116062306a36Sopenharmony_ciint
116162306a36Sopenharmony_cixchk_metadata_inode_forks(
116262306a36Sopenharmony_ci	struct xfs_scrub	*sc)
116362306a36Sopenharmony_ci{
116462306a36Sopenharmony_ci	bool			shared;
116562306a36Sopenharmony_ci	int			error;
116662306a36Sopenharmony_ci
116762306a36Sopenharmony_ci	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
116862306a36Sopenharmony_ci		return 0;
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	/* Check the inode record. */
117162306a36Sopenharmony_ci	error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
117262306a36Sopenharmony_ci	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
117362306a36Sopenharmony_ci		return error;
117462306a36Sopenharmony_ci
117562306a36Sopenharmony_ci	/* Metadata inodes don't live on the rt device. */
117662306a36Sopenharmony_ci	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) {
117762306a36Sopenharmony_ci		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
117862306a36Sopenharmony_ci		return 0;
117962306a36Sopenharmony_ci	}
118062306a36Sopenharmony_ci
118162306a36Sopenharmony_ci	/* They should never participate in reflink. */
118262306a36Sopenharmony_ci	if (xfs_is_reflink_inode(sc->ip)) {
118362306a36Sopenharmony_ci		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
118462306a36Sopenharmony_ci		return 0;
118562306a36Sopenharmony_ci	}
118662306a36Sopenharmony_ci
118762306a36Sopenharmony_ci	/* They also should never have extended attributes. */
118862306a36Sopenharmony_ci	if (xfs_inode_hasattr(sc->ip)) {
118962306a36Sopenharmony_ci		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
119062306a36Sopenharmony_ci		return 0;
119162306a36Sopenharmony_ci	}
119262306a36Sopenharmony_ci
119362306a36Sopenharmony_ci	/* Invoke the data fork scrubber. */
119462306a36Sopenharmony_ci	error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
119562306a36Sopenharmony_ci	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
119662306a36Sopenharmony_ci		return error;
119762306a36Sopenharmony_ci
119862306a36Sopenharmony_ci	/* Look for incorrect shared blocks. */
119962306a36Sopenharmony_ci	if (xfs_has_reflink(sc->mp)) {
120062306a36Sopenharmony_ci		error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip,
120162306a36Sopenharmony_ci				&shared);
120262306a36Sopenharmony_ci		if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
120362306a36Sopenharmony_ci				&error))
120462306a36Sopenharmony_ci			return error;
120562306a36Sopenharmony_ci		if (shared)
120662306a36Sopenharmony_ci			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
120762306a36Sopenharmony_ci	}
120862306a36Sopenharmony_ci
120962306a36Sopenharmony_ci	return 0;
121062306a36Sopenharmony_ci}
121162306a36Sopenharmony_ci
/*
 * Enable filesystem hooks (i.e. runtime code patching) before starting a scrub
 * operation.  Callers must not hold any locks that intersect with the CPU
 * hotplug lock (e.g. writeback locks) because code patching must halt the CPUs
 * to change kernel code.
 *
 * @scrub_fsgates is a mask of XCHK_FSGATES_* bits.  The enabled bits are
 * recorded in sc->flags.
 */
void
xchk_fsgates_enable(
	struct xfs_scrub	*sc,
	unsigned int		scrub_fsgates)
{
	/* Only known gates may be requested, and none may be enabled twice. */
	ASSERT(!(scrub_fsgates & ~XCHK_FSGATES_ALL));
	ASSERT(!(sc->flags & scrub_fsgates));

	trace_xchk_fsgates_enable(sc, scrub_fsgates);

	if (scrub_fsgates & XCHK_FSGATES_DRAIN)
		xfs_drain_wait_enable();

	/* Remember what was turned on in the scrub context. */
	sc->flags |= scrub_fsgates;
}
123362306a36Sopenharmony_ci
123462306a36Sopenharmony_ci/*
 * Decide if this is a cached inode that's also allocated.  The caller
123662306a36Sopenharmony_ci * must hold a reference to an AG and the AGI buffer lock to prevent inodes
123762306a36Sopenharmony_ci * from being allocated or freed.
123862306a36Sopenharmony_ci *
123962306a36Sopenharmony_ci * Look up an inode by number in the given file system.  If the inode number
124062306a36Sopenharmony_ci * is invalid, return -EINVAL.  If the inode is not in cache, return -ENODATA.
124162306a36Sopenharmony_ci * If the inode is being reclaimed, return -ENODATA because we know the inode
124262306a36Sopenharmony_ci * cache cannot be updating the ondisk metadata.
124362306a36Sopenharmony_ci *
124462306a36Sopenharmony_ci * Otherwise, the incore inode is the one we want, and it is either live,
124562306a36Sopenharmony_ci * somewhere in the inactivation machinery, or reclaimable.  The inode is
124662306a36Sopenharmony_ci * allocated if i_mode is nonzero.  In all three cases, the cached inode will
124762306a36Sopenharmony_ci * be more up to date than the ondisk inode buffer, so we must use the incore
124862306a36Sopenharmony_ci * i_mode.
124962306a36Sopenharmony_ci */
125062306a36Sopenharmony_ciint
125162306a36Sopenharmony_cixchk_inode_is_allocated(
125262306a36Sopenharmony_ci	struct xfs_scrub	*sc,
125362306a36Sopenharmony_ci	xfs_agino_t		agino,
125462306a36Sopenharmony_ci	bool			*inuse)
125562306a36Sopenharmony_ci{
125662306a36Sopenharmony_ci	struct xfs_mount	*mp = sc->mp;
125762306a36Sopenharmony_ci	struct xfs_perag	*pag = sc->sa.pag;
125862306a36Sopenharmony_ci	xfs_ino_t		ino;
125962306a36Sopenharmony_ci	struct xfs_inode	*ip;
126062306a36Sopenharmony_ci	int			error;
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_ci	/* caller must hold perag reference */
126362306a36Sopenharmony_ci	if (pag == NULL) {
126462306a36Sopenharmony_ci		ASSERT(pag != NULL);
126562306a36Sopenharmony_ci		return -EINVAL;
126662306a36Sopenharmony_ci	}
126762306a36Sopenharmony_ci
126862306a36Sopenharmony_ci	/* caller must have AGI buffer */
126962306a36Sopenharmony_ci	if (sc->sa.agi_bp == NULL) {
127062306a36Sopenharmony_ci		ASSERT(sc->sa.agi_bp != NULL);
127162306a36Sopenharmony_ci		return -EINVAL;
127262306a36Sopenharmony_ci	}
127362306a36Sopenharmony_ci
127462306a36Sopenharmony_ci	/* reject inode numbers outside existing AGs */
127562306a36Sopenharmony_ci	ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
127662306a36Sopenharmony_ci	if (!xfs_verify_ino(mp, ino))
127762306a36Sopenharmony_ci		return -EINVAL;
127862306a36Sopenharmony_ci
127962306a36Sopenharmony_ci	error = -ENODATA;
128062306a36Sopenharmony_ci	rcu_read_lock();
128162306a36Sopenharmony_ci	ip = radix_tree_lookup(&pag->pag_ici_root, agino);
128262306a36Sopenharmony_ci	if (!ip) {
128362306a36Sopenharmony_ci		/* cache miss */
128462306a36Sopenharmony_ci		goto out_rcu;
128562306a36Sopenharmony_ci	}
128662306a36Sopenharmony_ci
128762306a36Sopenharmony_ci	/*
128862306a36Sopenharmony_ci	 * If the inode number doesn't match, the incore inode got reused
128962306a36Sopenharmony_ci	 * during an RCU grace period and the radix tree hasn't been updated.
129062306a36Sopenharmony_ci	 * This isn't the inode we want.
129162306a36Sopenharmony_ci	 */
129262306a36Sopenharmony_ci	spin_lock(&ip->i_flags_lock);
129362306a36Sopenharmony_ci	if (ip->i_ino != ino)
129462306a36Sopenharmony_ci		goto out_skip;
129562306a36Sopenharmony_ci
129662306a36Sopenharmony_ci	trace_xchk_inode_is_allocated(ip);
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_ci	/*
129962306a36Sopenharmony_ci	 * We have an incore inode that matches the inode we want, and the
130062306a36Sopenharmony_ci	 * caller holds the perag structure and the AGI buffer.  Let's check
130162306a36Sopenharmony_ci	 * our assumptions below:
130262306a36Sopenharmony_ci	 */
130362306a36Sopenharmony_ci
130462306a36Sopenharmony_ci#ifdef DEBUG
130562306a36Sopenharmony_ci	/*
130662306a36Sopenharmony_ci	 * (1) If the incore inode is live (i.e. referenced from the dcache),
130762306a36Sopenharmony_ci	 * it will not be INEW, nor will it be in the inactivation or reclaim
130862306a36Sopenharmony_ci	 * machinery.  The ondisk inode had better be allocated.  This is the
130962306a36Sopenharmony_ci	 * most trivial case.
131062306a36Sopenharmony_ci	 */
131162306a36Sopenharmony_ci	if (!(ip->i_flags & (XFS_NEED_INACTIVE | XFS_INEW | XFS_IRECLAIMABLE |
131262306a36Sopenharmony_ci			     XFS_INACTIVATING))) {
131362306a36Sopenharmony_ci		/* live inode */
131462306a36Sopenharmony_ci		ASSERT(VFS_I(ip)->i_mode != 0);
131562306a36Sopenharmony_ci	}
131662306a36Sopenharmony_ci
131762306a36Sopenharmony_ci	/*
131862306a36Sopenharmony_ci	 * If the incore inode is INEW, there are several possibilities:
131962306a36Sopenharmony_ci	 *
132062306a36Sopenharmony_ci	 * (2) For a file that is being created, note that we allocate the
132162306a36Sopenharmony_ci	 * ondisk inode before allocating, initializing, and adding the incore
132262306a36Sopenharmony_ci	 * inode to the radix tree.
132362306a36Sopenharmony_ci	 *
132462306a36Sopenharmony_ci	 * (3) If the incore inode is being recycled, the inode has to be
132562306a36Sopenharmony_ci	 * allocated because we don't allow freed inodes to be recycled.
132662306a36Sopenharmony_ci	 * Recycling doesn't touch i_mode.
132762306a36Sopenharmony_ci	 */
132862306a36Sopenharmony_ci	if (ip->i_flags & XFS_INEW) {
132962306a36Sopenharmony_ci		/* created on disk already or recycling */
133062306a36Sopenharmony_ci		ASSERT(VFS_I(ip)->i_mode != 0);
133162306a36Sopenharmony_ci	}
133262306a36Sopenharmony_ci
133362306a36Sopenharmony_ci	/*
133462306a36Sopenharmony_ci	 * (4) If the inode is queued for inactivation (NEED_INACTIVE) but
133562306a36Sopenharmony_ci	 * inactivation has not started (!INACTIVATING), it is still allocated.
133662306a36Sopenharmony_ci	 */
133762306a36Sopenharmony_ci	if ((ip->i_flags & XFS_NEED_INACTIVE) &&
133862306a36Sopenharmony_ci	    !(ip->i_flags & XFS_INACTIVATING)) {
133962306a36Sopenharmony_ci		/* definitely before difree */
134062306a36Sopenharmony_ci		ASSERT(VFS_I(ip)->i_mode != 0);
134162306a36Sopenharmony_ci	}
134262306a36Sopenharmony_ci#endif
134362306a36Sopenharmony_ci
134462306a36Sopenharmony_ci	/*
134562306a36Sopenharmony_ci	 * If the incore inode is undergoing inactivation (INACTIVATING), there
134662306a36Sopenharmony_ci	 * are two possibilities:
134762306a36Sopenharmony_ci	 *
134862306a36Sopenharmony_ci	 * (5) It is before the point where it would get freed ondisk, in which
134962306a36Sopenharmony_ci	 * case i_mode is still nonzero.
135062306a36Sopenharmony_ci	 *
135162306a36Sopenharmony_ci	 * (6) It has already been freed, in which case i_mode is zero.
135262306a36Sopenharmony_ci	 *
135362306a36Sopenharmony_ci	 * We don't take the ILOCK here, but difree and dialloc update the AGI,
135462306a36Sopenharmony_ci	 * and we've taken the AGI buffer lock, which prevents that from
135562306a36Sopenharmony_ci	 * happening.
135662306a36Sopenharmony_ci	 */
135762306a36Sopenharmony_ci
135862306a36Sopenharmony_ci	/*
135962306a36Sopenharmony_ci	 * (7) Inodes undergoing inactivation (INACTIVATING) or queued for
136062306a36Sopenharmony_ci	 * reclaim (IRECLAIMABLE) could be allocated or free.  i_mode still
136162306a36Sopenharmony_ci	 * reflects the ondisk state.
136262306a36Sopenharmony_ci	 */
136362306a36Sopenharmony_ci
136462306a36Sopenharmony_ci	/*
136562306a36Sopenharmony_ci	 * (8) If the inode is in IFLUSHING, it's safe to query i_mode because
136662306a36Sopenharmony_ci	 * the flush code uses i_mode to format the ondisk inode.
136762306a36Sopenharmony_ci	 */
136862306a36Sopenharmony_ci
136962306a36Sopenharmony_ci	/*
137062306a36Sopenharmony_ci	 * (9) If the inode is in IRECLAIM and was reachable via the radix
137162306a36Sopenharmony_ci	 * tree, it still has the same i_mode as it did before it entered
137262306a36Sopenharmony_ci	 * reclaim.  The inode object is still alive because we hold the RCU
137362306a36Sopenharmony_ci	 * read lock.
137462306a36Sopenharmony_ci	 */
137562306a36Sopenharmony_ci
137662306a36Sopenharmony_ci	*inuse = VFS_I(ip)->i_mode != 0;
137762306a36Sopenharmony_ci	error = 0;
137862306a36Sopenharmony_ci
137962306a36Sopenharmony_ciout_skip:
138062306a36Sopenharmony_ci	spin_unlock(&ip->i_flags_lock);
138162306a36Sopenharmony_ciout_rcu:
138262306a36Sopenharmony_ci	rcu_read_unlock();
138362306a36Sopenharmony_ci	return error;
138462306a36Sopenharmony_ci}
1385