162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2019-2023 Oracle.  All Rights Reserved.
462306a36Sopenharmony_ci * Author: Darrick J. Wong <djwong@kernel.org>
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci#include "xfs.h"
762306a36Sopenharmony_ci#include "xfs_fs.h"
862306a36Sopenharmony_ci#include "xfs_shared.h"
962306a36Sopenharmony_ci#include "xfs_format.h"
1062306a36Sopenharmony_ci#include "xfs_trans_resv.h"
1162306a36Sopenharmony_ci#include "xfs_log_format.h"
1262306a36Sopenharmony_ci#include "xfs_trans.h"
1362306a36Sopenharmony_ci#include "xfs_mount.h"
1462306a36Sopenharmony_ci#include "xfs_alloc.h"
1562306a36Sopenharmony_ci#include "xfs_ialloc.h"
1662306a36Sopenharmony_ci#include "xfs_health.h"
1762306a36Sopenharmony_ci#include "xfs_btree.h"
1862306a36Sopenharmony_ci#include "xfs_ag.h"
1962306a36Sopenharmony_ci#include "xfs_rtalloc.h"
2062306a36Sopenharmony_ci#include "xfs_inode.h"
2162306a36Sopenharmony_ci#include "xfs_icache.h"
2262306a36Sopenharmony_ci#include "scrub/scrub.h"
2362306a36Sopenharmony_ci#include "scrub/common.h"
2462306a36Sopenharmony_ci#include "scrub/trace.h"
2562306a36Sopenharmony_ci
/*
 * FS Summary Counters
 * ===================
 *
 * The basics of filesystem summary counter checking are that we iterate the
 * AGs counting the number of free blocks, free space btree blocks, per-AG
 * reservations, inodes, delayed allocation reservations, and free inodes.
 * Then we compare what we computed against the in-core counters.
 *
 * However, the reality is that summary counters are a tricky beast to check.
 * While we /could/ freeze the filesystem and scramble around the AGs counting
 * the free blocks, in practice we prefer not to do that for a scan because
 * freezing is costly.  To get around this, we added a per-cpu counter of the
 * delalloc reservations so that we can rotor around the AGs relatively
 * quickly, and we allow the counts to be slightly off because we're not taking
 * any locks while we do this.
 *
 * So the first thing we do is warm up the buffer cache in the setup routine by
 * walking all the AGs to make sure the incore per-AG structure has been
 * initialized.  The expected value calculation then iterates the incore per-AG
 * structures as quickly as it can.  We snapshot the percpu counters before and
 * after this operation and use the difference in counter values to guess at
 * our tolerance for mismatch between expected and actual counter values.
 */
5062306a36Sopenharmony_ci
/* Scrub context for one pass over the filesystem summary counters. */
struct xchk_fscounters {
	/* Scrub request that owns this buffer. */
	struct xfs_scrub	*sc;

	/* Expected counter values, summed up from the per-AG data. */
	uint64_t		icount;
	uint64_t		ifree;
	uint64_t		fdblocks;

	/* Expected free realtime extent count. */
	uint64_t		frextents;

	/* Bounds on icount, as reported by xfs_icount_range(). */
	unsigned long long	icount_min;
	unsigned long long	icount_max;

	/* Set once we hold a kernel freeze that cleanup must undo. */
	bool			frozen;
};
6162306a36Sopenharmony_ci
/*
 * The expected values are computed without locks, but only from incore data,
 * so the percpu counters ought to stay fairly close to them.  Even so, we
 * always tolerate being off by at least this (arbitrarily chosen) amount.
 */
#define XCHK_FSCOUNT_MIN_VARIANCE	(512)
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci/*
7062306a36Sopenharmony_ci * Make sure the per-AG structure has been initialized from the on-disk header
7162306a36Sopenharmony_ci * contents and trust that the incore counters match the ondisk counters.  (The
7262306a36Sopenharmony_ci * AGF and AGI scrubbers check them, and a normal xfs_scrub run checks the
7362306a36Sopenharmony_ci * summary counters after checking all AG headers).  Do this from the setup
7462306a36Sopenharmony_ci * function so that the inner AG aggregation loop runs as quickly as possible.
7562306a36Sopenharmony_ci *
7662306a36Sopenharmony_ci * This function runs during the setup phase /before/ we start checking any
7762306a36Sopenharmony_ci * metadata.
7862306a36Sopenharmony_ci */
7962306a36Sopenharmony_ciSTATIC int
8062306a36Sopenharmony_cixchk_fscount_warmup(
8162306a36Sopenharmony_ci	struct xfs_scrub	*sc)
8262306a36Sopenharmony_ci{
8362306a36Sopenharmony_ci	struct xfs_mount	*mp = sc->mp;
8462306a36Sopenharmony_ci	struct xfs_buf		*agi_bp = NULL;
8562306a36Sopenharmony_ci	struct xfs_buf		*agf_bp = NULL;
8662306a36Sopenharmony_ci	struct xfs_perag	*pag = NULL;
8762306a36Sopenharmony_ci	xfs_agnumber_t		agno;
8862306a36Sopenharmony_ci	int			error = 0;
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	for_each_perag(mp, agno, pag) {
9162306a36Sopenharmony_ci		if (xchk_should_terminate(sc, &error))
9262306a36Sopenharmony_ci			break;
9362306a36Sopenharmony_ci		if (xfs_perag_initialised_agi(pag) &&
9462306a36Sopenharmony_ci		    xfs_perag_initialised_agf(pag))
9562306a36Sopenharmony_ci			continue;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci		/* Lock both AG headers. */
9862306a36Sopenharmony_ci		error = xfs_ialloc_read_agi(pag, sc->tp, &agi_bp);
9962306a36Sopenharmony_ci		if (error)
10062306a36Sopenharmony_ci			break;
10162306a36Sopenharmony_ci		error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf_bp);
10262306a36Sopenharmony_ci		if (error)
10362306a36Sopenharmony_ci			break;
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci		/*
10662306a36Sopenharmony_ci		 * These are supposed to be initialized by the header read
10762306a36Sopenharmony_ci		 * function.
10862306a36Sopenharmony_ci		 */
10962306a36Sopenharmony_ci		if (!xfs_perag_initialised_agi(pag) ||
11062306a36Sopenharmony_ci		    !xfs_perag_initialised_agf(pag)) {
11162306a36Sopenharmony_ci			error = -EFSCORRUPTED;
11262306a36Sopenharmony_ci			break;
11362306a36Sopenharmony_ci		}
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci		xfs_buf_relse(agf_bp);
11662306a36Sopenharmony_ci		agf_bp = NULL;
11762306a36Sopenharmony_ci		xfs_buf_relse(agi_bp);
11862306a36Sopenharmony_ci		agi_bp = NULL;
11962306a36Sopenharmony_ci	}
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	if (agf_bp)
12262306a36Sopenharmony_ci		xfs_buf_relse(agf_bp);
12362306a36Sopenharmony_ci	if (agi_bp)
12462306a36Sopenharmony_ci		xfs_buf_relse(agi_bp);
12562306a36Sopenharmony_ci	if (pag)
12662306a36Sopenharmony_ci		xfs_perag_rele(pag);
12762306a36Sopenharmony_ci	return error;
12862306a36Sopenharmony_ci}
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_cistatic inline int
13162306a36Sopenharmony_cixchk_fsfreeze(
13262306a36Sopenharmony_ci	struct xfs_scrub	*sc)
13362306a36Sopenharmony_ci{
13462306a36Sopenharmony_ci	int			error;
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci	error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
13762306a36Sopenharmony_ci	trace_xchk_fsfreeze(sc, error);
13862306a36Sopenharmony_ci	return error;
13962306a36Sopenharmony_ci}
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_cistatic inline int
14262306a36Sopenharmony_cixchk_fsthaw(
14362306a36Sopenharmony_ci	struct xfs_scrub	*sc)
14462306a36Sopenharmony_ci{
14562306a36Sopenharmony_ci	int			error;
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	/* This should always succeed, we have a kernel freeze */
14862306a36Sopenharmony_ci	error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
14962306a36Sopenharmony_ci	trace_xchk_fsthaw(sc, error);
15062306a36Sopenharmony_ci	return error;
15162306a36Sopenharmony_ci}
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci/*
15462306a36Sopenharmony_ci * We couldn't stabilize the filesystem long enough to sample all the variables
15562306a36Sopenharmony_ci * that comprise the summary counters and compare them to the percpu counters.
15662306a36Sopenharmony_ci * We need to disable all writer threads, which means taking the first two
15762306a36Sopenharmony_ci * freeze levels to put userspace to sleep, and the third freeze level to
15862306a36Sopenharmony_ci * prevent background threads from starting new transactions.  Take one level
15962306a36Sopenharmony_ci * more to prevent other callers from unfreezing the filesystem while we run.
16062306a36Sopenharmony_ci */
16162306a36Sopenharmony_ciSTATIC int
16262306a36Sopenharmony_cixchk_fscounters_freeze(
16362306a36Sopenharmony_ci	struct xfs_scrub	*sc)
16462306a36Sopenharmony_ci{
16562306a36Sopenharmony_ci	struct xchk_fscounters	*fsc = sc->buf;
16662306a36Sopenharmony_ci	int			error = 0;
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
16962306a36Sopenharmony_ci		sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
17062306a36Sopenharmony_ci		mnt_drop_write_file(sc->file);
17162306a36Sopenharmony_ci	}
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	/* Try to grab a kernel freeze. */
17462306a36Sopenharmony_ci	while ((error = xchk_fsfreeze(sc)) == -EBUSY) {
17562306a36Sopenharmony_ci		if (xchk_should_terminate(sc, &error))
17662306a36Sopenharmony_ci			return error;
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci		delay(HZ / 10);
17962306a36Sopenharmony_ci	}
18062306a36Sopenharmony_ci	if (error)
18162306a36Sopenharmony_ci		return error;
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	fsc->frozen = true;
18462306a36Sopenharmony_ci	return 0;
18562306a36Sopenharmony_ci}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci/* Thaw the filesystem after checking or repairing fscounters. */
18862306a36Sopenharmony_ciSTATIC void
18962306a36Sopenharmony_cixchk_fscounters_cleanup(
19062306a36Sopenharmony_ci	void			*buf)
19162306a36Sopenharmony_ci{
19262306a36Sopenharmony_ci	struct xchk_fscounters	*fsc = buf;
19362306a36Sopenharmony_ci	struct xfs_scrub	*sc = fsc->sc;
19462306a36Sopenharmony_ci	int			error;
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	if (!fsc->frozen)
19762306a36Sopenharmony_ci		return;
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	error = xchk_fsthaw(sc);
20062306a36Sopenharmony_ci	if (error)
20162306a36Sopenharmony_ci		xfs_emerg(sc->mp, "still frozen after scrub, err=%d", error);
20262306a36Sopenharmony_ci	else
20362306a36Sopenharmony_ci		fsc->frozen = false;
20462306a36Sopenharmony_ci}
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_ciint
20762306a36Sopenharmony_cixchk_setup_fscounters(
20862306a36Sopenharmony_ci	struct xfs_scrub	*sc)
20962306a36Sopenharmony_ci{
21062306a36Sopenharmony_ci	struct xchk_fscounters	*fsc;
21162306a36Sopenharmony_ci	int			error;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	/*
21462306a36Sopenharmony_ci	 * If the AGF doesn't track btreeblks, we have to lock the AGF to count
21562306a36Sopenharmony_ci	 * btree block usage by walking the actual btrees.
21662306a36Sopenharmony_ci	 */
21762306a36Sopenharmony_ci	if (!xfs_has_lazysbcount(sc->mp))
21862306a36Sopenharmony_ci		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
22162306a36Sopenharmony_ci	if (!sc->buf)
22262306a36Sopenharmony_ci		return -ENOMEM;
22362306a36Sopenharmony_ci	sc->buf_cleanup = xchk_fscounters_cleanup;
22462306a36Sopenharmony_ci	fsc = sc->buf;
22562306a36Sopenharmony_ci	fsc->sc = sc;
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max);
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci	/* We must get the incore counters set up before we can proceed. */
23062306a36Sopenharmony_ci	error = xchk_fscount_warmup(sc);
23162306a36Sopenharmony_ci	if (error)
23262306a36Sopenharmony_ci		return error;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	/*
23562306a36Sopenharmony_ci	 * Pause all writer activity in the filesystem while we're scrubbing to
23662306a36Sopenharmony_ci	 * reduce the likelihood of background perturbations to the counters
23762306a36Sopenharmony_ci	 * throwing off our calculations.
23862306a36Sopenharmony_ci	 */
23962306a36Sopenharmony_ci	if (sc->flags & XCHK_TRY_HARDER) {
24062306a36Sopenharmony_ci		error = xchk_fscounters_freeze(sc);
24162306a36Sopenharmony_ci		if (error)
24262306a36Sopenharmony_ci			return error;
24362306a36Sopenharmony_ci	}
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	return xfs_trans_alloc_empty(sc->mp, &sc->tp);
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci/*
24962306a36Sopenharmony_ci * Part 1: Collecting filesystem summary counts.  For each AG, we add its
25062306a36Sopenharmony_ci * summary counts (total inodes, free inodes, free data blocks) to an incore
25162306a36Sopenharmony_ci * copy of the overall filesystem summary counts.
25262306a36Sopenharmony_ci *
25362306a36Sopenharmony_ci * To avoid false corruption reports in part 2, any failure in this part must
25462306a36Sopenharmony_ci * set the INCOMPLETE flag even when a negative errno is returned.  This care
25562306a36Sopenharmony_ci * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
25662306a36Sopenharmony_ci * ECANCELED) that are absorbed into a scrub state flag update by
25762306a36Sopenharmony_ci * xchk_*_process_error.
25862306a36Sopenharmony_ci */
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci/* Count free space btree blocks manually for pre-lazysbcount filesystems. */
26162306a36Sopenharmony_cistatic int
26262306a36Sopenharmony_cixchk_fscount_btreeblks(
26362306a36Sopenharmony_ci	struct xfs_scrub	*sc,
26462306a36Sopenharmony_ci	struct xchk_fscounters	*fsc,
26562306a36Sopenharmony_ci	xfs_agnumber_t		agno)
26662306a36Sopenharmony_ci{
26762306a36Sopenharmony_ci	xfs_extlen_t		blocks;
26862306a36Sopenharmony_ci	int			error;
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_ci	error = xchk_ag_init_existing(sc, agno, &sc->sa);
27162306a36Sopenharmony_ci	if (error)
27262306a36Sopenharmony_ci		goto out_free;
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
27562306a36Sopenharmony_ci	if (error)
27662306a36Sopenharmony_ci		goto out_free;
27762306a36Sopenharmony_ci	fsc->fdblocks += blocks - 1;
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
28062306a36Sopenharmony_ci	if (error)
28162306a36Sopenharmony_ci		goto out_free;
28262306a36Sopenharmony_ci	fsc->fdblocks += blocks - 1;
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ciout_free:
28562306a36Sopenharmony_ci	xchk_ag_free(sc, &sc->sa);
28662306a36Sopenharmony_ci	return error;
28762306a36Sopenharmony_ci}
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci/*
29062306a36Sopenharmony_ci * Calculate what the global in-core counters ought to be from the incore
29162306a36Sopenharmony_ci * per-AG structure.  Callers can compare this to the actual in-core counters
29262306a36Sopenharmony_ci * to estimate by how much both in-core and on-disk counters need to be
29362306a36Sopenharmony_ci * adjusted.
29462306a36Sopenharmony_ci */
29562306a36Sopenharmony_ciSTATIC int
29662306a36Sopenharmony_cixchk_fscount_aggregate_agcounts(
29762306a36Sopenharmony_ci	struct xfs_scrub	*sc,
29862306a36Sopenharmony_ci	struct xchk_fscounters	*fsc)
29962306a36Sopenharmony_ci{
30062306a36Sopenharmony_ci	struct xfs_mount	*mp = sc->mp;
30162306a36Sopenharmony_ci	struct xfs_perag	*pag;
30262306a36Sopenharmony_ci	uint64_t		delayed;
30362306a36Sopenharmony_ci	xfs_agnumber_t		agno;
30462306a36Sopenharmony_ci	int			tries = 8;
30562306a36Sopenharmony_ci	int			error = 0;
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ciretry:
30862306a36Sopenharmony_ci	fsc->icount = 0;
30962306a36Sopenharmony_ci	fsc->ifree = 0;
31062306a36Sopenharmony_ci	fsc->fdblocks = 0;
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	for_each_perag(mp, agno, pag) {
31362306a36Sopenharmony_ci		if (xchk_should_terminate(sc, &error))
31462306a36Sopenharmony_ci			break;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci		/* This somehow got unset since the warmup? */
31762306a36Sopenharmony_ci		if (!xfs_perag_initialised_agi(pag) ||
31862306a36Sopenharmony_ci		    !xfs_perag_initialised_agf(pag)) {
31962306a36Sopenharmony_ci			error = -EFSCORRUPTED;
32062306a36Sopenharmony_ci			break;
32162306a36Sopenharmony_ci		}
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci		/* Count all the inodes */
32462306a36Sopenharmony_ci		fsc->icount += pag->pagi_count;
32562306a36Sopenharmony_ci		fsc->ifree += pag->pagi_freecount;
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci		/* Add up the free/freelist/bnobt/cntbt blocks */
32862306a36Sopenharmony_ci		fsc->fdblocks += pag->pagf_freeblks;
32962306a36Sopenharmony_ci		fsc->fdblocks += pag->pagf_flcount;
33062306a36Sopenharmony_ci		if (xfs_has_lazysbcount(sc->mp)) {
33162306a36Sopenharmony_ci			fsc->fdblocks += pag->pagf_btreeblks;
33262306a36Sopenharmony_ci		} else {
33362306a36Sopenharmony_ci			error = xchk_fscount_btreeblks(sc, fsc, agno);
33462306a36Sopenharmony_ci			if (error)
33562306a36Sopenharmony_ci				break;
33662306a36Sopenharmony_ci		}
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci		/*
33962306a36Sopenharmony_ci		 * Per-AG reservations are taken out of the incore counters,
34062306a36Sopenharmony_ci		 * so they must be left out of the free blocks computation.
34162306a36Sopenharmony_ci		 */
34262306a36Sopenharmony_ci		fsc->fdblocks -= pag->pag_meta_resv.ar_reserved;
34362306a36Sopenharmony_ci		fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved;
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ci	}
34662306a36Sopenharmony_ci	if (pag)
34762306a36Sopenharmony_ci		xfs_perag_rele(pag);
34862306a36Sopenharmony_ci	if (error) {
34962306a36Sopenharmony_ci		xchk_set_incomplete(sc);
35062306a36Sopenharmony_ci		return error;
35162306a36Sopenharmony_ci	}
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	/*
35462306a36Sopenharmony_ci	 * The global incore space reservation is taken from the incore
35562306a36Sopenharmony_ci	 * counters, so leave that out of the computation.
35662306a36Sopenharmony_ci	 */
35762306a36Sopenharmony_ci	fsc->fdblocks -= mp->m_resblks_avail;
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci	/*
36062306a36Sopenharmony_ci	 * Delayed allocation reservations are taken out of the incore counters
36162306a36Sopenharmony_ci	 * but not recorded on disk, so leave them and their indlen blocks out
36262306a36Sopenharmony_ci	 * of the computation.
36362306a36Sopenharmony_ci	 */
36462306a36Sopenharmony_ci	delayed = percpu_counter_sum(&mp->m_delalloc_blks);
36562306a36Sopenharmony_ci	fsc->fdblocks -= delayed;
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci	trace_xchk_fscounters_calc(mp, fsc->icount, fsc->ifree, fsc->fdblocks,
36862306a36Sopenharmony_ci			delayed);
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	/* Bail out if the values we compute are totally nonsense. */
37262306a36Sopenharmony_ci	if (fsc->icount < fsc->icount_min || fsc->icount > fsc->icount_max ||
37362306a36Sopenharmony_ci	    fsc->fdblocks > mp->m_sb.sb_dblocks ||
37462306a36Sopenharmony_ci	    fsc->ifree > fsc->icount_max)
37562306a36Sopenharmony_ci		return -EFSCORRUPTED;
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	/*
37862306a36Sopenharmony_ci	 * If ifree > icount then we probably had some perturbation in the
37962306a36Sopenharmony_ci	 * counters while we were calculating things.  We'll try a few times
38062306a36Sopenharmony_ci	 * to maintain ifree <= icount before giving up.
38162306a36Sopenharmony_ci	 */
38262306a36Sopenharmony_ci	if (fsc->ifree > fsc->icount) {
38362306a36Sopenharmony_ci		if (tries--)
38462306a36Sopenharmony_ci			goto retry;
38562306a36Sopenharmony_ci		return -EDEADLOCK;
38662306a36Sopenharmony_ci	}
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	return 0;
38962306a36Sopenharmony_ci}
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci#ifdef CONFIG_XFS_RT
39262306a36Sopenharmony_ciSTATIC int
39362306a36Sopenharmony_cixchk_fscount_add_frextent(
39462306a36Sopenharmony_ci	struct xfs_mount		*mp,
39562306a36Sopenharmony_ci	struct xfs_trans		*tp,
39662306a36Sopenharmony_ci	const struct xfs_rtalloc_rec	*rec,
39762306a36Sopenharmony_ci	void				*priv)
39862306a36Sopenharmony_ci{
39962306a36Sopenharmony_ci	struct xchk_fscounters		*fsc = priv;
40062306a36Sopenharmony_ci	int				error = 0;
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci	fsc->frextents += rec->ar_extcount;
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	xchk_should_terminate(fsc->sc, &error);
40562306a36Sopenharmony_ci	return error;
40662306a36Sopenharmony_ci}
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci/* Calculate the number of free realtime extents from the realtime bitmap. */
40962306a36Sopenharmony_ciSTATIC int
41062306a36Sopenharmony_cixchk_fscount_count_frextents(
41162306a36Sopenharmony_ci	struct xfs_scrub	*sc,
41262306a36Sopenharmony_ci	struct xchk_fscounters	*fsc)
41362306a36Sopenharmony_ci{
41462306a36Sopenharmony_ci	struct xfs_mount	*mp = sc->mp;
41562306a36Sopenharmony_ci	int			error;
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci	fsc->frextents = 0;
41862306a36Sopenharmony_ci	if (!xfs_has_realtime(mp))
41962306a36Sopenharmony_ci		return 0;
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci	xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
42262306a36Sopenharmony_ci	error = xfs_rtalloc_query_all(sc->mp, sc->tp,
42362306a36Sopenharmony_ci			xchk_fscount_add_frextent, fsc);
42462306a36Sopenharmony_ci	if (error) {
42562306a36Sopenharmony_ci		xchk_set_incomplete(sc);
42662306a36Sopenharmony_ci		goto out_unlock;
42762306a36Sopenharmony_ci	}
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ciout_unlock:
43062306a36Sopenharmony_ci	xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
43162306a36Sopenharmony_ci	return error;
43262306a36Sopenharmony_ci}
43362306a36Sopenharmony_ci#else
43462306a36Sopenharmony_ciSTATIC int
43562306a36Sopenharmony_cixchk_fscount_count_frextents(
43662306a36Sopenharmony_ci	struct xfs_scrub	*sc,
43762306a36Sopenharmony_ci	struct xchk_fscounters	*fsc)
43862306a36Sopenharmony_ci{
43962306a36Sopenharmony_ci	fsc->frextents = 0;
44062306a36Sopenharmony_ci	return 0;
44162306a36Sopenharmony_ci}
44262306a36Sopenharmony_ci#endif /* CONFIG_XFS_RT */
44362306a36Sopenharmony_ci
44462306a36Sopenharmony_ci/*
44562306a36Sopenharmony_ci * Part 2: Comparing filesystem summary counters.  All we have to do here is
44662306a36Sopenharmony_ci * sum the percpu counters and compare them to what we've observed.
44762306a36Sopenharmony_ci */
44862306a36Sopenharmony_ci
44962306a36Sopenharmony_ci/*
45062306a36Sopenharmony_ci * Is the @counter reasonably close to the @expected value?
45162306a36Sopenharmony_ci *
45262306a36Sopenharmony_ci * We neither locked nor froze anything in the filesystem while aggregating the
45362306a36Sopenharmony_ci * per-AG data to compute the @expected value, which means that the counter
45462306a36Sopenharmony_ci * could have changed.  We know the @old_value of the summation of the counter
45562306a36Sopenharmony_ci * before the aggregation, and we re-sum the counter now.  If the expected
45662306a36Sopenharmony_ci * value falls between the two summations, we're ok.
45762306a36Sopenharmony_ci *
45862306a36Sopenharmony_ci * Otherwise, we /might/ have a problem.  If the change in the summations is
45962306a36Sopenharmony_ci * more than we want to tolerate, the filesystem is probably busy and we should
46062306a36Sopenharmony_ci * just send back INCOMPLETE and see if userspace will try again.
46162306a36Sopenharmony_ci *
46262306a36Sopenharmony_ci * If we're repairing then we require an exact match.
46362306a36Sopenharmony_ci */
46462306a36Sopenharmony_cistatic inline bool
46562306a36Sopenharmony_cixchk_fscount_within_range(
46662306a36Sopenharmony_ci	struct xfs_scrub	*sc,
46762306a36Sopenharmony_ci	const int64_t		old_value,
46862306a36Sopenharmony_ci	struct percpu_counter	*counter,
46962306a36Sopenharmony_ci	uint64_t		expected)
47062306a36Sopenharmony_ci{
47162306a36Sopenharmony_ci	int64_t			min_value, max_value;
47262306a36Sopenharmony_ci	int64_t			curr_value = percpu_counter_sum(counter);
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	trace_xchk_fscounters_within_range(sc->mp, expected, curr_value,
47562306a36Sopenharmony_ci			old_value);
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci	/* Negative values are always wrong. */
47862306a36Sopenharmony_ci	if (curr_value < 0)
47962306a36Sopenharmony_ci		return false;
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	/* Exact matches are always ok. */
48262306a36Sopenharmony_ci	if (curr_value == expected)
48362306a36Sopenharmony_ci		return true;
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci	min_value = min(old_value, curr_value);
48662306a36Sopenharmony_ci	max_value = max(old_value, curr_value);
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci	/* Within the before-and-after range is ok. */
48962306a36Sopenharmony_ci	if (expected >= min_value && expected <= max_value)
49062306a36Sopenharmony_ci		return true;
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci	/* Everything else is bad. */
49362306a36Sopenharmony_ci	return false;
49462306a36Sopenharmony_ci}
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci/* Check the superblock counters. */
49762306a36Sopenharmony_ciint
49862306a36Sopenharmony_cixchk_fscounters(
49962306a36Sopenharmony_ci	struct xfs_scrub	*sc)
50062306a36Sopenharmony_ci{
50162306a36Sopenharmony_ci	struct xfs_mount	*mp = sc->mp;
50262306a36Sopenharmony_ci	struct xchk_fscounters	*fsc = sc->buf;
50362306a36Sopenharmony_ci	int64_t			icount, ifree, fdblocks, frextents;
50462306a36Sopenharmony_ci	bool			try_again = false;
50562306a36Sopenharmony_ci	int			error;
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	/* Snapshot the percpu counters. */
50862306a36Sopenharmony_ci	icount = percpu_counter_sum(&mp->m_icount);
50962306a36Sopenharmony_ci	ifree = percpu_counter_sum(&mp->m_ifree);
51062306a36Sopenharmony_ci	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
51162306a36Sopenharmony_ci	frextents = percpu_counter_sum(&mp->m_frextents);
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	/* No negative values, please! */
51462306a36Sopenharmony_ci	if (icount < 0 || ifree < 0)
51562306a36Sopenharmony_ci		xchk_set_corrupt(sc);
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	/*
51862306a36Sopenharmony_ci	 * If the filesystem is not frozen, the counter summation calls above
51962306a36Sopenharmony_ci	 * can race with xfs_mod_freecounter, which subtracts a requested space
52062306a36Sopenharmony_ci	 * reservation from the counter and undoes the subtraction if that made
52162306a36Sopenharmony_ci	 * the counter go negative.  Therefore, it's possible to see negative
52262306a36Sopenharmony_ci	 * values here, and we should only flag that as a corruption if we
52362306a36Sopenharmony_ci	 * froze the fs.  This is much more likely to happen with frextents
52462306a36Sopenharmony_ci	 * since there are no reserved pools.
52562306a36Sopenharmony_ci	 */
52662306a36Sopenharmony_ci	if (fdblocks < 0 || frextents < 0) {
52762306a36Sopenharmony_ci		if (!fsc->frozen)
52862306a36Sopenharmony_ci			return -EDEADLOCK;
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_ci		xchk_set_corrupt(sc);
53162306a36Sopenharmony_ci		return 0;
53262306a36Sopenharmony_ci	}
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci	/* See if icount is obviously wrong. */
53562306a36Sopenharmony_ci	if (icount < fsc->icount_min || icount > fsc->icount_max)
53662306a36Sopenharmony_ci		xchk_set_corrupt(sc);
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	/* See if fdblocks is obviously wrong. */
53962306a36Sopenharmony_ci	if (fdblocks > mp->m_sb.sb_dblocks)
54062306a36Sopenharmony_ci		xchk_set_corrupt(sc);
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	/* See if frextents is obviously wrong. */
54362306a36Sopenharmony_ci	if (frextents > mp->m_sb.sb_rextents)
54462306a36Sopenharmony_ci		xchk_set_corrupt(sc);
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_ci	/*
54762306a36Sopenharmony_ci	 * If ifree exceeds icount by more than the minimum variance then
54862306a36Sopenharmony_ci	 * something's probably wrong with the counters.
54962306a36Sopenharmony_ci	 */
55062306a36Sopenharmony_ci	if (ifree > icount && ifree - icount > XCHK_FSCOUNT_MIN_VARIANCE)
55162306a36Sopenharmony_ci		xchk_set_corrupt(sc);
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci	/* Walk the incore AG headers to calculate the expected counters. */
55462306a36Sopenharmony_ci	error = xchk_fscount_aggregate_agcounts(sc, fsc);
55562306a36Sopenharmony_ci	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
55662306a36Sopenharmony_ci		return error;
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci	/* Count the free extents counter for rt volumes. */
55962306a36Sopenharmony_ci	error = xchk_fscount_count_frextents(sc, fsc);
56062306a36Sopenharmony_ci	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
56162306a36Sopenharmony_ci		return error;
56262306a36Sopenharmony_ci	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
56362306a36Sopenharmony_ci		return 0;
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	/*
56662306a36Sopenharmony_ci	 * Compare the in-core counters with whatever we counted.  If the fs is
56762306a36Sopenharmony_ci	 * frozen, we treat the discrepancy as a corruption because the freeze
56862306a36Sopenharmony_ci	 * should have stabilized the counter values.  Otherwise, we need
56962306a36Sopenharmony_ci	 * userspace to call us back having granted us freeze permission.
57062306a36Sopenharmony_ci	 */
57162306a36Sopenharmony_ci	if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
57262306a36Sopenharmony_ci				fsc->icount)) {
57362306a36Sopenharmony_ci		if (fsc->frozen)
57462306a36Sopenharmony_ci			xchk_set_corrupt(sc);
57562306a36Sopenharmony_ci		else
57662306a36Sopenharmony_ci			try_again = true;
57762306a36Sopenharmony_ci	}
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci	if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
58062306a36Sopenharmony_ci		if (fsc->frozen)
58162306a36Sopenharmony_ci			xchk_set_corrupt(sc);
58262306a36Sopenharmony_ci		else
58362306a36Sopenharmony_ci			try_again = true;
58462306a36Sopenharmony_ci	}
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci	if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
58762306a36Sopenharmony_ci			fsc->fdblocks)) {
58862306a36Sopenharmony_ci		if (fsc->frozen)
58962306a36Sopenharmony_ci			xchk_set_corrupt(sc);
59062306a36Sopenharmony_ci		else
59162306a36Sopenharmony_ci			try_again = true;
59262306a36Sopenharmony_ci	}
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
59562306a36Sopenharmony_ci			fsc->frextents)) {
59662306a36Sopenharmony_ci		if (fsc->frozen)
59762306a36Sopenharmony_ci			xchk_set_corrupt(sc);
59862306a36Sopenharmony_ci		else
59962306a36Sopenharmony_ci			try_again = true;
60062306a36Sopenharmony_ci	}
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci	if (try_again)
60362306a36Sopenharmony_ci		return -EDEADLOCK;
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_ci	return 0;
60662306a36Sopenharmony_ci}
607