xref: /kernel/linux/linux-5.10/fs/xfs/scrub/refcount.c (revision 8c2ecf20)
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_btree.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"

/*
 * Set us up to scrub reference count btrees.
 */
int
xchk_setup_ag_refcountbt(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	return xchk_setup_ag_btree(sc, ip, false);
}

/* Reference count btree scrubber. */

/*
 * Confirming Reference Counts via Reverse Mappings
 *
 * We want to count the reverse mappings overlapping a refcount record
 * (bno, len, refcount), allowing for the possibility that some of the
 * overlap may come from smaller adjoining reverse mappings, while some
 * comes from single extents which overlap the range entirely.  The
 * outer loop is as follows:
 *
 * 1. For all reverse mappings overlapping the refcount extent,
 *    a. If a given rmap completely overlaps, mark it as seen.
 *    b. Otherwise, record the fragment (in agbno order) for later
 *       processing.
 *
 * Once we've seen all the rmaps, we know that for all blocks in the
 * refcount record we want to find $refcount owners and we've already
 * visited $seen extents that overlap all the blocks.  Therefore, we
 * need to find ($refcount - $seen) owners for every block in the
 * extent; call that quantity $target_nr.  Proceed as follows:
 *
 * 2. Pull the first $target_nr fragments from the list; all of them
 *    should start at or before the start of the extent.
 *    Call this subset of fragments the working set.
 * 3. Until there are no more unprocessed fragments,
 *    a. Find the shortest fragments in the set and remove them.
 *    b. Note the block number of the end of these fragments.
 *    c. Pull the same number of fragments from the list.  All of these
 *       fragments should start at the block number recorded in the
 *       previous step.
 *    d. Put those fragments in the set.
 * 4. Check that there are $target_nr fragments remaining in the list,
 *    and that they all end at or beyond the end of the refcount extent.
 *
 * If the refcount is correct, all the check conditions in the algorithm
 * should always hold true.  If not, the refcount is incorrect.
 */
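
/*
 * A worked example with made-up numbers may help.  Take the refcount
 * record (bno=16, len=8, refcount=3), i.e. blocks 16-23 each have
 * three owners, and suppose the rmap query returns five rmaps:
 *
 *	A: [12, 26)	covers the whole extent	-> $seen = 1
 *	B: [14, 20)	partial; saved as a fragment
 *	C: [16, 20)	partial; saved as a fragment
 *	D: [20, 24)	partial; saved as a fragment
 *	E: [20, 25)	partial; saved as a fragment
 *
 * Then $target_nr = 3 - 1 = 2.  Step 2 pulls B and C (both start at or
 * before block 16) into the working set; both end at block 20.  Step 3
 * removes B and C and pulls in D and E, which both start at block 20,
 * noting their earliest endpoint, block 24.  The fragment list is now
 * empty, and the last endpoint (24) reaches the end of the refcount
 * extent (16 + 8), so the record checks out.
 */
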
struct xchk_refcnt_frag {
	struct list_head	list;
	struct xfs_rmap_irec	rm;
};

struct xchk_refcnt_check {
	struct xfs_scrub	*sc;
	struct list_head	fragments;

	/* refcount extent we're examining */
	xfs_agblock_t		bno;
	xfs_extlen_t		len;
	xfs_nlink_t		refcount;

	/* number of owners seen */
	xfs_nlink_t		seen;
};

/*
 * Decide if the given rmap is large enough that we can redeem it
 * towards refcount verification now, or if it's a fragment, in
 * which case we'll hang onto it in the hopes that we'll later
 * discover that we've collected exactly the correct number of
 * fragments as the refcountbt says we should have.
 */
STATIC int
xchk_refcountbt_rmap_check(
	struct xfs_btree_cur		*cur,
	struct xfs_rmap_irec		*rec,
	void				*priv)
{
	struct xchk_refcnt_check	*refchk = priv;
	struct xchk_refcnt_frag		*frag;
	xfs_agblock_t			rm_last;
	xfs_agblock_t			rc_last;
	int				error = 0;

	if (xchk_should_terminate(refchk->sc, &error))
		return error;

	rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
	rc_last = refchk->bno + refchk->len - 1;

	/* Confirm that a single-owner refc extent is a CoW staging extent. */
	if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
		xchk_btree_xref_set_corrupt(refchk->sc, cur, 0);
		return 0;
	}

	if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
		/*
		 * The rmap overlaps the refcount record, so we can confirm
		 * one refcount owner seen.
		 */
		refchk->seen++;
	} else {
		/*
		 * This rmap covers only part of the refcount record, so
		 * save the fragment for later processing.  If the rmapbt
		 * is healthy each rmap_irec we see will be in agbno order
		 * so we don't need insertion sort here.
		 */
		frag = kmem_alloc(sizeof(struct xchk_refcnt_frag),
				KM_MAYFAIL);
		if (!frag)
			return -ENOMEM;
		memcpy(&frag->rm, rec, sizeof(frag->rm));
		list_add_tail(&frag->list, &refchk->fragments);
	}

	return 0;
}
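
/*
 * For example, a hypothetical record (bno=40, len=4, refcount=1) can
 * only describe a CoW staging extent, so any overlapping rmap whose
 * owner is not XFS_RMAP_OWN_COW immediately marks the cross-reference
 * corrupt, before any overlap accounting happens.
 */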

/*
 * Given a bunch of rmap fragments, iterate through them, keeping
 * a running tally of the refcount.  If this ever deviates from
 * what we expect (which is the refcountbt's refcount minus the
 * number of extents that totally covered the refcountbt extent),
 * we have a refcountbt error.
 */
STATIC void
xchk_refcountbt_process_rmap_fragments(
	struct xchk_refcnt_check	*refchk)
{
	struct list_head		worklist;
	struct xchk_refcnt_frag		*frag;
	struct xchk_refcnt_frag		*n;
	xfs_agblock_t			bno;
	xfs_agblock_t			rbno;
	xfs_agblock_t			next_rbno;
	xfs_nlink_t			nr;
	xfs_nlink_t			target_nr;

	target_nr = refchk->refcount - refchk->seen;
	if (target_nr == 0)
		return;

	/*
	 * There are (refchk->refcount - refchk->seen) references that we
	 * haven't found yet.  Pull that many off the fragment list and
	 * figure out where the smallest rmap ends (and therefore the
	 * next rmap should start).  All the rmaps we pull off should
	 * start at or before the beginning of the refcount record's
	 * range.
	 */
	INIT_LIST_HEAD(&worklist);
	rbno = NULLAGBLOCK;

	/* Make sure the fragments actually /are/ in agbno order. */
	bno = 0;
	list_for_each_entry(frag, &refchk->fragments, list) {
		if (frag->rm.rm_startblock < bno)
			goto done;
		bno = frag->rm.rm_startblock;
	}

	/*
	 * Find all the rmaps that start at or before the refc extent,
	 * and put them on the worklist.
	 */
	nr = 0;
	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
		if (frag->rm.rm_startblock > refchk->bno || nr > target_nr)
			break;
		bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
		if (bno < rbno)
			rbno = bno;
		list_move_tail(&frag->list, &worklist);
		nr++;
	}

	/*
	 * We should have found exactly $target_nr rmap fragments starting
	 * at or before the refcount extent.
	 */
	if (nr != target_nr)
		goto done;

	while (!list_empty(&refchk->fragments)) {
		/* Discard any fragments ending at rbno from the worklist. */
		nr = 0;
		next_rbno = NULLAGBLOCK;
		list_for_each_entry_safe(frag, n, &worklist, list) {
			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
			if (bno != rbno) {
				if (bno < next_rbno)
					next_rbno = bno;
				continue;
			}
			list_del(&frag->list);
			kmem_free(frag);
			nr++;
		}

		/* Try to add nr rmaps starting at rbno to the worklist. */
		list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
			bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
			if (frag->rm.rm_startblock != rbno)
				goto done;
			list_move_tail(&frag->list, &worklist);
			if (next_rbno > bno)
				next_rbno = bno;
			nr--;
			if (nr == 0)
				break;
		}

		/*
		 * If we get here and nr > 0, this means that we added fewer
		 * items to the worklist than we discarded because the fragment
		 * list ran out of items.  Therefore, we cannot maintain the
		 * required refcount.  Something is wrong, so we're done.
		 */
		if (nr)
			goto done;

		rbno = next_rbno;
	}

	/*
	 * Make sure the last extent we processed ends at or beyond
	 * the end of the refcount extent.
	 */
	if (rbno < refchk->bno + refchk->len)
		goto done;

	/* Actually record us having seen the remaining refcount. */
	refchk->seen = refchk->refcount;
done:
	/* Delete fragments and work list. */
	list_for_each_entry_safe(frag, n, &worklist, list) {
		list_del(&frag->list);
		kmem_free(frag);
	}
	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
		list_del(&frag->list);
		kmem_free(frag);
	}
}
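
/*
 * To illustrate the failure path with made-up numbers: with target_nr
 * = 2 and fragments B [14, 20) and C [16, 20) in the worklist, suppose
 * only one fragment, D [20, 24), starts at rbno = 20.  The second list
 * walk then ends with nr = 1, meaning blocks 20-23 have fewer owners
 * than the refcount record claims; we jump to done with ->seen still
 * short of ->refcount, and the caller flags the cross-reference as
 * corrupt.
 */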

/* Use the rmap entries covering this extent to verify the refcount. */
STATIC void
xchk_refcountbt_xref_rmap(
	struct xfs_scrub		*sc,
	xfs_agblock_t			bno,
	xfs_extlen_t			len,
	xfs_nlink_t			refcount)
{
	struct xchk_refcnt_check	refchk = {
		.sc = sc,
		.bno = bno,
		.len = len,
		.refcount = refcount,
		.seen = 0,
	};
	struct xfs_rmap_irec		low;
	struct xfs_rmap_irec		high;
	struct xchk_refcnt_frag		*frag;
	struct xchk_refcnt_frag		*n;
	int				error;

	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
		return;

	/* Cross-reference with the rmapbt to confirm the refcount. */
	memset(&low, 0, sizeof(low));
	low.rm_startblock = bno;
	memset(&high, 0xFF, sizeof(high));
	high.rm_startblock = bno + len - 1;

	INIT_LIST_HEAD(&refchk.fragments);
	error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
			&xchk_refcountbt_rmap_check, &refchk);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		goto out_free;

	xchk_refcountbt_process_rmap_fragments(&refchk);
	if (refcount != refchk.seen)
		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);

out_free:
	list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
		list_del(&frag->list);
		kmem_free(frag);
	}
}
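
/*
 * Note on the query keys above: "low" is all zeroes except for
 * rm_startblock, and "high" is all ones except for rm_startblock, so
 * together they bracket every possible owner and offset.  The range
 * query therefore calls xchk_refcountbt_rmap_check once for each rmap
 * overlapping blocks [bno, bno + len - 1], whatever its owner.
 */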

/* Cross-reference with the other btrees. */
STATIC void
xchk_refcountbt_xref(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len,
	xfs_nlink_t		refcount)
{
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return;

	xchk_xref_is_used_space(sc, agbno, len);
	xchk_xref_is_not_inode_chunk(sc, agbno, len);
	xchk_refcountbt_xref_rmap(sc, agbno, len, refcount);
}

/* Scrub a refcountbt record. */
STATIC int
xchk_refcountbt_rec(
	struct xchk_btree	*bs,
	union xfs_btree_rec	*rec)
{
	struct xfs_mount	*mp = bs->cur->bc_mp;
	xfs_agblock_t		*cow_blocks = bs->private;
	xfs_agnumber_t		agno = bs->cur->bc_ag.agno;
	xfs_agblock_t		bno;
	xfs_extlen_t		len;
	xfs_nlink_t		refcount;
	bool			has_cowflag;

	bno = be32_to_cpu(rec->refc.rc_startblock);
	len = be32_to_cpu(rec->refc.rc_blockcount);
	refcount = be32_to_cpu(rec->refc.rc_refcount);

	/* Only CoW records can have refcount == 1. */
	has_cowflag = (bno & XFS_REFC_COW_START);
	if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
	if (has_cowflag)
		(*cow_blocks) += len;

	/* Check the extent. */
	bno &= ~XFS_REFC_COW_START;
	if (bno + len <= bno ||
	    !xfs_verify_agbno(mp, agno, bno) ||
	    !xfs_verify_agbno(mp, agno, bno + len - 1))
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

	if (refcount == 0)
		xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

	xchk_refcountbt_xref(bs->sc, bno, len, refcount);

	return 0;
}
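
/*
 * Concretely, XFS_REFC_COW_START is the high bit of the 32-bit agblock
 * space, so a hypothetical staging extent at agblock 100 is stored as
 * rc_startblock = 100 | (1U << 31) with rc_refcount = 1; masking the
 * bit off above recovers the real agblock for the extent checks.
 */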

/* Make sure we have as many refc blocks as the rmap says. */
STATIC void
xchk_refcount_xref_rmap(
	struct xfs_scrub	*sc,
	xfs_filblks_t		cow_blocks)
{
	xfs_extlen_t		refcbt_blocks = 0;
	xfs_filblks_t		blocks;
	int			error;

	if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
		return;

	/* Check that we saw as many refcbt blocks as the rmap knows about. */
	error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
	if (!xchk_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
		return;
	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
			&XFS_RMAP_OINFO_REFC, &blocks);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (blocks != refcbt_blocks)
		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);

	/* Check that we saw as many cow blocks as the rmap knows about. */
	error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
			&XFS_RMAP_OINFO_COW, &blocks);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
		return;
	if (blocks != cow_blocks)
		xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}

/* Scrub the refcount btree for some AG. */
int
xchk_refcountbt(
	struct xfs_scrub	*sc)
{
	xfs_agblock_t		cow_blocks = 0;
	int			error;

	error = xchk_btree(sc, sc->sa.refc_cur, xchk_refcountbt_rec,
			&XFS_RMAP_OINFO_REFC, &cow_blocks);
	if (error)
		return error;

	xchk_refcount_xref_rmap(sc, cow_blocks);

	return 0;
}

/* xref check that a CoW staging extent is marked in the refcountbt. */
void
xchk_xref_is_cow_staging(
	struct xfs_scrub		*sc,
	xfs_agblock_t			agbno,
	xfs_extlen_t			len)
{
	struct xfs_refcount_irec	rc;
	bool				has_cowflag;
	int				has_refcount;
	int				error;

	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
		return;

	/* Find the CoW staging extent. */
	error = xfs_refcount_lookup_le(sc->sa.refc_cur,
			agbno + XFS_REFC_COW_START, &has_refcount);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
		return;
	if (!has_refcount) {
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
		return;
	}

	error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
		return;
	if (!has_refcount) {
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
		return;
	}

	/* CoW flag must be set, refcount must be 1. */
	has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
	if (!has_cowflag || rc.rc_refcount != 1)
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);

	/* Must be at least as long as what was passed in. */
	if (rc.rc_blockcount < len)
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}
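
/*
 * For example, to find the staging record covering a hypothetical
 * agblock 100, the lookup key above is 100 + XFS_REFC_COW_START, which
 * lands in the CoW-flagged half of the keyspace; xfs_refcount_lookup_le
 * then leaves the cursor on the last record starting at or before that
 * key, which is the staging extent if one exists.
 */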

/*
 * xref check that the extent is not shared.  Only file data blocks
 * can have multiple owners.
 */
void
xchk_xref_is_not_shared(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	bool			shared;
	int			error;

	if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
		return;

	error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
	if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
		return;
	if (shared)
		xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}