xref: /kernel/linux/linux-5.10/fs/xfs/scrub/dabtree.c (revision 8c2ecf20)
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_log_format.h"
13#include "xfs_trans.h"
14#include "xfs_inode.h"
15#include "xfs_dir2.h"
16#include "xfs_dir2_priv.h"
17#include "xfs_attr_leaf.h"
18#include "scrub/scrub.h"
19#include "scrub/common.h"
20#include "scrub/trace.h"
21#include "scrub/dabtree.h"
22
/* Directory/Attribute Btree */
24
25/*
26 * Check for da btree operation errors.  See the section about handling
27 * operational errors in common.c.
28 */
29bool
30xchk_da_process_error(
31	struct xchk_da_btree	*ds,
32	int			level,
33	int			*error)
34{
35	struct xfs_scrub	*sc = ds->sc;
36
37	if (*error == 0)
38		return true;
39
40	switch (*error) {
41	case -EDEADLOCK:
42		/* Used to restart an op with deadlock avoidance. */
43		trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
44		break;
45	case -EFSBADCRC:
46	case -EFSCORRUPTED:
47		/* Note the badness but don't abort. */
48		sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
49		*error = 0;
50		/* fall through */
51	default:
52		trace_xchk_file_op_error(sc, ds->dargs.whichfork,
53				xfs_dir2_da_to_db(ds->dargs.geo,
54					ds->state->path.blk[level].blkno),
55				*error, __return_address);
56		break;
57	}
58	return false;
59}
60
61/*
62 * Check for da btree corruption.  See the section about handling
63 * operational errors in common.c.
64 */
65void
66xchk_da_set_corrupt(
67	struct xchk_da_btree	*ds,
68	int			level)
69{
70	struct xfs_scrub	*sc = ds->sc;
71
72	sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
73
74	trace_xchk_fblock_error(sc, ds->dargs.whichfork,
75			xfs_dir2_da_to_db(ds->dargs.geo,
76				ds->state->path.blk[level].blkno),
77			__return_address);
78}
79
80static struct xfs_da_node_entry *
81xchk_da_btree_node_entry(
82	struct xchk_da_btree		*ds,
83	int				level)
84{
85	struct xfs_da_state_blk		*blk = &ds->state->path.blk[level];
86	struct xfs_da3_icnode_hdr	hdr;
87
88	ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
89
90	xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
91	return hdr.btree + blk->index;
92}
93
94/* Scrub a da btree hash (key). */
95int
96xchk_da_btree_hash(
97	struct xchk_da_btree		*ds,
98	int				level,
99	__be32				*hashp)
100{
101	struct xfs_da_node_entry	*entry;
102	xfs_dahash_t			hash;
103	xfs_dahash_t			parent_hash;
104
105	/* Is this hash in order? */
106	hash = be32_to_cpu(*hashp);
107	if (hash < ds->hashes[level])
108		xchk_da_set_corrupt(ds, level);
109	ds->hashes[level] = hash;
110
111	if (level == 0)
112		return 0;
113
114	/* Is this hash no larger than the parent hash? */
115	entry = xchk_da_btree_node_entry(ds, level - 1);
116	parent_hash = be32_to_cpu(entry->hashval);
117	if (parent_hash < hash)
118		xchk_da_set_corrupt(ds, level);
119
120	return 0;
121}
122
123/*
124 * Check a da btree pointer.  Returns true if it's ok to use this
125 * pointer.
126 */
127STATIC bool
128xchk_da_btree_ptr_ok(
129	struct xchk_da_btree	*ds,
130	int			level,
131	xfs_dablk_t		blkno)
132{
133	if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) {
134		xchk_da_set_corrupt(ds, level);
135		return false;
136	}
137
138	return true;
139}
140
141/*
142 * The da btree scrubber can handle leaf1 blocks as a degenerate
143 * form of leafn blocks.  Since the regular da code doesn't handle
144 * leaf1, we must multiplex the verifiers.
145 */
146static void
147xchk_da_btree_read_verify(
148	struct xfs_buf		*bp)
149{
150	struct xfs_da_blkinfo	*info = bp->b_addr;
151
152	switch (be16_to_cpu(info->magic)) {
153	case XFS_DIR2_LEAF1_MAGIC:
154	case XFS_DIR3_LEAF1_MAGIC:
155		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
156		bp->b_ops->verify_read(bp);
157		return;
158	default:
159		/*
160		 * xfs_da3_node_buf_ops already know how to handle
161		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
162		 */
163		bp->b_ops = &xfs_da3_node_buf_ops;
164		bp->b_ops->verify_read(bp);
165		return;
166	}
167}
168static void
169xchk_da_btree_write_verify(
170	struct xfs_buf		*bp)
171{
172	struct xfs_da_blkinfo	*info = bp->b_addr;
173
174	switch (be16_to_cpu(info->magic)) {
175	case XFS_DIR2_LEAF1_MAGIC:
176	case XFS_DIR3_LEAF1_MAGIC:
177		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
178		bp->b_ops->verify_write(bp);
179		return;
180	default:
181		/*
182		 * xfs_da3_node_buf_ops already know how to handle
183		 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks.
184		 */
185		bp->b_ops = &xfs_da3_node_buf_ops;
186		bp->b_ops->verify_write(bp);
187		return;
188	}
189}
190static void *
191xchk_da_btree_verify(
192	struct xfs_buf		*bp)
193{
194	struct xfs_da_blkinfo	*info = bp->b_addr;
195
196	switch (be16_to_cpu(info->magic)) {
197	case XFS_DIR2_LEAF1_MAGIC:
198	case XFS_DIR3_LEAF1_MAGIC:
199		bp->b_ops = &xfs_dir3_leaf1_buf_ops;
200		return bp->b_ops->verify_struct(bp);
201	default:
202		bp->b_ops = &xfs_da3_node_buf_ops;
203		return bp->b_ops->verify_struct(bp);
204	}
205}
206
207static const struct xfs_buf_ops xchk_da_btree_buf_ops = {
208	.name = "xchk_da_btree",
209	.verify_read = xchk_da_btree_read_verify,
210	.verify_write = xchk_da_btree_write_verify,
211	.verify_struct = xchk_da_btree_verify,
212};
213
214/* Check a block's sibling. */
215STATIC int
216xchk_da_btree_block_check_sibling(
217	struct xchk_da_btree	*ds,
218	int			level,
219	int			direction,
220	xfs_dablk_t		sibling)
221{
222	struct xfs_da_state_path *path = &ds->state->path;
223	struct xfs_da_state_path *altpath = &ds->state->altpath;
224	int			retval;
225	int			plevel;
226	int			error;
227
228	memcpy(altpath, path, sizeof(ds->state->altpath));
229
230	/*
231	 * If the pointer is null, we shouldn't be able to move the upper
232	 * level pointer anywhere.
233	 */
234	if (sibling == 0) {
235		error = xfs_da3_path_shift(ds->state, altpath, direction,
236				false, &retval);
237		if (error == 0 && retval == 0)
238			xchk_da_set_corrupt(ds, level);
239		error = 0;
240		goto out;
241	}
242
243	/* Move the alternate cursor one block in the direction given. */
244	error = xfs_da3_path_shift(ds->state, altpath, direction, false,
245			&retval);
246	if (!xchk_da_process_error(ds, level, &error))
247		goto out;
248	if (retval) {
249		xchk_da_set_corrupt(ds, level);
250		goto out;
251	}
252	if (altpath->blk[level].bp)
253		xchk_buffer_recheck(ds->sc, altpath->blk[level].bp);
254
255	/* Compare upper level pointer to sibling pointer. */
256	if (altpath->blk[level].blkno != sibling)
257		xchk_da_set_corrupt(ds, level);
258
259out:
260	/* Free all buffers in the altpath that aren't referenced from path. */
261	for (plevel = 0; plevel < altpath->active; plevel++) {
262		if (altpath->blk[plevel].bp == NULL ||
263		    (plevel < path->active &&
264		     altpath->blk[plevel].bp == path->blk[plevel].bp))
265			continue;
266
267		xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp);
268		altpath->blk[plevel].bp = NULL;
269	}
270
271	return error;
272}
273
274/* Check a block's sibling pointers. */
275STATIC int
276xchk_da_btree_block_check_siblings(
277	struct xchk_da_btree	*ds,
278	int			level,
279	struct xfs_da_blkinfo	*hdr)
280{
281	xfs_dablk_t		forw;
282	xfs_dablk_t		back;
283	int			error = 0;
284
285	forw = be32_to_cpu(hdr->forw);
286	back = be32_to_cpu(hdr->back);
287
288	/* Top level blocks should not have sibling pointers. */
289	if (level == 0) {
290		if (forw != 0 || back != 0)
291			xchk_da_set_corrupt(ds, level);
292		return 0;
293	}
294
295	/*
296	 * Check back (left) and forw (right) pointers.  These functions
297	 * absorb error codes for us.
298	 */
299	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
300	if (error)
301		goto out;
302	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);
303
304out:
305	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
306	return error;
307}
308
309/* Load a dir/attribute block from a btree. */
310STATIC int
311xchk_da_btree_block(
312	struct xchk_da_btree		*ds,
313	int				level,
314	xfs_dablk_t			blkno)
315{
316	struct xfs_da_state_blk		*blk;
317	struct xfs_da_intnode		*node;
318	struct xfs_da_node_entry	*btree;
319	struct xfs_da3_blkinfo		*hdr3;
320	struct xfs_da_args		*dargs = &ds->dargs;
321	struct xfs_inode		*ip = ds->dargs.dp;
322	xfs_ino_t			owner;
323	int				*pmaxrecs;
324	struct xfs_da3_icnode_hdr	nodehdr;
325	int				error = 0;
326
327	blk = &ds->state->path.blk[level];
328	ds->state->path.active = level + 1;
329
330	/* Release old block. */
331	if (blk->bp) {
332		xfs_trans_brelse(dargs->trans, blk->bp);
333		blk->bp = NULL;
334	}
335
336	/* Check the pointer. */
337	blk->blkno = blkno;
338	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
339		goto out_nobuf;
340
341	/* Read the buffer. */
342	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
343			XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
344			&xchk_da_btree_buf_ops);
345	if (!xchk_da_process_error(ds, level, &error))
346		goto out_nobuf;
347	if (blk->bp)
348		xchk_buffer_recheck(ds->sc, blk->bp);
349
350	/*
351	 * We didn't find a dir btree root block, which means that
352	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
353	 * to be), so jump out now.
354	 */
355	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
356			blk->bp == NULL)
357		goto out_nobuf;
358
359	/* It's /not/ ok for attr trees not to have a da btree. */
360	if (blk->bp == NULL) {
361		xchk_da_set_corrupt(ds, level);
362		goto out_nobuf;
363	}
364
365	hdr3 = blk->bp->b_addr;
366	blk->magic = be16_to_cpu(hdr3->hdr.magic);
367	pmaxrecs = &ds->maxrecs[level];
368
369	/* We only started zeroing the header on v5 filesystems. */
370	if (xfs_sb_version_hascrc(&ds->sc->mp->m_sb) && hdr3->hdr.pad)
371		xchk_da_set_corrupt(ds, level);
372
373	/* Check the owner. */
374	if (xfs_sb_version_hascrc(&ip->i_mount->m_sb)) {
375		owner = be64_to_cpu(hdr3->owner);
376		if (owner != ip->i_ino)
377			xchk_da_set_corrupt(ds, level);
378	}
379
380	/* Check the siblings. */
381	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
382	if (error)
383		goto out;
384
385	/* Interpret the buffer. */
386	switch (blk->magic) {
387	case XFS_ATTR_LEAF_MAGIC:
388	case XFS_ATTR3_LEAF_MAGIC:
389		xfs_trans_buf_set_type(dargs->trans, blk->bp,
390				XFS_BLFT_ATTR_LEAF_BUF);
391		blk->magic = XFS_ATTR_LEAF_MAGIC;
392		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
393		if (ds->tree_level != 0)
394			xchk_da_set_corrupt(ds, level);
395		break;
396	case XFS_DIR2_LEAFN_MAGIC:
397	case XFS_DIR3_LEAFN_MAGIC:
398		xfs_trans_buf_set_type(dargs->trans, blk->bp,
399				XFS_BLFT_DIR_LEAFN_BUF);
400		blk->magic = XFS_DIR2_LEAFN_MAGIC;
401		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
402		if (ds->tree_level != 0)
403			xchk_da_set_corrupt(ds, level);
404		break;
405	case XFS_DIR2_LEAF1_MAGIC:
406	case XFS_DIR3_LEAF1_MAGIC:
407		xfs_trans_buf_set_type(dargs->trans, blk->bp,
408				XFS_BLFT_DIR_LEAF1_BUF);
409		blk->magic = XFS_DIR2_LEAF1_MAGIC;
410		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
411		if (ds->tree_level != 0)
412			xchk_da_set_corrupt(ds, level);
413		break;
414	case XFS_DA_NODE_MAGIC:
415	case XFS_DA3_NODE_MAGIC:
416		xfs_trans_buf_set_type(dargs->trans, blk->bp,
417				XFS_BLFT_DA_NODE_BUF);
418		blk->magic = XFS_DA_NODE_MAGIC;
419		node = blk->bp->b_addr;
420		xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
421		btree = nodehdr.btree;
422		*pmaxrecs = nodehdr.count;
423		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
424		if (level == 0) {
425			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
426				xchk_da_set_corrupt(ds, level);
427				goto out_freebp;
428			}
429			ds->tree_level = nodehdr.level;
430		} else {
431			if (ds->tree_level != nodehdr.level) {
432				xchk_da_set_corrupt(ds, level);
433				goto out_freebp;
434			}
435		}
436
437		/* XXX: Check hdr3.pad32 once we know how to fix it. */
438		break;
439	default:
440		xchk_da_set_corrupt(ds, level);
441		goto out_freebp;
442	}
443
444	/*
445	 * If we've been handed a block that is below the dabtree root, does
446	 * its hashval match what the parent block expected to see?
447	 */
448	if (level > 0) {
449		struct xfs_da_node_entry	*key;
450
451		key = xchk_da_btree_node_entry(ds, level - 1);
452		if (be32_to_cpu(key->hashval) != blk->hashval) {
453			xchk_da_set_corrupt(ds, level);
454			goto out_freebp;
455		}
456	}
457
458out:
459	return error;
460out_freebp:
461	xfs_trans_brelse(dargs->trans, blk->bp);
462	blk->bp = NULL;
463out_nobuf:
464	blk->blkno = 0;
465	return error;
466}
467
468/* Visit all nodes and leaves of a da btree. */
469int
470xchk_da_btree(
471	struct xfs_scrub		*sc,
472	int				whichfork,
473	xchk_da_btree_rec_fn		scrub_fn,
474	void				*private)
475{
476	struct xchk_da_btree		ds = {};
477	struct xfs_mount		*mp = sc->mp;
478	struct xfs_da_state_blk		*blks;
479	struct xfs_da_node_entry	*key;
480	xfs_dablk_t			blkno;
481	int				level;
482	int				error;
483
484	/* Skip short format data structures; no btree to scan. */
485	if (!xfs_ifork_has_extents(XFS_IFORK_PTR(sc->ip, whichfork)))
486		return 0;
487
488	/* Set up initial da state. */
489	ds.dargs.dp = sc->ip;
490	ds.dargs.whichfork = whichfork;
491	ds.dargs.trans = sc->tp;
492	ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
493	ds.state = xfs_da_state_alloc(&ds.dargs);
494	ds.sc = sc;
495	ds.private = private;
496	if (whichfork == XFS_ATTR_FORK) {
497		ds.dargs.geo = mp->m_attr_geo;
498		ds.lowest = 0;
499		ds.highest = 0;
500	} else {
501		ds.dargs.geo = mp->m_dir_geo;
502		ds.lowest = ds.dargs.geo->leafblk;
503		ds.highest = ds.dargs.geo->freeblk;
504	}
505	blkno = ds.lowest;
506	level = 0;
507
508	/* Find the root of the da tree, if present. */
509	blks = ds.state->path.blk;
510	error = xchk_da_btree_block(&ds, level, blkno);
511	if (error)
512		goto out_state;
513	/*
514	 * We didn't find a block at ds.lowest, which means that there's
515	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
516	 * so jump out now.
517	 */
518	if (blks[level].bp == NULL)
519		goto out_state;
520
521	blks[level].index = 0;
522	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
523		/* Handle leaf block. */
524		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
525			/* End of leaf, pop back towards the root. */
526			if (blks[level].index >= ds.maxrecs[level]) {
527				if (level > 0)
528					blks[level - 1].index++;
529				ds.tree_level++;
530				level--;
531				continue;
532			}
533
534			/* Dispatch record scrubbing. */
535			error = scrub_fn(&ds, level);
536			if (error)
537				break;
538			if (xchk_should_terminate(sc, &error) ||
539			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
540				break;
541
542			blks[level].index++;
543			continue;
544		}
545
546
547		/* End of node, pop back towards the root. */
548		if (blks[level].index >= ds.maxrecs[level]) {
549			if (level > 0)
550				blks[level - 1].index++;
551			ds.tree_level++;
552			level--;
553			continue;
554		}
555
556		/* Hashes in order for scrub? */
557		key = xchk_da_btree_node_entry(&ds, level);
558		error = xchk_da_btree_hash(&ds, level, &key->hashval);
559		if (error)
560			goto out;
561
562		/* Drill another level deeper. */
563		blkno = be32_to_cpu(key->before);
564		level++;
565		if (level >= XFS_DA_NODE_MAXDEPTH) {
566			/* Too deep! */
567			xchk_da_set_corrupt(&ds, level - 1);
568			break;
569		}
570		ds.tree_level--;
571		error = xchk_da_btree_block(&ds, level, blkno);
572		if (error)
573			goto out;
574		if (blks[level].bp == NULL)
575			goto out;
576
577		blks[level].index = 0;
578	}
579
580out:
581	/* Release all the buffers we're tracking. */
582	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
583		if (blks[level].bp == NULL)
584			continue;
585		xfs_trans_brelse(sc->tp, blks[level].bp);
586		blks[level].bp = NULL;
587	}
588
589out_state:
590	xfs_da_state_free(ds.state);
591	return error;
592}
593