/* xref: /kernel/linux/linux-6.6/fs/xfs/scrub/bmap.c (revision 62306a36) */
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2017-2023 Oracle.  All Rights Reserved.
4 * Author: Darrick J. Wong <djwong@kernel.org>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_btree.h"
13#include "xfs_bit.h"
14#include "xfs_log_format.h"
15#include "xfs_trans.h"
16#include "xfs_inode.h"
17#include "xfs_alloc.h"
18#include "xfs_bmap.h"
19#include "xfs_bmap_btree.h"
20#include "xfs_rmap.h"
21#include "xfs_rmap_btree.h"
22#include "scrub/scrub.h"
23#include "scrub/common.h"
24#include "scrub/btree.h"
25#include "xfs_ag.h"
26
/* Set us up with an inode's bmap. */
int
xchk_setup_inode_bmap(
	struct xfs_scrub	*sc)
{
	int			error;

	/*
	 * If deferred intent processing could interfere with this scan,
	 * enable the drain fsgate before we grab the inode.
	 */
	if (xchk_need_intent_drain(sc))
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);

	error = xchk_iget_for_scrubbing(sc);
	if (error)
		goto out;

	/* Block new file IO while we examine the block mappings. */
	xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * We don't want any ephemeral data/cow fork updates sitting around
	 * while we inspect block mappings, so wait for directio to finish
	 * and flush dirty data if we have delalloc reservations.
	 */
	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
		struct address_space	*mapping = VFS_I(sc->ip)->i_mapping;

		/* Also take the mmap lock to quiesce page faults. */
		xchk_ilock(sc, XFS_MMAPLOCK_EXCL);

		inode_dio_wait(VFS_I(sc->ip));

		/*
		 * Try to flush all incore state to disk before we examine the
		 * space mappings for the data fork.  Leave accumulated errors
		 * in the mapping for the writer threads to consume.
		 *
		 * On ENOSPC or EIO writeback errors, we continue into the
		 * extent mapping checks because write failures do not
		 * necessarily imply anything about the correctness of the file
		 * metadata.  The metadata and the file data could be on
		 * completely separate devices; a media failure might only
		 * affect a subset of the disk, etc.  We can handle delalloc
		 * extents in the scrubber, so leaving them in memory is fine.
		 */
		error = filemap_fdatawrite(mapping);
		if (!error)
			error = filemap_fdatawait_keep_errors(mapping);
		if (error && (error != -ENOSPC && error != -EIO))
			goto out;
	}

	/* Got the inode, lock it and we're ready to go. */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
out:
	/* scrub teardown will unlock and release the inode */
	return error;
}
86
87/*
88 * Inode fork block mapping (BMBT) scrubber.
89 * More complex than the others because we have to scrub
90 * all the extents regardless of whether or not the fork
91 * is in btree format.
92 */
93
/* Scan state shared by the fork scrubber and the per-extent checks. */
struct xchk_bmap_info {
	/* Scrub context */
	struct xfs_scrub	*sc;

	/* Incore extent tree cursor */
	struct xfs_iext_cursor	icur;

	/* Previous fork mapping that we examined */
	struct xfs_bmbt_irec	prev_rec;

	/* Is this a realtime fork? */
	bool			is_rt;

	/* May mappings point to shared space? */
	bool			is_shared;

	/* Was the incore extent tree loaded? */
	bool			was_loaded;

	/* Which inode fork are we checking? */
	int			whichfork;
};
115
116/* Look for a corresponding rmap for this irec. */
117static inline bool
118xchk_bmap_get_rmap(
119	struct xchk_bmap_info	*info,
120	struct xfs_bmbt_irec	*irec,
121	xfs_agblock_t		agbno,
122	uint64_t		owner,
123	struct xfs_rmap_irec	*rmap)
124{
125	xfs_fileoff_t		offset;
126	unsigned int		rflags = 0;
127	int			has_rmap;
128	int			error;
129
130	if (info->whichfork == XFS_ATTR_FORK)
131		rflags |= XFS_RMAP_ATTR_FORK;
132	if (irec->br_state == XFS_EXT_UNWRITTEN)
133		rflags |= XFS_RMAP_UNWRITTEN;
134
135	/*
136	 * CoW staging extents are owned (on disk) by the refcountbt, so
137	 * their rmaps do not have offsets.
138	 */
139	if (info->whichfork == XFS_COW_FORK)
140		offset = 0;
141	else
142		offset = irec->br_startoff;
143
144	/*
145	 * If the caller thinks this could be a shared bmbt extent (IOWs,
146	 * any data fork extent of a reflink inode) then we have to use the
147	 * range rmap lookup to make sure we get the correct owner/offset.
148	 */
149	if (info->is_shared) {
150		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
151				owner, offset, rflags, rmap, &has_rmap);
152	} else {
153		error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
154				owner, offset, rflags, rmap, &has_rmap);
155	}
156	if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
157		return false;
158
159	if (!has_rmap)
160		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
161			irec->br_startoff);
162	return has_rmap;
163}
164
165/* Make sure that we have rmapbt records for this data/attr fork extent. */
166STATIC void
167xchk_bmap_xref_rmap(
168	struct xchk_bmap_info	*info,
169	struct xfs_bmbt_irec	*irec,
170	xfs_agblock_t		agbno)
171{
172	struct xfs_rmap_irec	rmap;
173	unsigned long long	rmap_end;
174	uint64_t		owner = info->sc->ip->i_ino;
175
176	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
177		return;
178
179	/* Find the rmap record for this irec. */
180	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
181		return;
182
183	/*
184	 * The rmap must be an exact match for this incore file mapping record,
185	 * which may have arisen from multiple ondisk records.
186	 */
187	if (rmap.rm_startblock != agbno)
188		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
189				irec->br_startoff);
190
191	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
192	if (rmap_end != agbno + irec->br_blockcount)
193		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
194				irec->br_startoff);
195
196	/* Check the logical offsets. */
197	if (rmap.rm_offset != irec->br_startoff)
198		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
199				irec->br_startoff);
200
201	rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
202	if (rmap_end != irec->br_startoff + irec->br_blockcount)
203		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
204				irec->br_startoff);
205
206	/* Check the owner */
207	if (rmap.rm_owner != owner)
208		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
209				irec->br_startoff);
210
211	/*
212	 * Check for discrepancies between the unwritten flag in the irec and
213	 * the rmap.  Note that the (in-memory) CoW fork distinguishes between
214	 * unwritten and written extents, but we don't track that in the rmap
215	 * records because the blocks are owned (on-disk) by the refcountbt,
216	 * which doesn't track unwritten state.
217	 */
218	if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
219	    !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
220		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
221				irec->br_startoff);
222
223	if (!!(info->whichfork == XFS_ATTR_FORK) !=
224	    !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
225		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
226				irec->br_startoff);
227	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
228		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
229				irec->br_startoff);
230}
231
232/* Make sure that we have rmapbt records for this COW fork extent. */
233STATIC void
234xchk_bmap_xref_rmap_cow(
235	struct xchk_bmap_info	*info,
236	struct xfs_bmbt_irec	*irec,
237	xfs_agblock_t		agbno)
238{
239	struct xfs_rmap_irec	rmap;
240	unsigned long long	rmap_end;
241	uint64_t		owner = XFS_RMAP_OWN_COW;
242
243	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
244		return;
245
246	/* Find the rmap record for this irec. */
247	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
248		return;
249
250	/*
251	 * CoW staging extents are owned by the refcount btree, so the rmap
252	 * can start before and end after the physical space allocated to this
253	 * mapping.  There are no offsets to check.
254	 */
255	if (rmap.rm_startblock > agbno)
256		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
257				irec->br_startoff);
258
259	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
260	if (rmap_end < agbno + irec->br_blockcount)
261		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
262				irec->br_startoff);
263
264	/* Check the owner */
265	if (rmap.rm_owner != owner)
266		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
267				irec->br_startoff);
268
269	/*
270	 * No flags allowed.  Note that the (in-memory) CoW fork distinguishes
271	 * between unwritten and written extents, but we don't track that in
272	 * the rmap records because the blocks are owned (on-disk) by the
273	 * refcountbt, which doesn't track unwritten state.
274	 */
275	if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
276		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
277				irec->br_startoff);
278	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
279		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
280				irec->br_startoff);
281	if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
282		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
283				irec->br_startoff);
284}
285
/*
 * Cross-reference a single rtdev extent record.
 *
 * Only the used-space check is performed here; realtime rmap
 * cross-referencing is not supported (see xchk_bmap_want_check_rmaps).
 */
STATIC void
xchk_bmap_rt_iextent_xref(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
			irec->br_blockcount);
}
296
297/* Cross-reference a single datadev extent record. */
298STATIC void
299xchk_bmap_iextent_xref(
300	struct xfs_inode	*ip,
301	struct xchk_bmap_info	*info,
302	struct xfs_bmbt_irec	*irec)
303{
304	struct xfs_owner_info	oinfo;
305	struct xfs_mount	*mp = info->sc->mp;
306	xfs_agnumber_t		agno;
307	xfs_agblock_t		agbno;
308	xfs_extlen_t		len;
309	int			error;
310
311	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
312	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
313	len = irec->br_blockcount;
314
315	error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
316	if (!xchk_fblock_process_error(info->sc, info->whichfork,
317			irec->br_startoff, &error))
318		goto out_free;
319
320	xchk_xref_is_used_space(info->sc, agbno, len);
321	xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
322	switch (info->whichfork) {
323	case XFS_DATA_FORK:
324		xchk_bmap_xref_rmap(info, irec, agbno);
325		if (!xfs_is_reflink_inode(info->sc->ip)) {
326			xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
327					info->whichfork, irec->br_startoff);
328			xchk_xref_is_only_owned_by(info->sc, agbno,
329					irec->br_blockcount, &oinfo);
330			xchk_xref_is_not_shared(info->sc, agbno,
331					irec->br_blockcount);
332		}
333		xchk_xref_is_not_cow_staging(info->sc, agbno,
334				irec->br_blockcount);
335		break;
336	case XFS_ATTR_FORK:
337		xchk_bmap_xref_rmap(info, irec, agbno);
338		xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
339				info->whichfork, irec->br_startoff);
340		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
341				&oinfo);
342		xchk_xref_is_not_shared(info->sc, agbno,
343				irec->br_blockcount);
344		xchk_xref_is_not_cow_staging(info->sc, agbno,
345				irec->br_blockcount);
346		break;
347	case XFS_COW_FORK:
348		xchk_bmap_xref_rmap_cow(info, irec, agbno);
349		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
350				&XFS_RMAP_OINFO_COW);
351		xchk_xref_is_cow_staging(info->sc, agbno,
352				irec->br_blockcount);
353		xchk_xref_is_not_shared(info->sc, agbno,
354				irec->br_blockcount);
355		break;
356	}
357
358out_free:
359	xchk_ag_free(info->sc, &info->sc->sa);
360}
361
362/*
363 * Directories and attr forks should never have blocks that can't be addressed
364 * by a xfs_dablk_t.
365 */
366STATIC void
367xchk_bmap_dirattr_extent(
368	struct xfs_inode	*ip,
369	struct xchk_bmap_info	*info,
370	struct xfs_bmbt_irec	*irec)
371{
372	struct xfs_mount	*mp = ip->i_mount;
373	xfs_fileoff_t		off;
374
375	if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
376		return;
377
378	if (!xfs_verify_dablk(mp, irec->br_startoff))
379		xchk_fblock_set_corrupt(info->sc, info->whichfork,
380				irec->br_startoff);
381
382	off = irec->br_startoff + irec->br_blockcount - 1;
383	if (!xfs_verify_dablk(mp, off))
384		xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
385}
386
387/* Scrub a single extent record. */
388STATIC void
389xchk_bmap_iextent(
390	struct xfs_inode	*ip,
391	struct xchk_bmap_info	*info,
392	struct xfs_bmbt_irec	*irec)
393{
394	struct xfs_mount	*mp = info->sc->mp;
395
396	/*
397	 * Check for out-of-order extents.  This record could have come
398	 * from the incore list, for which there is no ordering check.
399	 */
400	if (irec->br_startoff < info->prev_rec.br_startoff +
401				info->prev_rec.br_blockcount)
402		xchk_fblock_set_corrupt(info->sc, info->whichfork,
403				irec->br_startoff);
404
405	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
406		xchk_fblock_set_corrupt(info->sc, info->whichfork,
407				irec->br_startoff);
408
409	xchk_bmap_dirattr_extent(ip, info, irec);
410
411	/* Make sure the extent points to a valid place. */
412	if (info->is_rt &&
413	    !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount))
414		xchk_fblock_set_corrupt(info->sc, info->whichfork,
415				irec->br_startoff);
416	if (!info->is_rt &&
417	    !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
418		xchk_fblock_set_corrupt(info->sc, info->whichfork,
419				irec->br_startoff);
420
421	/* We don't allow unwritten extents on attr forks. */
422	if (irec->br_state == XFS_EXT_UNWRITTEN &&
423	    info->whichfork == XFS_ATTR_FORK)
424		xchk_fblock_set_corrupt(info->sc, info->whichfork,
425				irec->br_startoff);
426
427	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
428		return;
429
430	if (info->is_rt)
431		xchk_bmap_rt_iextent_xref(ip, info, irec);
432	else
433		xchk_bmap_iextent_xref(ip, info, irec);
434}
435
/*
 * Scrub a bmbt record.  Returns 0 even when corruption is found; problems
 * are reported through the scrub state flags.
 */
STATIC int
xchk_bmapbt_rec(
	struct xchk_btree	*bs,
	const union xfs_btree_rec *rec)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_bmbt_irec	iext_irec;
	struct xfs_iext_cursor	icur;
	struct xchk_bmap_info	*info = bs->private;
	struct xfs_inode	*ip = bs->cur->bc_ino.ip;
	struct xfs_buf		*bp = NULL;
	struct xfs_btree_block	*block;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, info->whichfork);
	uint64_t		owner;
	int			i;

	/*
	 * Check the owners of the btree blocks up to the level below
	 * the root since the verifiers don't do that.  Only do this when
	 * the level-0 cursor sits on the first record so each block's
	 * ancestry is checked once, not once per record.
	 */
	if (xfs_has_crc(bs->cur->bc_mp) &&
	    bs->cur->bc_levels[0].ptr == 1) {
		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
			block = xfs_btree_get_block(bs->cur, i, &bp);
			owner = be64_to_cpu(block->bb_u.l.bb_owner);
			if (owner != ip->i_ino)
				xchk_fblock_set_corrupt(bs->sc,
						info->whichfork, 0);
		}
	}

	/*
	 * Check that the incore extent tree contains an extent that matches
	 * this one exactly.  We validate those cached bmaps later, so we don't
	 * need to check them here.  If the incore extent tree was just loaded
	 * from disk by the scrubber, we assume that its contents match what's
	 * on disk (we still hold the ILOCK) and skip the equivalence check.
	 */
	if (!info->was_loaded)
		return 0;

	/* Decode the ondisk record and validate it on its own first. */
	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
	if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
		return 0;
	}

	/* The incore tree must contain this exact mapping. */
	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
				&iext_irec) ||
	    irec.br_startoff != iext_irec.br_startoff ||
	    irec.br_startblock != iext_irec.br_startblock ||
	    irec.br_blockcount != iext_irec.br_blockcount ||
	    irec.br_state != iext_irec.br_state)
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
	return 0;
}
495
496/* Scan the btree records. */
497STATIC int
498xchk_bmap_btree(
499	struct xfs_scrub	*sc,
500	int			whichfork,
501	struct xchk_bmap_info	*info)
502{
503	struct xfs_owner_info	oinfo;
504	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);
505	struct xfs_mount	*mp = sc->mp;
506	struct xfs_inode	*ip = sc->ip;
507	struct xfs_btree_cur	*cur;
508	int			error;
509
510	/* Load the incore bmap cache if it's not loaded. */
511	info->was_loaded = !xfs_need_iread_extents(ifp);
512
513	error = xfs_iread_extents(sc->tp, ip, whichfork);
514	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
515		goto out;
516
517	/* Check the btree structure. */
518	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
519	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
520	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
521	xfs_btree_del_cursor(cur, error);
522out:
523	return error;
524}
525
/* Context for matching rmap records back to fork mappings. */
struct xchk_bmap_check_rmap_info {
	/* Scrub context */
	struct xfs_scrub	*sc;

	/* Inode fork whose mappings we compare against the rmaps */
	int			whichfork;

	/* Cursor into the incore extent tree of @whichfork */
	struct xfs_iext_cursor	icur;
};
531
/*
 * Can we find bmaps that fit this rmap?  Called for every rmap record in
 * an AG; returns -ECANCELED to stop the query once corruption is noted.
 */
STATIC int
xchk_bmap_check_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xfs_bmbt_irec		irec;
	struct xfs_rmap_irec		check_rec;
	struct xchk_bmap_check_rmap_info	*sbcri = priv;
	struct xfs_ifork		*ifp;
	struct xfs_scrub		*sc = sbcri->sc;
	bool				have_map;

	/* Is this even the right fork? */
	if (rec->rm_owner != sc->ip->i_ino)
		return 0;
	if ((sbcri->whichfork == XFS_ATTR_FORK) ^
	    !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	/* bmbt block rmaps have no counterpart in the extent list. */
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return 0;

	/* Now look up the bmbt record. */
	ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
	if (!ifp) {
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
		goto out;
	}
	have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
			&sbcri->icur, &irec);
	if (!have_map)
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
	/*
	 * bmap extent record lengths are constrained to 2^21 blocks in length
	 * because of space constraints in the on-disk metadata structure.
	 * However, rmap extent record lengths are constrained only by AG
	 * length, so we have to loop through the bmbt to make sure that the
	 * entire rmap is covered by bmbt records.
	 */
	check_rec = *rec;
	while (have_map) {
		/* Each bmap must line up with the rmap remainder exactly. */
		if (irec.br_startoff != check_rec.rm_offset)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
				cur->bc_ag.pag->pag_agno,
				check_rec.rm_startblock))
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_blockcount > check_rec.rm_blockcount)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
			break;
		/* Consume the covered portion and advance to the next bmap. */
		check_rec.rm_startblock += irec.br_blockcount;
		check_rec.rm_offset += irec.br_blockcount;
		check_rec.rm_blockcount -= irec.br_blockcount;
		if (check_rec.rm_blockcount == 0)
			break;
		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
		if (!have_map)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
	}

out:
	/* Stop the rmap query early once corruption has been recorded. */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return -ECANCELED;
	return 0;
}
605
606/* Make sure each rmap has a corresponding bmbt entry. */
607STATIC int
608xchk_bmap_check_ag_rmaps(
609	struct xfs_scrub		*sc,
610	int				whichfork,
611	struct xfs_perag		*pag)
612{
613	struct xchk_bmap_check_rmap_info	sbcri;
614	struct xfs_btree_cur		*cur;
615	struct xfs_buf			*agf;
616	int				error;
617
618	error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
619	if (error)
620		return error;
621
622	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);
623
624	sbcri.sc = sc;
625	sbcri.whichfork = whichfork;
626	error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
627	if (error == -ECANCELED)
628		error = 0;
629
630	xfs_btree_del_cursor(cur, error);
631	xfs_trans_brelse(sc->tp, agf);
632	return error;
633}
634
635/*
636 * Decide if we want to walk every rmap btree in the fs to make sure that each
637 * rmap for this file fork has corresponding bmbt entries.
638 */
639static bool
640xchk_bmap_want_check_rmaps(
641	struct xchk_bmap_info	*info)
642{
643	struct xfs_scrub	*sc = info->sc;
644	struct xfs_ifork	*ifp;
645
646	if (!xfs_has_rmapbt(sc->mp))
647		return false;
648	if (info->whichfork == XFS_COW_FORK)
649		return false;
650	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
651		return false;
652
653	/* Don't support realtime rmap checks yet. */
654	if (info->is_rt)
655		return false;
656
657	/*
658	 * The inode repair code zaps broken inode forks by resetting them back
659	 * to EXTENTS format and zero extent records.  If we encounter a fork
660	 * in this state along with evidence that the fork isn't supposed to be
661	 * empty, we need to scan the reverse mappings to decide if we're going
662	 * to rebuild the fork.  Data forks with nonzero file size are scanned.
663	 * xattr forks are never empty of content, so they are always scanned.
664	 */
665	ifp = xfs_ifork_ptr(sc->ip, info->whichfork);
666	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && ifp->if_nextents == 0) {
667		if (info->whichfork == XFS_DATA_FORK &&
668		    i_size_read(VFS_I(sc->ip)) == 0)
669			return false;
670
671		return true;
672	}
673
674	return false;
675}
676
677/* Make sure each rmap has a corresponding bmbt entry. */
678STATIC int
679xchk_bmap_check_rmaps(
680	struct xfs_scrub	*sc,
681	int			whichfork)
682{
683	struct xfs_perag	*pag;
684	xfs_agnumber_t		agno;
685	int			error;
686
687	for_each_perag(sc->mp, agno, pag) {
688		error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
689		if (error ||
690		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
691			xfs_perag_rele(pag);
692			return error;
693		}
694	}
695
696	return 0;
697}
698
699/* Scrub a delalloc reservation from the incore extent map tree. */
700STATIC void
701xchk_bmap_iextent_delalloc(
702	struct xfs_inode	*ip,
703	struct xchk_bmap_info	*info,
704	struct xfs_bmbt_irec	*irec)
705{
706	struct xfs_mount	*mp = info->sc->mp;
707
708	/*
709	 * Check for out-of-order extents.  This record could have come
710	 * from the incore list, for which there is no ordering check.
711	 */
712	if (irec->br_startoff < info->prev_rec.br_startoff +
713				info->prev_rec.br_blockcount)
714		xchk_fblock_set_corrupt(info->sc, info->whichfork,
715				irec->br_startoff);
716
717	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
718		xchk_fblock_set_corrupt(info->sc, info->whichfork,
719				irec->br_startoff);
720
721	/* Make sure the extent points to a valid place. */
722	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
723		xchk_fblock_set_corrupt(info->sc, info->whichfork,
724				irec->br_startoff);
725}
726
727/* Decide if this individual fork mapping is ok. */
728static bool
729xchk_bmap_iext_mapping(
730	struct xchk_bmap_info		*info,
731	const struct xfs_bmbt_irec	*irec)
732{
733	/* There should never be a "hole" extent in either extent list. */
734	if (irec->br_startblock == HOLESTARTBLOCK)
735		return false;
736	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
737		return false;
738	return true;
739}
740
741/* Are these two mappings contiguous with each other? */
742static inline bool
743xchk_are_bmaps_contiguous(
744	const struct xfs_bmbt_irec	*b1,
745	const struct xfs_bmbt_irec	*b2)
746{
747	/* Don't try to combine unallocated mappings. */
748	if (!xfs_bmap_is_real_extent(b1))
749		return false;
750	if (!xfs_bmap_is_real_extent(b2))
751		return false;
752
753	/* Does b2 come right after b1 in the logical and physical range? */
754	if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
755		return false;
756	if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
757		return false;
758	if (b1->br_state != b2->br_state)
759		return false;
760	return true;
761}
762
/*
 * Walk the incore extent records, accumulating consecutive contiguous records
 * into a single incore mapping.  Returns true if @irec has been set to a
 * mapping or false if there are no more mappings.  Caller must ensure that
 * @info.icur is zeroed before the first call.
 */
static bool
xchk_bmap_iext_iter(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_bmbt_irec	got;
	struct xfs_ifork	*ifp;
	unsigned int		nr = 0;	/* number of records merged into @irec */

	ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);

	/* Advance to the next iextent record and check the mapping. */
	xfs_iext_next(ifp, &info->icur);
	if (!xfs_iext_get_extent(ifp, &info->icur, irec))
		return false;

	if (!xchk_bmap_iext_mapping(info, irec)) {
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
		return false;
	}
	nr++;

	/*
	 * Iterate subsequent iextent records and merge them with the one
	 * that we just read, if possible.
	 */
	while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
		if (!xchk_are_bmaps_contiguous(irec, &got))
			break;

		if (!xchk_bmap_iext_mapping(info, &got)) {
			xchk_fblock_set_corrupt(info->sc, info->whichfork,
					got.br_startoff);
			return false;
		}
		nr++;

		/* Fold the contiguous record into @irec and consume it. */
		irec->br_blockcount += got.br_blockcount;
		xfs_iext_next(ifp, &info->icur);
	}

	/*
	 * If the merged mapping could be expressed with fewer bmbt records
	 * than we actually found, notify the user that this fork could be
	 * optimized.  CoW forks only exist in memory so we ignore them.
	 */
	if (nr > 1 && info->whichfork != XFS_COW_FORK &&
	    howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
		xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);

	return true;
}
822
/*
 * Scrub an inode fork's block mappings.
 *
 * First we scan every record in every btree block, if applicable.
 * Then we unconditionally scan the incore extent cache.
 */
STATIC int
xchk_bmap(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_bmbt_irec	irec;
	struct xchk_bmap_info	info = { NULL };
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip = sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	xfs_fileoff_t		endoff;
	int			error = 0;

	/* Non-existent forks can be ignored. */
	if (!ifp)
		return -ENOENT;

	info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip);
	info.whichfork = whichfork;
	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
	info.sc = sc;

	/* Sanity-check the fork against the filesystem feature set. */
	switch (whichfork) {
	case XFS_COW_FORK:
		/* No CoW forks on non-reflink filesystems. */
		if (!xfs_has_reflink(mp)) {
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
			return 0;
		}
		break;
	case XFS_ATTR_FORK:
		/* Attr forks require one of the attr feature bits. */
		if (!xfs_has_attr(mp) && !xfs_has_attr2(mp))
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		break;
	default:
		ASSERT(whichfork == XFS_DATA_FORK);
		break;
	}

	/* Check the fork values */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_UUID:
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
		/* No mappings to check. */
		if (whichfork == XFS_COW_FORK)
			xchk_fblock_set_corrupt(sc, whichfork, 0);
		return 0;
	case XFS_DINODE_FMT_EXTENTS:
		break;
	case XFS_DINODE_FMT_BTREE:
		/* CoW forks are in-memory only and never in btree format. */
		if (whichfork == XFS_COW_FORK) {
			xchk_fblock_set_corrupt(sc, whichfork, 0);
			return 0;
		}

		error = xchk_bmap_btree(sc, whichfork, &info);
		if (error)
			return error;
		break;
	default:
		xchk_fblock_set_corrupt(sc, whichfork, 0);
		return 0;
	}

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Find the offset of the last extent in the mapping. */
	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
		return error;

	/*
	 * Scrub extent records.  We use a special iterator function here that
	 * combines adjacent mappings if they are logically and physically
	 * contiguous.   For large allocations that require multiple bmbt
	 * records, this reduces the number of cross-referencing calls, which
	 * reduces runtime.  Cross referencing with the rmap is simpler because
	 * the rmap must match the combined mapping exactly.
	 */
	while (xchk_bmap_iext_iter(&info, &irec)) {
		if (xchk_should_terminate(sc, &error) ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
			return 0;

		/* No mapping may start at or beyond the last offset. */
		if (irec.br_startoff >= endoff) {
			xchk_fblock_set_corrupt(sc, whichfork,
					irec.br_startoff);
			return 0;
		}

		if (isnullstartblock(irec.br_startblock))
			xchk_bmap_iextent_delalloc(ip, &info, &irec);
		else
			xchk_bmap_iextent(ip, &info, &irec);
		memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
	}

	/* Optionally verify every rmap points back at a fork mapping. */
	if (xchk_bmap_want_check_rmaps(&info)) {
		error = xchk_bmap_check_rmaps(sc, whichfork);
		if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
			return error;
	}

	return 0;
}
936
/* Scrub an inode's data fork (entry point for XFS_SCRUB_TYPE_BMBTD). */
int
xchk_bmap_data(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_DATA_FORK);
}
944
/* Scrub an inode's attr fork (entry point for XFS_SCRUB_TYPE_BMBTA). */
int
xchk_bmap_attr(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_ATTR_FORK);
}
952
/* Scrub an inode's CoW fork (entry point for XFS_SCRUB_TYPE_BMBTC). */
int
xchk_bmap_cow(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_COW_FORK);
}
960