18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0+
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2018 Oracle.  All Rights Reserved.
48c2ecf20Sopenharmony_ci * Author: Darrick J. Wong <darrick.wong@oracle.com>
58c2ecf20Sopenharmony_ci */
68c2ecf20Sopenharmony_ci#include "xfs.h"
78c2ecf20Sopenharmony_ci#include "xfs_fs.h"
88c2ecf20Sopenharmony_ci#include "xfs_shared.h"
98c2ecf20Sopenharmony_ci#include "xfs_format.h"
108c2ecf20Sopenharmony_ci#include "xfs_trans_resv.h"
118c2ecf20Sopenharmony_ci#include "xfs_mount.h"
128c2ecf20Sopenharmony_ci#include "xfs_btree.h"
138c2ecf20Sopenharmony_ci#include "scrub/bitmap.h"
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ci/*
168c2ecf20Sopenharmony_ci * Set a range of this bitmap.  Caller must ensure the range is not set.
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci * This is the logical equivalent of bitmap |= mask(start, len).
198c2ecf20Sopenharmony_ci */
208c2ecf20Sopenharmony_ciint
218c2ecf20Sopenharmony_cixbitmap_set(
228c2ecf20Sopenharmony_ci	struct xbitmap		*bitmap,
238c2ecf20Sopenharmony_ci	uint64_t		start,
248c2ecf20Sopenharmony_ci	uint64_t		len)
258c2ecf20Sopenharmony_ci{
268c2ecf20Sopenharmony_ci	struct xbitmap_range	*bmr;
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci	bmr = kmem_alloc(sizeof(struct xbitmap_range), KM_MAYFAIL);
298c2ecf20Sopenharmony_ci	if (!bmr)
308c2ecf20Sopenharmony_ci		return -ENOMEM;
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&bmr->list);
338c2ecf20Sopenharmony_ci	bmr->start = start;
348c2ecf20Sopenharmony_ci	bmr->len = len;
358c2ecf20Sopenharmony_ci	list_add_tail(&bmr->list, &bitmap->list);
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci	return 0;
388c2ecf20Sopenharmony_ci}
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci/* Free everything related to this bitmap. */
418c2ecf20Sopenharmony_civoid
428c2ecf20Sopenharmony_cixbitmap_destroy(
438c2ecf20Sopenharmony_ci	struct xbitmap		*bitmap)
448c2ecf20Sopenharmony_ci{
458c2ecf20Sopenharmony_ci	struct xbitmap_range	*bmr;
468c2ecf20Sopenharmony_ci	struct xbitmap_range	*n;
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci	for_each_xbitmap_extent(bmr, n, bitmap) {
498c2ecf20Sopenharmony_ci		list_del(&bmr->list);
508c2ecf20Sopenharmony_ci		kmem_free(bmr);
518c2ecf20Sopenharmony_ci	}
528c2ecf20Sopenharmony_ci}
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_ci/* Set up a per-AG block bitmap. */
558c2ecf20Sopenharmony_civoid
568c2ecf20Sopenharmony_cixbitmap_init(
578c2ecf20Sopenharmony_ci	struct xbitmap		*bitmap)
588c2ecf20Sopenharmony_ci{
598c2ecf20Sopenharmony_ci	INIT_LIST_HEAD(&bitmap->list);
608c2ecf20Sopenharmony_ci}
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci/* Compare two btree extents. */
638c2ecf20Sopenharmony_cistatic int
648c2ecf20Sopenharmony_cixbitmap_range_cmp(
658c2ecf20Sopenharmony_ci	void			*priv,
668c2ecf20Sopenharmony_ci	const struct list_head	*a,
678c2ecf20Sopenharmony_ci	const struct list_head	*b)
688c2ecf20Sopenharmony_ci{
698c2ecf20Sopenharmony_ci	struct xbitmap_range	*ap;
708c2ecf20Sopenharmony_ci	struct xbitmap_range	*bp;
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci	ap = container_of(a, struct xbitmap_range, list);
738c2ecf20Sopenharmony_ci	bp = container_of(b, struct xbitmap_range, list);
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_ci	if (ap->start > bp->start)
768c2ecf20Sopenharmony_ci		return 1;
778c2ecf20Sopenharmony_ci	if (ap->start < bp->start)
788c2ecf20Sopenharmony_ci		return -1;
798c2ecf20Sopenharmony_ci	return 0;
808c2ecf20Sopenharmony_ci}
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci/*
838c2ecf20Sopenharmony_ci * Remove all the blocks mentioned in @sub from the extents in @bitmap.
848c2ecf20Sopenharmony_ci *
858c2ecf20Sopenharmony_ci * The intent is that callers will iterate the rmapbt for all of its records
868c2ecf20Sopenharmony_ci * for a given owner to generate @bitmap; and iterate all the blocks of the
878c2ecf20Sopenharmony_ci * metadata structures that are not being rebuilt and have the same rmapbt
888c2ecf20Sopenharmony_ci * owner to generate @sub.  This routine subtracts all the extents
898c2ecf20Sopenharmony_ci * mentioned in sub from all the extents linked in @bitmap, which leaves
908c2ecf20Sopenharmony_ci * @bitmap as the list of blocks that are not accounted for, which we assume
918c2ecf20Sopenharmony_ci * are the dead blocks of the old metadata structure.  The blocks mentioned in
928c2ecf20Sopenharmony_ci * @bitmap can be reaped.
938c2ecf20Sopenharmony_ci *
948c2ecf20Sopenharmony_ci * This is the logical equivalent of bitmap &= ~sub.
958c2ecf20Sopenharmony_ci */
968c2ecf20Sopenharmony_ci#define LEFT_ALIGNED	(1 << 0)
978c2ecf20Sopenharmony_ci#define RIGHT_ALIGNED	(1 << 1)
988c2ecf20Sopenharmony_ciint
998c2ecf20Sopenharmony_cixbitmap_disunion(
1008c2ecf20Sopenharmony_ci	struct xbitmap		*bitmap,
1018c2ecf20Sopenharmony_ci	struct xbitmap		*sub)
1028c2ecf20Sopenharmony_ci{
1038c2ecf20Sopenharmony_ci	struct list_head	*lp;
1048c2ecf20Sopenharmony_ci	struct xbitmap_range	*br;
1058c2ecf20Sopenharmony_ci	struct xbitmap_range	*new_br;
1068c2ecf20Sopenharmony_ci	struct xbitmap_range	*sub_br;
1078c2ecf20Sopenharmony_ci	uint64_t		sub_start;
1088c2ecf20Sopenharmony_ci	uint64_t		sub_len;
1098c2ecf20Sopenharmony_ci	int			state;
1108c2ecf20Sopenharmony_ci	int			error = 0;
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	if (list_empty(&bitmap->list) || list_empty(&sub->list))
1138c2ecf20Sopenharmony_ci		return 0;
1148c2ecf20Sopenharmony_ci	ASSERT(!list_empty(&sub->list));
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci	list_sort(NULL, &bitmap->list, xbitmap_range_cmp);
1178c2ecf20Sopenharmony_ci	list_sort(NULL, &sub->list, xbitmap_range_cmp);
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci	/*
1208c2ecf20Sopenharmony_ci	 * Now that we've sorted both lists, we iterate bitmap once, rolling
1218c2ecf20Sopenharmony_ci	 * forward through sub and/or bitmap as necessary until we find an
1228c2ecf20Sopenharmony_ci	 * overlap or reach the end of either list.  We do not reset lp to the
1238c2ecf20Sopenharmony_ci	 * head of bitmap nor do we reset sub_br to the head of sub.  The
1248c2ecf20Sopenharmony_ci	 * list traversal is similar to merge sort, but we're deleting
1258c2ecf20Sopenharmony_ci	 * instead.  In this manner we avoid O(n^2) operations.
1268c2ecf20Sopenharmony_ci	 */
1278c2ecf20Sopenharmony_ci	sub_br = list_first_entry(&sub->list, struct xbitmap_range,
1288c2ecf20Sopenharmony_ci			list);
1298c2ecf20Sopenharmony_ci	lp = bitmap->list.next;
1308c2ecf20Sopenharmony_ci	while (lp != &bitmap->list) {
1318c2ecf20Sopenharmony_ci		br = list_entry(lp, struct xbitmap_range, list);
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci		/*
1348c2ecf20Sopenharmony_ci		 * Advance sub_br and/or br until we find a pair that
1358c2ecf20Sopenharmony_ci		 * intersect or we run out of extents.
1368c2ecf20Sopenharmony_ci		 */
1378c2ecf20Sopenharmony_ci		while (sub_br->start + sub_br->len <= br->start) {
1388c2ecf20Sopenharmony_ci			if (list_is_last(&sub_br->list, &sub->list))
1398c2ecf20Sopenharmony_ci				goto out;
1408c2ecf20Sopenharmony_ci			sub_br = list_next_entry(sub_br, list);
1418c2ecf20Sopenharmony_ci		}
1428c2ecf20Sopenharmony_ci		if (sub_br->start >= br->start + br->len) {
1438c2ecf20Sopenharmony_ci			lp = lp->next;
1448c2ecf20Sopenharmony_ci			continue;
1458c2ecf20Sopenharmony_ci		}
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci		/* trim sub_br to fit the extent we have */
1488c2ecf20Sopenharmony_ci		sub_start = sub_br->start;
1498c2ecf20Sopenharmony_ci		sub_len = sub_br->len;
1508c2ecf20Sopenharmony_ci		if (sub_br->start < br->start) {
1518c2ecf20Sopenharmony_ci			sub_len -= br->start - sub_br->start;
1528c2ecf20Sopenharmony_ci			sub_start = br->start;
1538c2ecf20Sopenharmony_ci		}
1548c2ecf20Sopenharmony_ci		if (sub_len > br->len)
1558c2ecf20Sopenharmony_ci			sub_len = br->len;
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci		state = 0;
1588c2ecf20Sopenharmony_ci		if (sub_start == br->start)
1598c2ecf20Sopenharmony_ci			state |= LEFT_ALIGNED;
1608c2ecf20Sopenharmony_ci		if (sub_start + sub_len == br->start + br->len)
1618c2ecf20Sopenharmony_ci			state |= RIGHT_ALIGNED;
1628c2ecf20Sopenharmony_ci		switch (state) {
1638c2ecf20Sopenharmony_ci		case LEFT_ALIGNED:
1648c2ecf20Sopenharmony_ci			/* Coincides with only the left. */
1658c2ecf20Sopenharmony_ci			br->start += sub_len;
1668c2ecf20Sopenharmony_ci			br->len -= sub_len;
1678c2ecf20Sopenharmony_ci			break;
1688c2ecf20Sopenharmony_ci		case RIGHT_ALIGNED:
1698c2ecf20Sopenharmony_ci			/* Coincides with only the right. */
1708c2ecf20Sopenharmony_ci			br->len -= sub_len;
1718c2ecf20Sopenharmony_ci			lp = lp->next;
1728c2ecf20Sopenharmony_ci			break;
1738c2ecf20Sopenharmony_ci		case LEFT_ALIGNED | RIGHT_ALIGNED:
1748c2ecf20Sopenharmony_ci			/* Total overlap, just delete ex. */
1758c2ecf20Sopenharmony_ci			lp = lp->next;
1768c2ecf20Sopenharmony_ci			list_del(&br->list);
1778c2ecf20Sopenharmony_ci			kmem_free(br);
1788c2ecf20Sopenharmony_ci			break;
1798c2ecf20Sopenharmony_ci		case 0:
1808c2ecf20Sopenharmony_ci			/*
1818c2ecf20Sopenharmony_ci			 * Deleting from the middle: add the new right extent
1828c2ecf20Sopenharmony_ci			 * and then shrink the left extent.
1838c2ecf20Sopenharmony_ci			 */
1848c2ecf20Sopenharmony_ci			new_br = kmem_alloc(sizeof(struct xbitmap_range),
1858c2ecf20Sopenharmony_ci					KM_MAYFAIL);
1868c2ecf20Sopenharmony_ci			if (!new_br) {
1878c2ecf20Sopenharmony_ci				error = -ENOMEM;
1888c2ecf20Sopenharmony_ci				goto out;
1898c2ecf20Sopenharmony_ci			}
1908c2ecf20Sopenharmony_ci			INIT_LIST_HEAD(&new_br->list);
1918c2ecf20Sopenharmony_ci			new_br->start = sub_start + sub_len;
1928c2ecf20Sopenharmony_ci			new_br->len = br->start + br->len - new_br->start;
1938c2ecf20Sopenharmony_ci			list_add(&new_br->list, &br->list);
1948c2ecf20Sopenharmony_ci			br->len = sub_start - br->start;
1958c2ecf20Sopenharmony_ci			lp = lp->next;
1968c2ecf20Sopenharmony_ci			break;
1978c2ecf20Sopenharmony_ci		default:
1988c2ecf20Sopenharmony_ci			ASSERT(0);
1998c2ecf20Sopenharmony_ci			break;
2008c2ecf20Sopenharmony_ci		}
2018c2ecf20Sopenharmony_ci	}
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ciout:
2048c2ecf20Sopenharmony_ci	return error;
2058c2ecf20Sopenharmony_ci}
2068c2ecf20Sopenharmony_ci#undef LEFT_ALIGNED
2078c2ecf20Sopenharmony_ci#undef RIGHT_ALIGNED
2088c2ecf20Sopenharmony_ci
2098c2ecf20Sopenharmony_ci/*
2108c2ecf20Sopenharmony_ci * Record all btree blocks seen while iterating all records of a btree.
2118c2ecf20Sopenharmony_ci *
2128c2ecf20Sopenharmony_ci * We know that the btree query_all function starts at the left edge and walks
2138c2ecf20Sopenharmony_ci * towards the right edge of the tree.  Therefore, we know that we can walk up
2148c2ecf20Sopenharmony_ci * the btree cursor towards the root; if the pointer for a given level points
2158c2ecf20Sopenharmony_ci * to the first record/key in that block, we haven't seen this block before;
2168c2ecf20Sopenharmony_ci * and therefore we need to remember that we saw this block in the btree.
2178c2ecf20Sopenharmony_ci *
2188c2ecf20Sopenharmony_ci * So if our btree is:
2198c2ecf20Sopenharmony_ci *
2208c2ecf20Sopenharmony_ci *    4
2218c2ecf20Sopenharmony_ci *  / | \
2228c2ecf20Sopenharmony_ci * 1  2  3
2238c2ecf20Sopenharmony_ci *
2248c2ecf20Sopenharmony_ci * Pretend for this example that each leaf block has 100 btree records.  For
2258c2ecf20Sopenharmony_ci * the first btree record, we'll observe that bc_ptrs[0] == 1, so we record
2268c2ecf20Sopenharmony_ci * that we saw block 1.  Then we observe that bc_ptrs[1] == 1, so we record
2278c2ecf20Sopenharmony_ci * block 4.  The list is [1, 4].
2288c2ecf20Sopenharmony_ci *
2298c2ecf20Sopenharmony_ci * For the second btree record, we see that bc_ptrs[0] == 2, so we exit the
2308c2ecf20Sopenharmony_ci * loop.  The list remains [1, 4].
2318c2ecf20Sopenharmony_ci *
2328c2ecf20Sopenharmony_ci * For the 101st btree record, we've moved onto leaf block 2.  Now
2338c2ecf20Sopenharmony_ci * bc_ptrs[0] == 1 again, so we record that we saw block 2.  We see that
2348c2ecf20Sopenharmony_ci * bc_ptrs[1] == 2, so we exit the loop.  The list is now [1, 4, 2].
2358c2ecf20Sopenharmony_ci *
2368c2ecf20Sopenharmony_ci * For the 102nd record, bc_ptrs[0] == 2, so we continue.
2378c2ecf20Sopenharmony_ci *
2388c2ecf20Sopenharmony_ci * For the 201st record, we've moved on to leaf block 3.  bc_ptrs[0] == 1, so
2398c2ecf20Sopenharmony_ci * we add 3 to the list.  Now it is [1, 4, 2, 3].
2408c2ecf20Sopenharmony_ci *
2418c2ecf20Sopenharmony_ci * For the 300th record we just exit, with the list being [1, 4, 2, 3].
2428c2ecf20Sopenharmony_ci */
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_ci/*
2458c2ecf20Sopenharmony_ci * Record all the buffers pointed to by the btree cursor.  Callers already
2468c2ecf20Sopenharmony_ci * engaged in a btree walk should call this function to capture the list of
2478c2ecf20Sopenharmony_ci * blocks going from the leaf towards the root.
2488c2ecf20Sopenharmony_ci */
2498c2ecf20Sopenharmony_ciint
2508c2ecf20Sopenharmony_cixbitmap_set_btcur_path(
2518c2ecf20Sopenharmony_ci	struct xbitmap		*bitmap,
2528c2ecf20Sopenharmony_ci	struct xfs_btree_cur	*cur)
2538c2ecf20Sopenharmony_ci{
2548c2ecf20Sopenharmony_ci	struct xfs_buf		*bp;
2558c2ecf20Sopenharmony_ci	xfs_fsblock_t		fsb;
2568c2ecf20Sopenharmony_ci	int			i;
2578c2ecf20Sopenharmony_ci	int			error;
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci	for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) {
2608c2ecf20Sopenharmony_ci		xfs_btree_get_block(cur, i, &bp);
2618c2ecf20Sopenharmony_ci		if (!bp)
2628c2ecf20Sopenharmony_ci			continue;
2638c2ecf20Sopenharmony_ci		fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
2648c2ecf20Sopenharmony_ci		error = xbitmap_set(bitmap, fsb, 1);
2658c2ecf20Sopenharmony_ci		if (error)
2668c2ecf20Sopenharmony_ci			return error;
2678c2ecf20Sopenharmony_ci	}
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_ci	return 0;
2708c2ecf20Sopenharmony_ci}
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci/* Collect a btree's block in the bitmap. */
2738c2ecf20Sopenharmony_ciSTATIC int
2748c2ecf20Sopenharmony_cixbitmap_collect_btblock(
2758c2ecf20Sopenharmony_ci	struct xfs_btree_cur	*cur,
2768c2ecf20Sopenharmony_ci	int			level,
2778c2ecf20Sopenharmony_ci	void			*priv)
2788c2ecf20Sopenharmony_ci{
2798c2ecf20Sopenharmony_ci	struct xbitmap		*bitmap = priv;
2808c2ecf20Sopenharmony_ci	struct xfs_buf		*bp;
2818c2ecf20Sopenharmony_ci	xfs_fsblock_t		fsbno;
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	xfs_btree_get_block(cur, level, &bp);
2848c2ecf20Sopenharmony_ci	if (!bp)
2858c2ecf20Sopenharmony_ci		return 0;
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci	fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn);
2888c2ecf20Sopenharmony_ci	return xbitmap_set(bitmap, fsbno, 1);
2898c2ecf20Sopenharmony_ci}
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci/* Walk the btree and mark the bitmap wherever a btree block is found. */
2928c2ecf20Sopenharmony_ciint
2938c2ecf20Sopenharmony_cixbitmap_set_btblocks(
2948c2ecf20Sopenharmony_ci	struct xbitmap		*bitmap,
2958c2ecf20Sopenharmony_ci	struct xfs_btree_cur	*cur)
2968c2ecf20Sopenharmony_ci{
2978c2ecf20Sopenharmony_ci	return xfs_btree_visit_blocks(cur, xbitmap_collect_btblock,
2988c2ecf20Sopenharmony_ci			XFS_BTREE_VISIT_ALL, bitmap);
2998c2ecf20Sopenharmony_ci}
3008c2ecf20Sopenharmony_ci
3018c2ecf20Sopenharmony_ci/* How many bits are set in this bitmap? */
3028c2ecf20Sopenharmony_ciuint64_t
3038c2ecf20Sopenharmony_cixbitmap_hweight(
3048c2ecf20Sopenharmony_ci	struct xbitmap		*bitmap)
3058c2ecf20Sopenharmony_ci{
3068c2ecf20Sopenharmony_ci	struct xbitmap_range	*bmr;
3078c2ecf20Sopenharmony_ci	struct xbitmap_range	*n;
3088c2ecf20Sopenharmony_ci	uint64_t		ret = 0;
3098c2ecf20Sopenharmony_ci
3108c2ecf20Sopenharmony_ci	for_each_xbitmap_extent(bmr, n, bitmap)
3118c2ecf20Sopenharmony_ci		ret += bmr->len;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci	return ret;
3148c2ecf20Sopenharmony_ci}
315