18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0+ 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2018 Oracle. All Rights Reserved. 48c2ecf20Sopenharmony_ci * Author: Darrick J. Wong <darrick.wong@oracle.com> 58c2ecf20Sopenharmony_ci */ 68c2ecf20Sopenharmony_ci#include "xfs.h" 78c2ecf20Sopenharmony_ci#include "xfs_fs.h" 88c2ecf20Sopenharmony_ci#include "xfs_shared.h" 98c2ecf20Sopenharmony_ci#include "xfs_format.h" 108c2ecf20Sopenharmony_ci#include "xfs_trans_resv.h" 118c2ecf20Sopenharmony_ci#include "xfs_mount.h" 128c2ecf20Sopenharmony_ci#include "xfs_btree.h" 138c2ecf20Sopenharmony_ci#include "scrub/bitmap.h" 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci/* 168c2ecf20Sopenharmony_ci * Set a range of this bitmap. Caller must ensure the range is not set. 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * This is the logical equivalent of bitmap |= mask(start, len). 198c2ecf20Sopenharmony_ci */ 208c2ecf20Sopenharmony_ciint 218c2ecf20Sopenharmony_cixbitmap_set( 228c2ecf20Sopenharmony_ci struct xbitmap *bitmap, 238c2ecf20Sopenharmony_ci uint64_t start, 248c2ecf20Sopenharmony_ci uint64_t len) 258c2ecf20Sopenharmony_ci{ 268c2ecf20Sopenharmony_ci struct xbitmap_range *bmr; 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci bmr = kmem_alloc(sizeof(struct xbitmap_range), KM_MAYFAIL); 298c2ecf20Sopenharmony_ci if (!bmr) 308c2ecf20Sopenharmony_ci return -ENOMEM; 318c2ecf20Sopenharmony_ci 328c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&bmr->list); 338c2ecf20Sopenharmony_ci bmr->start = start; 348c2ecf20Sopenharmony_ci bmr->len = len; 358c2ecf20Sopenharmony_ci list_add_tail(&bmr->list, &bitmap->list); 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci return 0; 388c2ecf20Sopenharmony_ci} 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ci/* Free everything related to this bitmap. */ 418c2ecf20Sopenharmony_civoid 428c2ecf20Sopenharmony_cixbitmap_destroy( 438c2ecf20Sopenharmony_ci struct xbitmap *bitmap) 448c2ecf20Sopenharmony_ci{ 458c2ecf20Sopenharmony_ci struct xbitmap_range *bmr; 468c2ecf20Sopenharmony_ci struct xbitmap_range *n; 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci for_each_xbitmap_extent(bmr, n, bitmap) { 498c2ecf20Sopenharmony_ci list_del(&bmr->list); 508c2ecf20Sopenharmony_ci kmem_free(bmr); 518c2ecf20Sopenharmony_ci } 528c2ecf20Sopenharmony_ci} 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci/* Set up a per-AG block bitmap. */ 558c2ecf20Sopenharmony_civoid 568c2ecf20Sopenharmony_cixbitmap_init( 578c2ecf20Sopenharmony_ci struct xbitmap *bitmap) 588c2ecf20Sopenharmony_ci{ 598c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&bitmap->list); 608c2ecf20Sopenharmony_ci} 618c2ecf20Sopenharmony_ci 628c2ecf20Sopenharmony_ci/* Compare two btree extents. */ 638c2ecf20Sopenharmony_cistatic int 648c2ecf20Sopenharmony_cixbitmap_range_cmp( 658c2ecf20Sopenharmony_ci void *priv, 668c2ecf20Sopenharmony_ci const struct list_head *a, 678c2ecf20Sopenharmony_ci const struct list_head *b) 688c2ecf20Sopenharmony_ci{ 698c2ecf20Sopenharmony_ci struct xbitmap_range *ap; 708c2ecf20Sopenharmony_ci struct xbitmap_range *bp; 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci ap = container_of(a, struct xbitmap_range, list); 738c2ecf20Sopenharmony_ci bp = container_of(b, struct xbitmap_range, list); 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_ci if (ap->start > bp->start) 768c2ecf20Sopenharmony_ci return 1; 778c2ecf20Sopenharmony_ci if (ap->start < bp->start) 788c2ecf20Sopenharmony_ci return -1; 798c2ecf20Sopenharmony_ci return 0; 808c2ecf20Sopenharmony_ci} 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci/* 838c2ecf20Sopenharmony_ci * Remove all the blocks mentioned in @sub from the extents in @bitmap. 848c2ecf20Sopenharmony_ci * 858c2ecf20Sopenharmony_ci * The intent is that callers will iterate the rmapbt for all of its records 868c2ecf20Sopenharmony_ci * for a given owner to generate @bitmap; and iterate all the blocks of the 878c2ecf20Sopenharmony_ci * metadata structures that are not being rebuilt and have the same rmapbt 888c2ecf20Sopenharmony_ci * owner to generate @sub. This routine subtracts all the extents 898c2ecf20Sopenharmony_ci * mentioned in sub from all the extents linked in @bitmap, which leaves 908c2ecf20Sopenharmony_ci * @bitmap as the list of blocks that are not accounted for, which we assume 918c2ecf20Sopenharmony_ci * are the dead blocks of the old metadata structure. The blocks mentioned in 928c2ecf20Sopenharmony_ci * @bitmap can be reaped. 938c2ecf20Sopenharmony_ci * 948c2ecf20Sopenharmony_ci * This is the logical equivalent of bitmap &= ~sub. 958c2ecf20Sopenharmony_ci */ 968c2ecf20Sopenharmony_ci#define LEFT_ALIGNED (1 << 0) 978c2ecf20Sopenharmony_ci#define RIGHT_ALIGNED (1 << 1) 988c2ecf20Sopenharmony_ciint 998c2ecf20Sopenharmony_cixbitmap_disunion( 1008c2ecf20Sopenharmony_ci struct xbitmap *bitmap, 1018c2ecf20Sopenharmony_ci struct xbitmap *sub) 1028c2ecf20Sopenharmony_ci{ 1038c2ecf20Sopenharmony_ci struct list_head *lp; 1048c2ecf20Sopenharmony_ci struct xbitmap_range *br; 1058c2ecf20Sopenharmony_ci struct xbitmap_range *new_br; 1068c2ecf20Sopenharmony_ci struct xbitmap_range *sub_br; 1078c2ecf20Sopenharmony_ci uint64_t sub_start; 1088c2ecf20Sopenharmony_ci uint64_t sub_len; 1098c2ecf20Sopenharmony_ci int state; 1108c2ecf20Sopenharmony_ci int error = 0; 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci if (list_empty(&bitmap->list) || list_empty(&sub->list)) 1138c2ecf20Sopenharmony_ci return 0; 1148c2ecf20Sopenharmony_ci ASSERT(!list_empty(&sub->list)); 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci list_sort(NULL, &bitmap->list, xbitmap_range_cmp); 1178c2ecf20Sopenharmony_ci list_sort(NULL, &sub->list, xbitmap_range_cmp); 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci /* 1208c2ecf20Sopenharmony_ci * Now that we've sorted both lists, we iterate bitmap once, rolling 1218c2ecf20Sopenharmony_ci * forward through sub and/or bitmap as necessary until we find an 1228c2ecf20Sopenharmony_ci * overlap or reach the end of either list. We do not reset lp to the 1238c2ecf20Sopenharmony_ci * head of bitmap nor do we reset sub_br to the head of sub. The 1248c2ecf20Sopenharmony_ci * list traversal is similar to merge sort, but we're deleting 1258c2ecf20Sopenharmony_ci * instead. In this manner we avoid O(n^2) operations. 1268c2ecf20Sopenharmony_ci */ 1278c2ecf20Sopenharmony_ci sub_br = list_first_entry(&sub->list, struct xbitmap_range, 1288c2ecf20Sopenharmony_ci list); 1298c2ecf20Sopenharmony_ci lp = bitmap->list.next; 1308c2ecf20Sopenharmony_ci while (lp != &bitmap->list) { 1318c2ecf20Sopenharmony_ci br = list_entry(lp, struct xbitmap_range, list); 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_ci /* 1348c2ecf20Sopenharmony_ci * Advance sub_br and/or br until we find a pair that 1358c2ecf20Sopenharmony_ci * intersect or we run out of extents. 1368c2ecf20Sopenharmony_ci */ 1378c2ecf20Sopenharmony_ci while (sub_br->start + sub_br->len <= br->start) { 1388c2ecf20Sopenharmony_ci if (list_is_last(&sub_br->list, &sub->list)) 1398c2ecf20Sopenharmony_ci goto out; 1408c2ecf20Sopenharmony_ci sub_br = list_next_entry(sub_br, list); 1418c2ecf20Sopenharmony_ci } 1428c2ecf20Sopenharmony_ci if (sub_br->start >= br->start + br->len) { 1438c2ecf20Sopenharmony_ci lp = lp->next; 1448c2ecf20Sopenharmony_ci continue; 1458c2ecf20Sopenharmony_ci } 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_ci /* trim sub_br to fit the extent we have */ 1488c2ecf20Sopenharmony_ci sub_start = sub_br->start; 1498c2ecf20Sopenharmony_ci sub_len = sub_br->len; 1508c2ecf20Sopenharmony_ci if (sub_br->start < br->start) { 1518c2ecf20Sopenharmony_ci sub_len -= br->start - sub_br->start; 1528c2ecf20Sopenharmony_ci sub_start = br->start; 1538c2ecf20Sopenharmony_ci } 1548c2ecf20Sopenharmony_ci if (sub_len > br->len) 1558c2ecf20Sopenharmony_ci sub_len = br->len; 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci state = 0; 1588c2ecf20Sopenharmony_ci if (sub_start == br->start) 1598c2ecf20Sopenharmony_ci state |= LEFT_ALIGNED; 1608c2ecf20Sopenharmony_ci if (sub_start + sub_len == br->start + br->len) 1618c2ecf20Sopenharmony_ci state |= RIGHT_ALIGNED; 1628c2ecf20Sopenharmony_ci switch (state) { 1638c2ecf20Sopenharmony_ci case LEFT_ALIGNED: 1648c2ecf20Sopenharmony_ci /* Coincides with only the left. */ 1658c2ecf20Sopenharmony_ci br->start += sub_len; 1668c2ecf20Sopenharmony_ci br->len -= sub_len; 1678c2ecf20Sopenharmony_ci break; 1688c2ecf20Sopenharmony_ci case RIGHT_ALIGNED: 1698c2ecf20Sopenharmony_ci /* Coincides with only the right. */ 1708c2ecf20Sopenharmony_ci br->len -= sub_len; 1718c2ecf20Sopenharmony_ci lp = lp->next; 1728c2ecf20Sopenharmony_ci break; 1738c2ecf20Sopenharmony_ci case LEFT_ALIGNED | RIGHT_ALIGNED: 1748c2ecf20Sopenharmony_ci /* Total overlap, just delete ex. */ 1758c2ecf20Sopenharmony_ci lp = lp->next; 1768c2ecf20Sopenharmony_ci list_del(&br->list); 1778c2ecf20Sopenharmony_ci kmem_free(br); 1788c2ecf20Sopenharmony_ci break; 1798c2ecf20Sopenharmony_ci case 0: 1808c2ecf20Sopenharmony_ci /* 1818c2ecf20Sopenharmony_ci * Deleting from the middle: add the new right extent 1828c2ecf20Sopenharmony_ci * and then shrink the left extent. 1838c2ecf20Sopenharmony_ci */ 1848c2ecf20Sopenharmony_ci new_br = kmem_alloc(sizeof(struct xbitmap_range), 1858c2ecf20Sopenharmony_ci KM_MAYFAIL); 1868c2ecf20Sopenharmony_ci if (!new_br) { 1878c2ecf20Sopenharmony_ci error = -ENOMEM; 1888c2ecf20Sopenharmony_ci goto out; 1898c2ecf20Sopenharmony_ci } 1908c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&new_br->list); 1918c2ecf20Sopenharmony_ci new_br->start = sub_start + sub_len; 1928c2ecf20Sopenharmony_ci new_br->len = br->start + br->len - new_br->start; 1938c2ecf20Sopenharmony_ci list_add(&new_br->list, &br->list); 1948c2ecf20Sopenharmony_ci br->len = sub_start - br->start; 1958c2ecf20Sopenharmony_ci lp = lp->next; 1968c2ecf20Sopenharmony_ci break; 1978c2ecf20Sopenharmony_ci default: 1988c2ecf20Sopenharmony_ci ASSERT(0); 1998c2ecf20Sopenharmony_ci break; 2008c2ecf20Sopenharmony_ci } 2018c2ecf20Sopenharmony_ci } 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ciout: 2048c2ecf20Sopenharmony_ci return error; 2058c2ecf20Sopenharmony_ci} 2068c2ecf20Sopenharmony_ci#undef LEFT_ALIGNED 2078c2ecf20Sopenharmony_ci#undef RIGHT_ALIGNED 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_ci/* 2108c2ecf20Sopenharmony_ci * Record all btree blocks seen while iterating all records of a btree. 2118c2ecf20Sopenharmony_ci * 2128c2ecf20Sopenharmony_ci * We know that the btree query_all function starts at the left edge and walks 2138c2ecf20Sopenharmony_ci * towards the right edge of the tree. Therefore, we know that we can walk up 2148c2ecf20Sopenharmony_ci * the btree cursor towards the root; if the pointer for a given level points 2158c2ecf20Sopenharmony_ci * to the first record/key in that block, we haven't seen this block before; 2168c2ecf20Sopenharmony_ci * and therefore we need to remember that we saw this block in the btree. 2178c2ecf20Sopenharmony_ci * 2188c2ecf20Sopenharmony_ci * So if our btree is: 2198c2ecf20Sopenharmony_ci * 2208c2ecf20Sopenharmony_ci * 4 2218c2ecf20Sopenharmony_ci * / | \ 2228c2ecf20Sopenharmony_ci * 1 2 3 2238c2ecf20Sopenharmony_ci * 2248c2ecf20Sopenharmony_ci * Pretend for this example that each leaf block has 100 btree records. For 2258c2ecf20Sopenharmony_ci * the first btree record, we'll observe that bc_ptrs[0] == 1, so we record 2268c2ecf20Sopenharmony_ci * that we saw block 1. Then we observe that bc_ptrs[1] == 1, so we record 2278c2ecf20Sopenharmony_ci * block 4. The list is [1, 4]. 2288c2ecf20Sopenharmony_ci * 2298c2ecf20Sopenharmony_ci * For the second btree record, we see that bc_ptrs[0] == 2, so we exit the 2308c2ecf20Sopenharmony_ci * loop. The list remains [1, 4]. 2318c2ecf20Sopenharmony_ci * 2328c2ecf20Sopenharmony_ci * For the 101st btree record, we've moved onto leaf block 2. Now 2338c2ecf20Sopenharmony_ci * bc_ptrs[0] == 1 again, so we record that we saw block 2. We see that 2348c2ecf20Sopenharmony_ci * bc_ptrs[1] == 2, so we exit the loop. The list is now [1, 4, 2]. 2358c2ecf20Sopenharmony_ci * 2368c2ecf20Sopenharmony_ci * For the 102nd record, bc_ptrs[0] == 2, so we continue. 2378c2ecf20Sopenharmony_ci * 2388c2ecf20Sopenharmony_ci * For the 201st record, we've moved on to leaf block 3. bc_ptrs[0] == 1, so 2398c2ecf20Sopenharmony_ci * we add 3 to the list. Now it is [1, 4, 2, 3]. 2408c2ecf20Sopenharmony_ci * 2418c2ecf20Sopenharmony_ci * For the 300th record we just exit, with the list being [1, 4, 2, 3]. 2428c2ecf20Sopenharmony_ci */ 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci/* 2458c2ecf20Sopenharmony_ci * Record all the buffers pointed to by the btree cursor. Callers already 2468c2ecf20Sopenharmony_ci * engaged in a btree walk should call this function to capture the list of 2478c2ecf20Sopenharmony_ci * blocks going from the leaf towards the root. 2488c2ecf20Sopenharmony_ci */ 2498c2ecf20Sopenharmony_ciint 2508c2ecf20Sopenharmony_cixbitmap_set_btcur_path( 2518c2ecf20Sopenharmony_ci struct xbitmap *bitmap, 2528c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur) 2538c2ecf20Sopenharmony_ci{ 2548c2ecf20Sopenharmony_ci struct xfs_buf *bp; 2558c2ecf20Sopenharmony_ci xfs_fsblock_t fsb; 2568c2ecf20Sopenharmony_ci int i; 2578c2ecf20Sopenharmony_ci int error; 2588c2ecf20Sopenharmony_ci 2598c2ecf20Sopenharmony_ci for (i = 0; i < cur->bc_nlevels && cur->bc_ptrs[i] == 1; i++) { 2608c2ecf20Sopenharmony_ci xfs_btree_get_block(cur, i, &bp); 2618c2ecf20Sopenharmony_ci if (!bp) 2628c2ecf20Sopenharmony_ci continue; 2638c2ecf20Sopenharmony_ci fsb = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn); 2648c2ecf20Sopenharmony_ci error = xbitmap_set(bitmap, fsb, 1); 2658c2ecf20Sopenharmony_ci if (error) 2668c2ecf20Sopenharmony_ci return error; 2678c2ecf20Sopenharmony_ci } 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci return 0; 2708c2ecf20Sopenharmony_ci} 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci/* Collect a btree's block in the bitmap. */ 2738c2ecf20Sopenharmony_ciSTATIC int 2748c2ecf20Sopenharmony_cixbitmap_collect_btblock( 2758c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 2768c2ecf20Sopenharmony_ci int level, 2778c2ecf20Sopenharmony_ci void *priv) 2788c2ecf20Sopenharmony_ci{ 2798c2ecf20Sopenharmony_ci struct xbitmap *bitmap = priv; 2808c2ecf20Sopenharmony_ci struct xfs_buf *bp; 2818c2ecf20Sopenharmony_ci xfs_fsblock_t fsbno; 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci xfs_btree_get_block(cur, level, &bp); 2848c2ecf20Sopenharmony_ci if (!bp) 2858c2ecf20Sopenharmony_ci return 0; 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, bp->b_bn); 2888c2ecf20Sopenharmony_ci return xbitmap_set(bitmap, fsbno, 1); 2898c2ecf20Sopenharmony_ci} 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci/* Walk the btree and mark the bitmap wherever a btree block is found. */ 2928c2ecf20Sopenharmony_ciint 2938c2ecf20Sopenharmony_cixbitmap_set_btblocks( 2948c2ecf20Sopenharmony_ci struct xbitmap *bitmap, 2958c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur) 2968c2ecf20Sopenharmony_ci{ 2978c2ecf20Sopenharmony_ci return xfs_btree_visit_blocks(cur, xbitmap_collect_btblock, 2988c2ecf20Sopenharmony_ci XFS_BTREE_VISIT_ALL, bitmap); 2998c2ecf20Sopenharmony_ci} 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_ci/* How many bits are set in this bitmap? */ 3028c2ecf20Sopenharmony_ciuint64_t 3038c2ecf20Sopenharmony_cixbitmap_hweight( 3048c2ecf20Sopenharmony_ci struct xbitmap *bitmap) 3058c2ecf20Sopenharmony_ci{ 3068c2ecf20Sopenharmony_ci struct xbitmap_range *bmr; 3078c2ecf20Sopenharmony_ci struct xbitmap_range *n; 3088c2ecf20Sopenharmony_ci uint64_t ret = 0; 3098c2ecf20Sopenharmony_ci 3108c2ecf20Sopenharmony_ci for_each_xbitmap_extent(bmr, n, bitmap) 3118c2ecf20Sopenharmony_ci ret += bmr->len; 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci return ret; 3148c2ecf20Sopenharmony_ci} 315