18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (c) 2014 Red Hat, Inc. 48c2ecf20Sopenharmony_ci * All Rights Reserved. 58c2ecf20Sopenharmony_ci */ 68c2ecf20Sopenharmony_ci#include "xfs.h" 78c2ecf20Sopenharmony_ci#include "xfs_fs.h" 88c2ecf20Sopenharmony_ci#include "xfs_shared.h" 98c2ecf20Sopenharmony_ci#include "xfs_format.h" 108c2ecf20Sopenharmony_ci#include "xfs_log_format.h" 118c2ecf20Sopenharmony_ci#include "xfs_trans_resv.h" 128c2ecf20Sopenharmony_ci#include "xfs_sb.h" 138c2ecf20Sopenharmony_ci#include "xfs_mount.h" 148c2ecf20Sopenharmony_ci#include "xfs_trans.h" 158c2ecf20Sopenharmony_ci#include "xfs_alloc.h" 168c2ecf20Sopenharmony_ci#include "xfs_btree.h" 178c2ecf20Sopenharmony_ci#include "xfs_btree_staging.h" 188c2ecf20Sopenharmony_ci#include "xfs_rmap.h" 198c2ecf20Sopenharmony_ci#include "xfs_rmap_btree.h" 208c2ecf20Sopenharmony_ci#include "xfs_trace.h" 218c2ecf20Sopenharmony_ci#include "xfs_error.h" 228c2ecf20Sopenharmony_ci#include "xfs_extent_busy.h" 238c2ecf20Sopenharmony_ci#include "xfs_ag_resv.h" 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci/* 268c2ecf20Sopenharmony_ci * Reverse map btree. 278c2ecf20Sopenharmony_ci * 288c2ecf20Sopenharmony_ci * This is a per-ag tree used to track the owner(s) of a given extent. With 298c2ecf20Sopenharmony_ci * reflink it is possible for there to be multiple owners, which is a departure 308c2ecf20Sopenharmony_ci * from classic XFS. Owner records for data extents are inserted when the 318c2ecf20Sopenharmony_ci * extent is mapped and removed when an extent is unmapped. Owner records for 328c2ecf20Sopenharmony_ci * all other block types (i.e. metadata) are inserted when an extent is 338c2ecf20Sopenharmony_ci * allocated and removed when an extent is freed. There can only be one owner 348c2ecf20Sopenharmony_ci * of a metadata extent, usually an inode or some other metadata structure like 358c2ecf20Sopenharmony_ci * an AG btree. 368c2ecf20Sopenharmony_ci * 378c2ecf20Sopenharmony_ci * The rmap btree is part of the free space management, so blocks for the tree 388c2ecf20Sopenharmony_ci * are sourced from the agfl. Hence we need transaction reservation support for 398c2ecf20Sopenharmony_ci * this tree so that the freelist is always large enough. This also impacts on 408c2ecf20Sopenharmony_ci * the minimum space we need to leave free in the AG. 418c2ecf20Sopenharmony_ci * 428c2ecf20Sopenharmony_ci * The tree is ordered by [ag block, owner, offset]. This is a large key size, 438c2ecf20Sopenharmony_ci * but it is the only way to enforce unique keys when a block can be owned by 448c2ecf20Sopenharmony_ci * multiple files at any offset. There's no need to order/search by extent 458c2ecf20Sopenharmony_ci * size for online updating/management of the tree. It is intended that most 468c2ecf20Sopenharmony_ci * reverse lookups will be to find the owner(s) of a particular block, or to 478c2ecf20Sopenharmony_ci * try to recover tree and file data from corrupt primary metadata. 488c2ecf20Sopenharmony_ci */ 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_cistatic struct xfs_btree_cur * 518c2ecf20Sopenharmony_cixfs_rmapbt_dup_cursor( 528c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur) 538c2ecf20Sopenharmony_ci{ 548c2ecf20Sopenharmony_ci return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp, 558c2ecf20Sopenharmony_ci cur->bc_ag.agbp, cur->bc_ag.agno); 568c2ecf20Sopenharmony_ci} 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ciSTATIC void 598c2ecf20Sopenharmony_cixfs_rmapbt_set_root( 608c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 618c2ecf20Sopenharmony_ci union xfs_btree_ptr *ptr, 628c2ecf20Sopenharmony_ci int inc) 638c2ecf20Sopenharmony_ci{ 648c2ecf20Sopenharmony_ci struct xfs_buf *agbp = cur->bc_ag.agbp; 658c2ecf20Sopenharmony_ci struct xfs_agf *agf = agbp->b_addr; 668c2ecf20Sopenharmony_ci int btnum = cur->bc_btnum; 678c2ecf20Sopenharmony_ci struct xfs_perag *pag = agbp->b_pag; 688c2ecf20Sopenharmony_ci 698c2ecf20Sopenharmony_ci ASSERT(ptr->s != 0); 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci agf->agf_roots[btnum] = ptr->s; 728c2ecf20Sopenharmony_ci be32_add_cpu(&agf->agf_levels[btnum], inc); 738c2ecf20Sopenharmony_ci pag->pagf_levels[btnum] += inc; 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_ci xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); 768c2ecf20Sopenharmony_ci} 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ciSTATIC int 798c2ecf20Sopenharmony_cixfs_rmapbt_alloc_block( 808c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 818c2ecf20Sopenharmony_ci union xfs_btree_ptr *start, 828c2ecf20Sopenharmony_ci union xfs_btree_ptr *new, 838c2ecf20Sopenharmony_ci int *stat) 848c2ecf20Sopenharmony_ci{ 858c2ecf20Sopenharmony_ci struct xfs_buf *agbp = cur->bc_ag.agbp; 868c2ecf20Sopenharmony_ci struct xfs_agf *agf = agbp->b_addr; 878c2ecf20Sopenharmony_ci int error; 888c2ecf20Sopenharmony_ci xfs_agblock_t bno; 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci /* Allocate the new block from the freelist. If we can't, give up. */ 918c2ecf20Sopenharmony_ci error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp, 928c2ecf20Sopenharmony_ci &bno, 1); 938c2ecf20Sopenharmony_ci if (error) 948c2ecf20Sopenharmony_ci return error; 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci trace_xfs_rmapbt_alloc_block(cur->bc_mp, cur->bc_ag.agno, 978c2ecf20Sopenharmony_ci bno, 1); 988c2ecf20Sopenharmony_ci if (bno == NULLAGBLOCK) { 998c2ecf20Sopenharmony_ci *stat = 0; 1008c2ecf20Sopenharmony_ci return 0; 1018c2ecf20Sopenharmony_ci } 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1, 1048c2ecf20Sopenharmony_ci false); 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci xfs_trans_agbtree_delta(cur->bc_tp, 1); 1078c2ecf20Sopenharmony_ci new->s = cpu_to_be32(bno); 1088c2ecf20Sopenharmony_ci be32_add_cpu(&agf->agf_rmap_blocks, 1); 1098c2ecf20Sopenharmony_ci xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci xfs_ag_resv_rmapbt_alloc(cur->bc_mp, cur->bc_ag.agno); 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci *stat = 1; 1148c2ecf20Sopenharmony_ci return 0; 1158c2ecf20Sopenharmony_ci} 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ciSTATIC int 1188c2ecf20Sopenharmony_cixfs_rmapbt_free_block( 1198c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 1208c2ecf20Sopenharmony_ci struct xfs_buf *bp) 1218c2ecf20Sopenharmony_ci{ 1228c2ecf20Sopenharmony_ci struct xfs_buf *agbp = cur->bc_ag.agbp; 1238c2ecf20Sopenharmony_ci struct xfs_agf *agf = agbp->b_addr; 1248c2ecf20Sopenharmony_ci struct xfs_perag *pag; 1258c2ecf20Sopenharmony_ci xfs_agblock_t bno; 1268c2ecf20Sopenharmony_ci int error; 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); 1298c2ecf20Sopenharmony_ci trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_ag.agno, 1308c2ecf20Sopenharmony_ci bno, 1); 1318c2ecf20Sopenharmony_ci be32_add_cpu(&agf->agf_rmap_blocks, -1); 1328c2ecf20Sopenharmony_ci xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); 1338c2ecf20Sopenharmony_ci error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); 1348c2ecf20Sopenharmony_ci if (error) 1358c2ecf20Sopenharmony_ci return error; 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_ci xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1, 1388c2ecf20Sopenharmony_ci XFS_EXTENT_BUSY_SKIP_DISCARD); 1398c2ecf20Sopenharmony_ci xfs_trans_agbtree_delta(cur->bc_tp, -1); 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci pag = cur->bc_ag.agbp->b_pag; 1428c2ecf20Sopenharmony_ci xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1); 1438c2ecf20Sopenharmony_ci return 0; 1448c2ecf20Sopenharmony_ci} 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ciSTATIC int 1478c2ecf20Sopenharmony_cixfs_rmapbt_get_minrecs( 1488c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 1498c2ecf20Sopenharmony_ci int level) 1508c2ecf20Sopenharmony_ci{ 1518c2ecf20Sopenharmony_ci return cur->bc_mp->m_rmap_mnr[level != 0]; 1528c2ecf20Sopenharmony_ci} 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ciSTATIC int 1558c2ecf20Sopenharmony_cixfs_rmapbt_get_maxrecs( 1568c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 1578c2ecf20Sopenharmony_ci int level) 1588c2ecf20Sopenharmony_ci{ 1598c2ecf20Sopenharmony_ci return cur->bc_mp->m_rmap_mxr[level != 0]; 1608c2ecf20Sopenharmony_ci} 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ciSTATIC void 1638c2ecf20Sopenharmony_cixfs_rmapbt_init_key_from_rec( 1648c2ecf20Sopenharmony_ci union xfs_btree_key *key, 1658c2ecf20Sopenharmony_ci union xfs_btree_rec *rec) 1668c2ecf20Sopenharmony_ci{ 1678c2ecf20Sopenharmony_ci key->rmap.rm_startblock = rec->rmap.rm_startblock; 1688c2ecf20Sopenharmony_ci key->rmap.rm_owner = rec->rmap.rm_owner; 1698c2ecf20Sopenharmony_ci key->rmap.rm_offset = rec->rmap.rm_offset; 1708c2ecf20Sopenharmony_ci} 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci/* 1738c2ecf20Sopenharmony_ci * The high key for a reverse mapping record can be computed by shifting 1748c2ecf20Sopenharmony_ci * the startblock and offset to the highest value that would still map 1758c2ecf20Sopenharmony_ci * to that record. In practice this means that we add blockcount-1 to 1768c2ecf20Sopenharmony_ci * the startblock for all records, and if the record is for a data/attr 1778c2ecf20Sopenharmony_ci * fork mapping, we add blockcount-1 to the offset too. 1788c2ecf20Sopenharmony_ci */ 1798c2ecf20Sopenharmony_ciSTATIC void 1808c2ecf20Sopenharmony_cixfs_rmapbt_init_high_key_from_rec( 1818c2ecf20Sopenharmony_ci union xfs_btree_key *key, 1828c2ecf20Sopenharmony_ci union xfs_btree_rec *rec) 1838c2ecf20Sopenharmony_ci{ 1848c2ecf20Sopenharmony_ci uint64_t off; 1858c2ecf20Sopenharmony_ci int adj; 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1; 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci key->rmap.rm_startblock = rec->rmap.rm_startblock; 1908c2ecf20Sopenharmony_ci be32_add_cpu(&key->rmap.rm_startblock, adj); 1918c2ecf20Sopenharmony_ci key->rmap.rm_owner = rec->rmap.rm_owner; 1928c2ecf20Sopenharmony_ci key->rmap.rm_offset = rec->rmap.rm_offset; 1938c2ecf20Sopenharmony_ci if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) || 1948c2ecf20Sopenharmony_ci XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset))) 1958c2ecf20Sopenharmony_ci return; 1968c2ecf20Sopenharmony_ci off = be64_to_cpu(key->rmap.rm_offset); 1978c2ecf20Sopenharmony_ci off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK); 1988c2ecf20Sopenharmony_ci key->rmap.rm_offset = cpu_to_be64(off); 1998c2ecf20Sopenharmony_ci} 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ciSTATIC void 2028c2ecf20Sopenharmony_cixfs_rmapbt_init_rec_from_cur( 2038c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 2048c2ecf20Sopenharmony_ci union xfs_btree_rec *rec) 2058c2ecf20Sopenharmony_ci{ 2068c2ecf20Sopenharmony_ci rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock); 2078c2ecf20Sopenharmony_ci rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount); 2088c2ecf20Sopenharmony_ci rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner); 2098c2ecf20Sopenharmony_ci rec->rmap.rm_offset = cpu_to_be64( 2108c2ecf20Sopenharmony_ci xfs_rmap_irec_offset_pack(&cur->bc_rec.r)); 2118c2ecf20Sopenharmony_ci} 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_ciSTATIC void 2148c2ecf20Sopenharmony_cixfs_rmapbt_init_ptr_from_cur( 2158c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 2168c2ecf20Sopenharmony_ci union xfs_btree_ptr *ptr) 2178c2ecf20Sopenharmony_ci{ 2188c2ecf20Sopenharmony_ci struct xfs_agf *agf = cur->bc_ag.agbp->b_addr; 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci ASSERT(cur->bc_ag.agno == be32_to_cpu(agf->agf_seqno)); 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci ptr->s = agf->agf_roots[cur->bc_btnum]; 2238c2ecf20Sopenharmony_ci} 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ciSTATIC int64_t 2268c2ecf20Sopenharmony_cixfs_rmapbt_key_diff( 2278c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 2288c2ecf20Sopenharmony_ci union xfs_btree_key *key) 2298c2ecf20Sopenharmony_ci{ 2308c2ecf20Sopenharmony_ci struct xfs_rmap_irec *rec = &cur->bc_rec.r; 2318c2ecf20Sopenharmony_ci struct xfs_rmap_key *kp = &key->rmap; 2328c2ecf20Sopenharmony_ci __u64 x, y; 2338c2ecf20Sopenharmony_ci int64_t d; 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_ci d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock; 2368c2ecf20Sopenharmony_ci if (d) 2378c2ecf20Sopenharmony_ci return d; 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci x = be64_to_cpu(kp->rm_owner); 2408c2ecf20Sopenharmony_ci y = rec->rm_owner; 2418c2ecf20Sopenharmony_ci if (x > y) 2428c2ecf20Sopenharmony_ci return 1; 2438c2ecf20Sopenharmony_ci else if (y > x) 2448c2ecf20Sopenharmony_ci return -1; 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci x = XFS_RMAP_OFF(be64_to_cpu(kp->rm_offset)); 2478c2ecf20Sopenharmony_ci y = rec->rm_offset; 2488c2ecf20Sopenharmony_ci if (x > y) 2498c2ecf20Sopenharmony_ci return 1; 2508c2ecf20Sopenharmony_ci else if (y > x) 2518c2ecf20Sopenharmony_ci return -1; 2528c2ecf20Sopenharmony_ci return 0; 2538c2ecf20Sopenharmony_ci} 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ciSTATIC int64_t 2568c2ecf20Sopenharmony_cixfs_rmapbt_diff_two_keys( 2578c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 2588c2ecf20Sopenharmony_ci union xfs_btree_key *k1, 2598c2ecf20Sopenharmony_ci union xfs_btree_key *k2) 2608c2ecf20Sopenharmony_ci{ 2618c2ecf20Sopenharmony_ci struct xfs_rmap_key *kp1 = &k1->rmap; 2628c2ecf20Sopenharmony_ci struct xfs_rmap_key *kp2 = &k2->rmap; 2638c2ecf20Sopenharmony_ci int64_t d; 2648c2ecf20Sopenharmony_ci __u64 x, y; 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci d = (int64_t)be32_to_cpu(kp1->rm_startblock) - 2678c2ecf20Sopenharmony_ci be32_to_cpu(kp2->rm_startblock); 2688c2ecf20Sopenharmony_ci if (d) 2698c2ecf20Sopenharmony_ci return d; 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci x = be64_to_cpu(kp1->rm_owner); 2728c2ecf20Sopenharmony_ci y = be64_to_cpu(kp2->rm_owner); 2738c2ecf20Sopenharmony_ci if (x > y) 2748c2ecf20Sopenharmony_ci return 1; 2758c2ecf20Sopenharmony_ci else if (y > x) 2768c2ecf20Sopenharmony_ci return -1; 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_ci x = XFS_RMAP_OFF(be64_to_cpu(kp1->rm_offset)); 2798c2ecf20Sopenharmony_ci y = XFS_RMAP_OFF(be64_to_cpu(kp2->rm_offset)); 2808c2ecf20Sopenharmony_ci if (x > y) 2818c2ecf20Sopenharmony_ci return 1; 2828c2ecf20Sopenharmony_ci else if (y > x) 2838c2ecf20Sopenharmony_ci return -1; 2848c2ecf20Sopenharmony_ci return 0; 2858c2ecf20Sopenharmony_ci} 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_cistatic xfs_failaddr_t 2888c2ecf20Sopenharmony_cixfs_rmapbt_verify( 2898c2ecf20Sopenharmony_ci struct xfs_buf *bp) 2908c2ecf20Sopenharmony_ci{ 2918c2ecf20Sopenharmony_ci struct xfs_mount *mp = bp->b_mount; 2928c2ecf20Sopenharmony_ci struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 2938c2ecf20Sopenharmony_ci struct xfs_perag *pag = bp->b_pag; 2948c2ecf20Sopenharmony_ci xfs_failaddr_t fa; 2958c2ecf20Sopenharmony_ci unsigned int level; 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci /* 2988c2ecf20Sopenharmony_ci * magic number and level verification 2998c2ecf20Sopenharmony_ci * 3008c2ecf20Sopenharmony_ci * During growfs operations, we can't verify the exact level or owner as 3018c2ecf20Sopenharmony_ci * the perag is not fully initialised and hence not attached to the 3028c2ecf20Sopenharmony_ci * buffer. In this case, check against the maximum tree depth. 3038c2ecf20Sopenharmony_ci * 3048c2ecf20Sopenharmony_ci * Similarly, during log recovery we will have a perag structure 3058c2ecf20Sopenharmony_ci * attached, but the agf information will not yet have been initialised 3068c2ecf20Sopenharmony_ci * from the on disk AGF. Again, we can only check against maximum limits 3078c2ecf20Sopenharmony_ci * in this case. 3088c2ecf20Sopenharmony_ci */ 3098c2ecf20Sopenharmony_ci if (!xfs_verify_magic(bp, block->bb_magic)) 3108c2ecf20Sopenharmony_ci return __this_address; 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) 3138c2ecf20Sopenharmony_ci return __this_address; 3148c2ecf20Sopenharmony_ci fa = xfs_btree_sblock_v5hdr_verify(bp); 3158c2ecf20Sopenharmony_ci if (fa) 3168c2ecf20Sopenharmony_ci return fa; 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci level = be16_to_cpu(block->bb_level); 3198c2ecf20Sopenharmony_ci if (pag && pag->pagf_init) { 3208c2ecf20Sopenharmony_ci if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) 3218c2ecf20Sopenharmony_ci return __this_address; 3228c2ecf20Sopenharmony_ci } else if (level >= mp->m_rmap_maxlevels) 3238c2ecf20Sopenharmony_ci return __this_address; 3248c2ecf20Sopenharmony_ci 3258c2ecf20Sopenharmony_ci return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); 3268c2ecf20Sopenharmony_ci} 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_cistatic void 3298c2ecf20Sopenharmony_cixfs_rmapbt_read_verify( 3308c2ecf20Sopenharmony_ci struct xfs_buf *bp) 3318c2ecf20Sopenharmony_ci{ 3328c2ecf20Sopenharmony_ci xfs_failaddr_t fa; 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci if (!xfs_btree_sblock_verify_crc(bp)) 3358c2ecf20Sopenharmony_ci xfs_verifier_error(bp, -EFSBADCRC, __this_address); 3368c2ecf20Sopenharmony_ci else { 3378c2ecf20Sopenharmony_ci fa = xfs_rmapbt_verify(bp); 3388c2ecf20Sopenharmony_ci if (fa) 3398c2ecf20Sopenharmony_ci xfs_verifier_error(bp, -EFSCORRUPTED, fa); 3408c2ecf20Sopenharmony_ci } 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci if (bp->b_error) 3438c2ecf20Sopenharmony_ci trace_xfs_btree_corrupt(bp, _RET_IP_); 3448c2ecf20Sopenharmony_ci} 3458c2ecf20Sopenharmony_ci 3468c2ecf20Sopenharmony_cistatic void 3478c2ecf20Sopenharmony_cixfs_rmapbt_write_verify( 3488c2ecf20Sopenharmony_ci struct xfs_buf *bp) 3498c2ecf20Sopenharmony_ci{ 3508c2ecf20Sopenharmony_ci xfs_failaddr_t fa; 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_ci fa = xfs_rmapbt_verify(bp); 3538c2ecf20Sopenharmony_ci if (fa) { 3548c2ecf20Sopenharmony_ci trace_xfs_btree_corrupt(bp, _RET_IP_); 3558c2ecf20Sopenharmony_ci xfs_verifier_error(bp, -EFSCORRUPTED, fa); 3568c2ecf20Sopenharmony_ci return; 3578c2ecf20Sopenharmony_ci } 3588c2ecf20Sopenharmony_ci xfs_btree_sblock_calc_crc(bp); 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ci} 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ciconst struct xfs_buf_ops xfs_rmapbt_buf_ops = { 3638c2ecf20Sopenharmony_ci .name = "xfs_rmapbt", 3648c2ecf20Sopenharmony_ci .magic = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) }, 3658c2ecf20Sopenharmony_ci .verify_read = xfs_rmapbt_read_verify, 3668c2ecf20Sopenharmony_ci .verify_write = xfs_rmapbt_write_verify, 3678c2ecf20Sopenharmony_ci .verify_struct = xfs_rmapbt_verify, 3688c2ecf20Sopenharmony_ci}; 3698c2ecf20Sopenharmony_ci 3708c2ecf20Sopenharmony_ciSTATIC int 3718c2ecf20Sopenharmony_cixfs_rmapbt_keys_inorder( 3728c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 3738c2ecf20Sopenharmony_ci union xfs_btree_key *k1, 3748c2ecf20Sopenharmony_ci union xfs_btree_key *k2) 3758c2ecf20Sopenharmony_ci{ 3768c2ecf20Sopenharmony_ci uint32_t x; 3778c2ecf20Sopenharmony_ci uint32_t y; 3788c2ecf20Sopenharmony_ci uint64_t a; 3798c2ecf20Sopenharmony_ci uint64_t b; 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ci x = be32_to_cpu(k1->rmap.rm_startblock); 3828c2ecf20Sopenharmony_ci y = be32_to_cpu(k2->rmap.rm_startblock); 3838c2ecf20Sopenharmony_ci if (x < y) 3848c2ecf20Sopenharmony_ci return 1; 3858c2ecf20Sopenharmony_ci else if (x > y) 3868c2ecf20Sopenharmony_ci return 0; 3878c2ecf20Sopenharmony_ci a = be64_to_cpu(k1->rmap.rm_owner); 3888c2ecf20Sopenharmony_ci b = be64_to_cpu(k2->rmap.rm_owner); 3898c2ecf20Sopenharmony_ci if (a < b) 3908c2ecf20Sopenharmony_ci return 1; 3918c2ecf20Sopenharmony_ci else if (a > b) 3928c2ecf20Sopenharmony_ci return 0; 3938c2ecf20Sopenharmony_ci a = XFS_RMAP_OFF(be64_to_cpu(k1->rmap.rm_offset)); 3948c2ecf20Sopenharmony_ci b = XFS_RMAP_OFF(be64_to_cpu(k2->rmap.rm_offset)); 3958c2ecf20Sopenharmony_ci if (a <= b) 3968c2ecf20Sopenharmony_ci return 1; 3978c2ecf20Sopenharmony_ci return 0; 3988c2ecf20Sopenharmony_ci} 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ciSTATIC int 4018c2ecf20Sopenharmony_cixfs_rmapbt_recs_inorder( 4028c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 4038c2ecf20Sopenharmony_ci union xfs_btree_rec *r1, 4048c2ecf20Sopenharmony_ci union xfs_btree_rec *r2) 4058c2ecf20Sopenharmony_ci{ 4068c2ecf20Sopenharmony_ci uint32_t x; 4078c2ecf20Sopenharmony_ci uint32_t y; 4088c2ecf20Sopenharmony_ci uint64_t a; 4098c2ecf20Sopenharmony_ci uint64_t b; 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci x = be32_to_cpu(r1->rmap.rm_startblock); 4128c2ecf20Sopenharmony_ci y = be32_to_cpu(r2->rmap.rm_startblock); 4138c2ecf20Sopenharmony_ci if (x < y) 4148c2ecf20Sopenharmony_ci return 1; 4158c2ecf20Sopenharmony_ci else if (x > y) 4168c2ecf20Sopenharmony_ci return 0; 4178c2ecf20Sopenharmony_ci a = be64_to_cpu(r1->rmap.rm_owner); 4188c2ecf20Sopenharmony_ci b = be64_to_cpu(r2->rmap.rm_owner); 4198c2ecf20Sopenharmony_ci if (a < b) 4208c2ecf20Sopenharmony_ci return 1; 4218c2ecf20Sopenharmony_ci else if (a > b) 4228c2ecf20Sopenharmony_ci return 0; 4238c2ecf20Sopenharmony_ci a = XFS_RMAP_OFF(be64_to_cpu(r1->rmap.rm_offset)); 4248c2ecf20Sopenharmony_ci b = XFS_RMAP_OFF(be64_to_cpu(r2->rmap.rm_offset)); 4258c2ecf20Sopenharmony_ci if (a <= b) 4268c2ecf20Sopenharmony_ci return 1; 4278c2ecf20Sopenharmony_ci return 0; 4288c2ecf20Sopenharmony_ci} 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_cistatic const struct xfs_btree_ops xfs_rmapbt_ops = { 4318c2ecf20Sopenharmony_ci .rec_len = sizeof(struct xfs_rmap_rec), 4328c2ecf20Sopenharmony_ci .key_len = 2 * sizeof(struct xfs_rmap_key), 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci .dup_cursor = xfs_rmapbt_dup_cursor, 4358c2ecf20Sopenharmony_ci .set_root = xfs_rmapbt_set_root, 4368c2ecf20Sopenharmony_ci .alloc_block = xfs_rmapbt_alloc_block, 4378c2ecf20Sopenharmony_ci .free_block = xfs_rmapbt_free_block, 4388c2ecf20Sopenharmony_ci .get_minrecs = xfs_rmapbt_get_minrecs, 4398c2ecf20Sopenharmony_ci .get_maxrecs = xfs_rmapbt_get_maxrecs, 4408c2ecf20Sopenharmony_ci .init_key_from_rec = xfs_rmapbt_init_key_from_rec, 4418c2ecf20Sopenharmony_ci .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec, 4428c2ecf20Sopenharmony_ci .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur, 4438c2ecf20Sopenharmony_ci .init_ptr_from_cur = xfs_rmapbt_init_ptr_from_cur, 4448c2ecf20Sopenharmony_ci .key_diff = xfs_rmapbt_key_diff, 4458c2ecf20Sopenharmony_ci .buf_ops = &xfs_rmapbt_buf_ops, 4468c2ecf20Sopenharmony_ci .diff_two_keys = xfs_rmapbt_diff_two_keys, 4478c2ecf20Sopenharmony_ci .keys_inorder = xfs_rmapbt_keys_inorder, 4488c2ecf20Sopenharmony_ci .recs_inorder = xfs_rmapbt_recs_inorder, 4498c2ecf20Sopenharmony_ci}; 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_cistatic struct xfs_btree_cur * 4528c2ecf20Sopenharmony_cixfs_rmapbt_init_common( 4538c2ecf20Sopenharmony_ci struct xfs_mount *mp, 4548c2ecf20Sopenharmony_ci struct xfs_trans *tp, 4558c2ecf20Sopenharmony_ci xfs_agnumber_t agno) 4568c2ecf20Sopenharmony_ci{ 4578c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur; 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci cur = kmem_cache_zalloc(xfs_btree_cur_zone, GFP_NOFS | __GFP_NOFAIL); 4608c2ecf20Sopenharmony_ci cur->bc_tp = tp; 4618c2ecf20Sopenharmony_ci cur->bc_mp = mp; 4628c2ecf20Sopenharmony_ci /* Overlapping btree; 2 keys per pointer. */ 4638c2ecf20Sopenharmony_ci cur->bc_btnum = XFS_BTNUM_RMAP; 4648c2ecf20Sopenharmony_ci cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING; 4658c2ecf20Sopenharmony_ci cur->bc_blocklog = mp->m_sb.sb_blocklog; 4668c2ecf20Sopenharmony_ci cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); 4678c2ecf20Sopenharmony_ci cur->bc_ag.agno = agno; 4688c2ecf20Sopenharmony_ci cur->bc_ops = &xfs_rmapbt_ops; 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_ci return cur; 4718c2ecf20Sopenharmony_ci} 4728c2ecf20Sopenharmony_ci 4738c2ecf20Sopenharmony_ci/* Create a new reverse mapping btree cursor. */ 4748c2ecf20Sopenharmony_cistruct xfs_btree_cur * 4758c2ecf20Sopenharmony_cixfs_rmapbt_init_cursor( 4768c2ecf20Sopenharmony_ci struct xfs_mount *mp, 4778c2ecf20Sopenharmony_ci struct xfs_trans *tp, 4788c2ecf20Sopenharmony_ci struct xfs_buf *agbp, 4798c2ecf20Sopenharmony_ci xfs_agnumber_t agno) 4808c2ecf20Sopenharmony_ci{ 4818c2ecf20Sopenharmony_ci struct xfs_agf *agf = agbp->b_addr; 4828c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur; 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci cur = xfs_rmapbt_init_common(mp, tp, agno); 4858c2ecf20Sopenharmony_ci cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); 4868c2ecf20Sopenharmony_ci cur->bc_ag.agbp = agbp; 4878c2ecf20Sopenharmony_ci return cur; 4888c2ecf20Sopenharmony_ci} 4898c2ecf20Sopenharmony_ci 4908c2ecf20Sopenharmony_ci/* Create a new reverse mapping btree cursor with a fake root for staging. */ 4918c2ecf20Sopenharmony_cistruct xfs_btree_cur * 4928c2ecf20Sopenharmony_cixfs_rmapbt_stage_cursor( 4938c2ecf20Sopenharmony_ci struct xfs_mount *mp, 4948c2ecf20Sopenharmony_ci struct xbtree_afakeroot *afake, 4958c2ecf20Sopenharmony_ci xfs_agnumber_t agno) 4968c2ecf20Sopenharmony_ci{ 4978c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur; 4988c2ecf20Sopenharmony_ci 4998c2ecf20Sopenharmony_ci cur = xfs_rmapbt_init_common(mp, NULL, agno); 5008c2ecf20Sopenharmony_ci xfs_btree_stage_afakeroot(cur, afake); 5018c2ecf20Sopenharmony_ci return cur; 5028c2ecf20Sopenharmony_ci} 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci/* 5058c2ecf20Sopenharmony_ci * Install a new reverse mapping btree root. Caller is responsible for 5068c2ecf20Sopenharmony_ci * invalidating and freeing the old btree blocks. 5078c2ecf20Sopenharmony_ci */ 5088c2ecf20Sopenharmony_civoid 5098c2ecf20Sopenharmony_cixfs_rmapbt_commit_staged_btree( 5108c2ecf20Sopenharmony_ci struct xfs_btree_cur *cur, 5118c2ecf20Sopenharmony_ci struct xfs_trans *tp, 5128c2ecf20Sopenharmony_ci struct xfs_buf *agbp) 5138c2ecf20Sopenharmony_ci{ 5148c2ecf20Sopenharmony_ci struct xfs_agf *agf = agbp->b_addr; 5158c2ecf20Sopenharmony_ci struct xbtree_afakeroot *afake = cur->bc_ag.afake; 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci ASSERT(cur->bc_flags & XFS_BTREE_STAGING); 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root); 5208c2ecf20Sopenharmony_ci agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels); 5218c2ecf20Sopenharmony_ci agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks); 5228c2ecf20Sopenharmony_ci xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS | 5238c2ecf20Sopenharmony_ci XFS_AGF_RMAP_BLOCKS); 5248c2ecf20Sopenharmony_ci xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops); 5258c2ecf20Sopenharmony_ci} 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci/* 5288c2ecf20Sopenharmony_ci * Calculate number of records in an rmap btree block. 5298c2ecf20Sopenharmony_ci */ 5308c2ecf20Sopenharmony_ciint 5318c2ecf20Sopenharmony_cixfs_rmapbt_maxrecs( 5328c2ecf20Sopenharmony_ci int blocklen, 5338c2ecf20Sopenharmony_ci int leaf) 5348c2ecf20Sopenharmony_ci{ 5358c2ecf20Sopenharmony_ci blocklen -= XFS_RMAP_BLOCK_LEN; 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci if (leaf) 5388c2ecf20Sopenharmony_ci return blocklen / sizeof(struct xfs_rmap_rec); 5398c2ecf20Sopenharmony_ci return blocklen / 5408c2ecf20Sopenharmony_ci (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rmap_ptr_t)); 5418c2ecf20Sopenharmony_ci} 5428c2ecf20Sopenharmony_ci 5438c2ecf20Sopenharmony_ci/* Compute the maximum height of an rmap btree. */ 5448c2ecf20Sopenharmony_civoid 5458c2ecf20Sopenharmony_cixfs_rmapbt_compute_maxlevels( 5468c2ecf20Sopenharmony_ci struct xfs_mount *mp) 5478c2ecf20Sopenharmony_ci{ 5488c2ecf20Sopenharmony_ci /* 5498c2ecf20Sopenharmony_ci * On a non-reflink filesystem, the maximum number of rmap 5508c2ecf20Sopenharmony_ci * records is the number of blocks in the AG, hence the max 5518c2ecf20Sopenharmony_ci * rmapbt height is log_$maxrecs($agblocks). However, with 5528c2ecf20Sopenharmony_ci * reflink each AG block can have up to 2^32 (per the refcount 5538c2ecf20Sopenharmony_ci * record format) owners, which means that theoretically we 5548c2ecf20Sopenharmony_ci * could face up to 2^64 rmap records. 5558c2ecf20Sopenharmony_ci * 5568c2ecf20Sopenharmony_ci * That effectively means that the max rmapbt height must be 5578c2ecf20Sopenharmony_ci * XFS_BTREE_MAXLEVELS. "Fortunately" we'll run out of AG 5588c2ecf20Sopenharmony_ci * blocks to feed the rmapbt long before the rmapbt reaches 5598c2ecf20Sopenharmony_ci * maximum height. The reflink code uses ag_resv_critical to 5608c2ecf20Sopenharmony_ci * disallow reflinking when less than 10% of the per-AG metadata 5618c2ecf20Sopenharmony_ci * block reservation since the fallback is a regular file copy. 5628c2ecf20Sopenharmony_ci */ 5638c2ecf20Sopenharmony_ci if (xfs_sb_version_hasreflink(&mp->m_sb)) 5648c2ecf20Sopenharmony_ci mp->m_rmap_maxlevels = XFS_BTREE_MAXLEVELS; 5658c2ecf20Sopenharmony_ci else 5668c2ecf20Sopenharmony_ci mp->m_rmap_maxlevels = xfs_btree_compute_maxlevels( 5678c2ecf20Sopenharmony_ci mp->m_rmap_mnr, mp->m_sb.sb_agblocks); 5688c2ecf20Sopenharmony_ci} 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_ci/* Calculate the refcount btree size for some records. */ 5718c2ecf20Sopenharmony_cixfs_extlen_t 5728c2ecf20Sopenharmony_cixfs_rmapbt_calc_size( 5738c2ecf20Sopenharmony_ci struct xfs_mount *mp, 5748c2ecf20Sopenharmony_ci unsigned long long len) 5758c2ecf20Sopenharmony_ci{ 5768c2ecf20Sopenharmony_ci return xfs_btree_calc_size(mp->m_rmap_mnr, len); 5778c2ecf20Sopenharmony_ci} 5788c2ecf20Sopenharmony_ci 5798c2ecf20Sopenharmony_ci/* 5808c2ecf20Sopenharmony_ci * Calculate the maximum refcount btree size. 5818c2ecf20Sopenharmony_ci */ 5828c2ecf20Sopenharmony_cixfs_extlen_t 5838c2ecf20Sopenharmony_cixfs_rmapbt_max_size( 5848c2ecf20Sopenharmony_ci struct xfs_mount *mp, 5858c2ecf20Sopenharmony_ci xfs_agblock_t agblocks) 5868c2ecf20Sopenharmony_ci{ 5878c2ecf20Sopenharmony_ci /* Bail out if we're uninitialized, which can happen in mkfs. */ 5888c2ecf20Sopenharmony_ci if (mp->m_rmap_mxr[0] == 0) 5898c2ecf20Sopenharmony_ci return 0; 5908c2ecf20Sopenharmony_ci 5918c2ecf20Sopenharmony_ci return xfs_rmapbt_calc_size(mp, agblocks); 5928c2ecf20Sopenharmony_ci} 5938c2ecf20Sopenharmony_ci 5948c2ecf20Sopenharmony_ci/* 5958c2ecf20Sopenharmony_ci * Figure out how many blocks to reserve and how many are used by this btree. 5968c2ecf20Sopenharmony_ci */ 5978c2ecf20Sopenharmony_ciint 5988c2ecf20Sopenharmony_cixfs_rmapbt_calc_reserves( 5998c2ecf20Sopenharmony_ci struct xfs_mount *mp, 6008c2ecf20Sopenharmony_ci struct xfs_trans *tp, 6018c2ecf20Sopenharmony_ci xfs_agnumber_t agno, 6028c2ecf20Sopenharmony_ci xfs_extlen_t *ask, 6038c2ecf20Sopenharmony_ci xfs_extlen_t *used) 6048c2ecf20Sopenharmony_ci{ 6058c2ecf20Sopenharmony_ci struct xfs_buf *agbp; 6068c2ecf20Sopenharmony_ci struct xfs_agf *agf; 6078c2ecf20Sopenharmony_ci xfs_agblock_t agblocks; 6088c2ecf20Sopenharmony_ci xfs_extlen_t tree_len; 6098c2ecf20Sopenharmony_ci int error; 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) 6128c2ecf20Sopenharmony_ci return 0; 6138c2ecf20Sopenharmony_ci 6148c2ecf20Sopenharmony_ci error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); 6158c2ecf20Sopenharmony_ci if (error) 6168c2ecf20Sopenharmony_ci return error; 6178c2ecf20Sopenharmony_ci 6188c2ecf20Sopenharmony_ci agf = agbp->b_addr; 6198c2ecf20Sopenharmony_ci agblocks = be32_to_cpu(agf->agf_length); 6208c2ecf20Sopenharmony_ci tree_len = be32_to_cpu(agf->agf_rmap_blocks); 6218c2ecf20Sopenharmony_ci xfs_trans_brelse(tp, agbp); 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci /* 6248c2ecf20Sopenharmony_ci * The log is permanently allocated, so the space it occupies will 6258c2ecf20Sopenharmony_ci * never be available for the kinds of things that would require btree 6268c2ecf20Sopenharmony_ci * expansion. We therefore can pretend the space isn't there. 6278c2ecf20Sopenharmony_ci */ 6288c2ecf20Sopenharmony_ci if (mp->m_sb.sb_logstart && 6298c2ecf20Sopenharmony_ci XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) 6308c2ecf20Sopenharmony_ci agblocks -= mp->m_sb.sb_logblocks; 6318c2ecf20Sopenharmony_ci 6328c2ecf20Sopenharmony_ci /* Reserve 1% of the AG or enough for 1 block per record. */ 6338c2ecf20Sopenharmony_ci *ask += max(agblocks / 100, xfs_rmapbt_max_size(mp, agblocks)); 6348c2ecf20Sopenharmony_ci *used += tree_len; 6358c2ecf20Sopenharmony_ci 6368c2ecf20Sopenharmony_ci return error; 6378c2ecf20Sopenharmony_ci} 638