xref: /kernel/linux/linux-6.6/fs/xfs/libxfs/xfs_btree.c (revision 62306a36)
162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
462306a36Sopenharmony_ci * All Rights Reserved.
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci#include "xfs.h"
762306a36Sopenharmony_ci#include "xfs_fs.h"
862306a36Sopenharmony_ci#include "xfs_shared.h"
962306a36Sopenharmony_ci#include "xfs_format.h"
1062306a36Sopenharmony_ci#include "xfs_log_format.h"
1162306a36Sopenharmony_ci#include "xfs_trans_resv.h"
1262306a36Sopenharmony_ci#include "xfs_bit.h"
1362306a36Sopenharmony_ci#include "xfs_mount.h"
1462306a36Sopenharmony_ci#include "xfs_inode.h"
1562306a36Sopenharmony_ci#include "xfs_trans.h"
1662306a36Sopenharmony_ci#include "xfs_buf_item.h"
1762306a36Sopenharmony_ci#include "xfs_btree.h"
1862306a36Sopenharmony_ci#include "xfs_errortag.h"
1962306a36Sopenharmony_ci#include "xfs_error.h"
2062306a36Sopenharmony_ci#include "xfs_trace.h"
2162306a36Sopenharmony_ci#include "xfs_alloc.h"
2262306a36Sopenharmony_ci#include "xfs_log.h"
2362306a36Sopenharmony_ci#include "xfs_btree_staging.h"
2462306a36Sopenharmony_ci#include "xfs_ag.h"
2562306a36Sopenharmony_ci#include "xfs_alloc_btree.h"
2662306a36Sopenharmony_ci#include "xfs_ialloc_btree.h"
2762306a36Sopenharmony_ci#include "xfs_bmap_btree.h"
2862306a36Sopenharmony_ci#include "xfs_rmap_btree.h"
2962306a36Sopenharmony_ci#include "xfs_refcount_btree.h"
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci/*
3262306a36Sopenharmony_ci * Btree magic numbers.
3362306a36Sopenharmony_ci */
3462306a36Sopenharmony_cistatic const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
3562306a36Sopenharmony_ci	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
3662306a36Sopenharmony_ci	  XFS_FIBT_MAGIC, 0 },
3762306a36Sopenharmony_ci	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC,
3862306a36Sopenharmony_ci	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC,
3962306a36Sopenharmony_ci	  XFS_REFC_CRC_MAGIC }
4062306a36Sopenharmony_ci};
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ciuint32_t
4362306a36Sopenharmony_cixfs_btree_magic(
4462306a36Sopenharmony_ci	int			crc,
4562306a36Sopenharmony_ci	xfs_btnum_t		btnum)
4662306a36Sopenharmony_ci{
4762306a36Sopenharmony_ci	uint32_t		magic = xfs_magics[crc][btnum];
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci	/* Ensure we asked for crc for crc-only magics. */
5062306a36Sopenharmony_ci	ASSERT(magic != 0);
5162306a36Sopenharmony_ci	return magic;
5262306a36Sopenharmony_ci}
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci/*
5562306a36Sopenharmony_ci * These sibling pointer checks are optimised for null sibling pointers. This
5662306a36Sopenharmony_ci * happens a lot, and we don't need to byte swap at runtime if the sibling
5762306a36Sopenharmony_ci * pointer is NULL.
5862306a36Sopenharmony_ci *
5962306a36Sopenharmony_ci * These are explicitly marked at inline because the cost of calling them as
6062306a36Sopenharmony_ci * functions instead of inlining them is about 36 bytes extra code per call site
6162306a36Sopenharmony_ci * on x86-64. Yes, gcc-11 fails to inline them, and explicit inlining of these
6262306a36Sopenharmony_ci * two sibling check functions reduces the compiled code size by over 300
6362306a36Sopenharmony_ci * bytes.
6462306a36Sopenharmony_ci */
6562306a36Sopenharmony_cistatic inline xfs_failaddr_t
6662306a36Sopenharmony_cixfs_btree_check_lblock_siblings(
6762306a36Sopenharmony_ci	struct xfs_mount	*mp,
6862306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
6962306a36Sopenharmony_ci	int			level,
7062306a36Sopenharmony_ci	xfs_fsblock_t		fsb,
7162306a36Sopenharmony_ci	__be64			dsibling)
7262306a36Sopenharmony_ci{
7362306a36Sopenharmony_ci	xfs_fsblock_t		sibling;
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	if (dsibling == cpu_to_be64(NULLFSBLOCK))
7662306a36Sopenharmony_ci		return NULL;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	sibling = be64_to_cpu(dsibling);
7962306a36Sopenharmony_ci	if (sibling == fsb)
8062306a36Sopenharmony_ci		return __this_address;
8162306a36Sopenharmony_ci	if (level >= 0) {
8262306a36Sopenharmony_ci		if (!xfs_btree_check_lptr(cur, sibling, level + 1))
8362306a36Sopenharmony_ci			return __this_address;
8462306a36Sopenharmony_ci	} else {
8562306a36Sopenharmony_ci		if (!xfs_verify_fsbno(mp, sibling))
8662306a36Sopenharmony_ci			return __this_address;
8762306a36Sopenharmony_ci	}
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	return NULL;
9062306a36Sopenharmony_ci}
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_cistatic inline xfs_failaddr_t
9362306a36Sopenharmony_cixfs_btree_check_sblock_siblings(
9462306a36Sopenharmony_ci	struct xfs_perag	*pag,
9562306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
9662306a36Sopenharmony_ci	int			level,
9762306a36Sopenharmony_ci	xfs_agblock_t		agbno,
9862306a36Sopenharmony_ci	__be32			dsibling)
9962306a36Sopenharmony_ci{
10062306a36Sopenharmony_ci	xfs_agblock_t		sibling;
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	if (dsibling == cpu_to_be32(NULLAGBLOCK))
10362306a36Sopenharmony_ci		return NULL;
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	sibling = be32_to_cpu(dsibling);
10662306a36Sopenharmony_ci	if (sibling == agbno)
10762306a36Sopenharmony_ci		return __this_address;
10862306a36Sopenharmony_ci	if (level >= 0) {
10962306a36Sopenharmony_ci		if (!xfs_btree_check_sptr(cur, sibling, level + 1))
11062306a36Sopenharmony_ci			return __this_address;
11162306a36Sopenharmony_ci	} else {
11262306a36Sopenharmony_ci		if (!xfs_verify_agbno(pag, sibling))
11362306a36Sopenharmony_ci			return __this_address;
11462306a36Sopenharmony_ci	}
11562306a36Sopenharmony_ci	return NULL;
11662306a36Sopenharmony_ci}
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci/*
11962306a36Sopenharmony_ci * Check a long btree block header.  Return the address of the failing check,
12062306a36Sopenharmony_ci * or NULL if everything is ok.
12162306a36Sopenharmony_ci */
12262306a36Sopenharmony_cixfs_failaddr_t
12362306a36Sopenharmony_ci__xfs_btree_check_lblock(
12462306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
12562306a36Sopenharmony_ci	struct xfs_btree_block	*block,
12662306a36Sopenharmony_ci	int			level,
12762306a36Sopenharmony_ci	struct xfs_buf		*bp)
12862306a36Sopenharmony_ci{
12962306a36Sopenharmony_ci	struct xfs_mount	*mp = cur->bc_mp;
13062306a36Sopenharmony_ci	xfs_btnum_t		btnum = cur->bc_btnum;
13162306a36Sopenharmony_ci	int			crc = xfs_has_crc(mp);
13262306a36Sopenharmony_ci	xfs_failaddr_t		fa;
13362306a36Sopenharmony_ci	xfs_fsblock_t		fsb = NULLFSBLOCK;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	if (crc) {
13662306a36Sopenharmony_ci		if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
13762306a36Sopenharmony_ci			return __this_address;
13862306a36Sopenharmony_ci		if (block->bb_u.l.bb_blkno !=
13962306a36Sopenharmony_ci		    cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
14062306a36Sopenharmony_ci			return __this_address;
14162306a36Sopenharmony_ci		if (block->bb_u.l.bb_pad != cpu_to_be32(0))
14262306a36Sopenharmony_ci			return __this_address;
14362306a36Sopenharmony_ci	}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
14662306a36Sopenharmony_ci		return __this_address;
14762306a36Sopenharmony_ci	if (be16_to_cpu(block->bb_level) != level)
14862306a36Sopenharmony_ci		return __this_address;
14962306a36Sopenharmony_ci	if (be16_to_cpu(block->bb_numrecs) >
15062306a36Sopenharmony_ci	    cur->bc_ops->get_maxrecs(cur, level))
15162306a36Sopenharmony_ci		return __this_address;
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	if (bp)
15462306a36Sopenharmony_ci		fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
15762306a36Sopenharmony_ci			block->bb_u.l.bb_leftsib);
15862306a36Sopenharmony_ci	if (!fa)
15962306a36Sopenharmony_ci		fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb,
16062306a36Sopenharmony_ci				block->bb_u.l.bb_rightsib);
16162306a36Sopenharmony_ci	return fa;
16262306a36Sopenharmony_ci}
16362306a36Sopenharmony_ci
16462306a36Sopenharmony_ci/* Check a long btree block header. */
16562306a36Sopenharmony_cistatic int
16662306a36Sopenharmony_cixfs_btree_check_lblock(
16762306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
16862306a36Sopenharmony_ci	struct xfs_btree_block	*block,
16962306a36Sopenharmony_ci	int			level,
17062306a36Sopenharmony_ci	struct xfs_buf		*bp)
17162306a36Sopenharmony_ci{
17262306a36Sopenharmony_ci	struct xfs_mount	*mp = cur->bc_mp;
17362306a36Sopenharmony_ci	xfs_failaddr_t		fa;
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	fa = __xfs_btree_check_lblock(cur, block, level, bp);
17662306a36Sopenharmony_ci	if (XFS_IS_CORRUPT(mp, fa != NULL) ||
17762306a36Sopenharmony_ci	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
17862306a36Sopenharmony_ci		if (bp)
17962306a36Sopenharmony_ci			trace_xfs_btree_corrupt(bp, _RET_IP_);
18062306a36Sopenharmony_ci		return -EFSCORRUPTED;
18162306a36Sopenharmony_ci	}
18262306a36Sopenharmony_ci	return 0;
18362306a36Sopenharmony_ci}
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci/*
18662306a36Sopenharmony_ci * Check a short btree block header.  Return the address of the failing check,
18762306a36Sopenharmony_ci * or NULL if everything is ok.
18862306a36Sopenharmony_ci */
18962306a36Sopenharmony_cixfs_failaddr_t
19062306a36Sopenharmony_ci__xfs_btree_check_sblock(
19162306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
19262306a36Sopenharmony_ci	struct xfs_btree_block	*block,
19362306a36Sopenharmony_ci	int			level,
19462306a36Sopenharmony_ci	struct xfs_buf		*bp)
19562306a36Sopenharmony_ci{
19662306a36Sopenharmony_ci	struct xfs_mount	*mp = cur->bc_mp;
19762306a36Sopenharmony_ci	struct xfs_perag	*pag = cur->bc_ag.pag;
19862306a36Sopenharmony_ci	xfs_btnum_t		btnum = cur->bc_btnum;
19962306a36Sopenharmony_ci	int			crc = xfs_has_crc(mp);
20062306a36Sopenharmony_ci	xfs_failaddr_t		fa;
20162306a36Sopenharmony_ci	xfs_agblock_t		agbno = NULLAGBLOCK;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	if (crc) {
20462306a36Sopenharmony_ci		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
20562306a36Sopenharmony_ci			return __this_address;
20662306a36Sopenharmony_ci		if (block->bb_u.s.bb_blkno !=
20762306a36Sopenharmony_ci		    cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL))
20862306a36Sopenharmony_ci			return __this_address;
20962306a36Sopenharmony_ci	}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum))
21262306a36Sopenharmony_ci		return __this_address;
21362306a36Sopenharmony_ci	if (be16_to_cpu(block->bb_level) != level)
21462306a36Sopenharmony_ci		return __this_address;
21562306a36Sopenharmony_ci	if (be16_to_cpu(block->bb_numrecs) >
21662306a36Sopenharmony_ci	    cur->bc_ops->get_maxrecs(cur, level))
21762306a36Sopenharmony_ci		return __this_address;
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	if (bp)
22062306a36Sopenharmony_ci		agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
22362306a36Sopenharmony_ci			block->bb_u.s.bb_leftsib);
22462306a36Sopenharmony_ci	if (!fa)
22562306a36Sopenharmony_ci		fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno,
22662306a36Sopenharmony_ci				block->bb_u.s.bb_rightsib);
22762306a36Sopenharmony_ci	return fa;
22862306a36Sopenharmony_ci}
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci/* Check a short btree block header. */
23162306a36Sopenharmony_ciSTATIC int
23262306a36Sopenharmony_cixfs_btree_check_sblock(
23362306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
23462306a36Sopenharmony_ci	struct xfs_btree_block	*block,
23562306a36Sopenharmony_ci	int			level,
23662306a36Sopenharmony_ci	struct xfs_buf		*bp)
23762306a36Sopenharmony_ci{
23862306a36Sopenharmony_ci	struct xfs_mount	*mp = cur->bc_mp;
23962306a36Sopenharmony_ci	xfs_failaddr_t		fa;
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	fa = __xfs_btree_check_sblock(cur, block, level, bp);
24262306a36Sopenharmony_ci	if (XFS_IS_CORRUPT(mp, fa != NULL) ||
24362306a36Sopenharmony_ci	    XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
24462306a36Sopenharmony_ci		if (bp)
24562306a36Sopenharmony_ci			trace_xfs_btree_corrupt(bp, _RET_IP_);
24662306a36Sopenharmony_ci		return -EFSCORRUPTED;
24762306a36Sopenharmony_ci	}
24862306a36Sopenharmony_ci	return 0;
24962306a36Sopenharmony_ci}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci/*
25262306a36Sopenharmony_ci * Debug routine: check that block header is ok.
25362306a36Sopenharmony_ci */
25462306a36Sopenharmony_ciint
25562306a36Sopenharmony_cixfs_btree_check_block(
25662306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
25762306a36Sopenharmony_ci	struct xfs_btree_block	*block,	/* generic btree block pointer */
25862306a36Sopenharmony_ci	int			level,	/* level of the btree block */
25962306a36Sopenharmony_ci	struct xfs_buf		*bp)	/* buffer containing block, if any */
26062306a36Sopenharmony_ci{
26162306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
26262306a36Sopenharmony_ci		return xfs_btree_check_lblock(cur, block, level, bp);
26362306a36Sopenharmony_ci	else
26462306a36Sopenharmony_ci		return xfs_btree_check_sblock(cur, block, level, bp);
26562306a36Sopenharmony_ci}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci/* Check that this long pointer is valid and points within the fs. */
26862306a36Sopenharmony_cibool
26962306a36Sopenharmony_cixfs_btree_check_lptr(
27062306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
27162306a36Sopenharmony_ci	xfs_fsblock_t		fsbno,
27262306a36Sopenharmony_ci	int			level)
27362306a36Sopenharmony_ci{
27462306a36Sopenharmony_ci	if (level <= 0)
27562306a36Sopenharmony_ci		return false;
27662306a36Sopenharmony_ci	return xfs_verify_fsbno(cur->bc_mp, fsbno);
27762306a36Sopenharmony_ci}
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci/* Check that this short pointer is valid and points within the AG. */
28062306a36Sopenharmony_cibool
28162306a36Sopenharmony_cixfs_btree_check_sptr(
28262306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
28362306a36Sopenharmony_ci	xfs_agblock_t		agbno,
28462306a36Sopenharmony_ci	int			level)
28562306a36Sopenharmony_ci{
28662306a36Sopenharmony_ci	if (level <= 0)
28762306a36Sopenharmony_ci		return false;
28862306a36Sopenharmony_ci	return xfs_verify_agbno(cur->bc_ag.pag, agbno);
28962306a36Sopenharmony_ci}
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci/*
29262306a36Sopenharmony_ci * Check that a given (indexed) btree pointer at a certain level of a
29362306a36Sopenharmony_ci * btree is valid and doesn't point past where it should.
29462306a36Sopenharmony_ci */
29562306a36Sopenharmony_cistatic int
29662306a36Sopenharmony_cixfs_btree_check_ptr(
29762306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
29862306a36Sopenharmony_ci	const union xfs_btree_ptr	*ptr,
29962306a36Sopenharmony_ci	int				index,
30062306a36Sopenharmony_ci	int				level)
30162306a36Sopenharmony_ci{
30262306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
30362306a36Sopenharmony_ci		if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]),
30462306a36Sopenharmony_ci				level))
30562306a36Sopenharmony_ci			return 0;
30662306a36Sopenharmony_ci		xfs_err(cur->bc_mp,
30762306a36Sopenharmony_ci"Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.",
30862306a36Sopenharmony_ci				cur->bc_ino.ip->i_ino,
30962306a36Sopenharmony_ci				cur->bc_ino.whichfork, cur->bc_btnum,
31062306a36Sopenharmony_ci				level, index);
31162306a36Sopenharmony_ci	} else {
31262306a36Sopenharmony_ci		if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]),
31362306a36Sopenharmony_ci				level))
31462306a36Sopenharmony_ci			return 0;
31562306a36Sopenharmony_ci		xfs_err(cur->bc_mp,
31662306a36Sopenharmony_ci"AG %u: Corrupt btree %d pointer at level %d index %d.",
31762306a36Sopenharmony_ci				cur->bc_ag.pag->pag_agno, cur->bc_btnum,
31862306a36Sopenharmony_ci				level, index);
31962306a36Sopenharmony_ci	}
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	return -EFSCORRUPTED;
32262306a36Sopenharmony_ci}
32362306a36Sopenharmony_ci
/*
 * Pointer check that only does real work on DEBUG builds.  Otherwise it
 * expands to a constant 0 ("no error") and its arguments are not evaluated.
 */
#ifndef DEBUG
# define xfs_btree_debug_check_ptr(...)	(0)
#else
# define xfs_btree_debug_check_ptr	xfs_btree_check_ptr
#endif
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ci/*
33162306a36Sopenharmony_ci * Calculate CRC on the whole btree block and stuff it into the
33262306a36Sopenharmony_ci * long-form btree header.
33362306a36Sopenharmony_ci *
33462306a36Sopenharmony_ci * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
33562306a36Sopenharmony_ci * it into the buffer so recovery knows what the last modification was that made
33662306a36Sopenharmony_ci * it to disk.
33762306a36Sopenharmony_ci */
33862306a36Sopenharmony_civoid
33962306a36Sopenharmony_cixfs_btree_lblock_calc_crc(
34062306a36Sopenharmony_ci	struct xfs_buf		*bp)
34162306a36Sopenharmony_ci{
34262306a36Sopenharmony_ci	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
34362306a36Sopenharmony_ci	struct xfs_buf_log_item	*bip = bp->b_log_item;
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ci	if (!xfs_has_crc(bp->b_mount))
34662306a36Sopenharmony_ci		return;
34762306a36Sopenharmony_ci	if (bip)
34862306a36Sopenharmony_ci		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
34962306a36Sopenharmony_ci	xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
35062306a36Sopenharmony_ci}
35162306a36Sopenharmony_ci
35262306a36Sopenharmony_cibool
35362306a36Sopenharmony_cixfs_btree_lblock_verify_crc(
35462306a36Sopenharmony_ci	struct xfs_buf		*bp)
35562306a36Sopenharmony_ci{
35662306a36Sopenharmony_ci	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
35762306a36Sopenharmony_ci	struct xfs_mount	*mp = bp->b_mount;
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci	if (xfs_has_crc(mp)) {
36062306a36Sopenharmony_ci		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
36162306a36Sopenharmony_ci			return false;
36262306a36Sopenharmony_ci		return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
36362306a36Sopenharmony_ci	}
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci	return true;
36662306a36Sopenharmony_ci}
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci/*
36962306a36Sopenharmony_ci * Calculate CRC on the whole btree block and stuff it into the
37062306a36Sopenharmony_ci * short-form btree header.
37162306a36Sopenharmony_ci *
37262306a36Sopenharmony_ci * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
37362306a36Sopenharmony_ci * it into the buffer so recovery knows what the last modification was that made
37462306a36Sopenharmony_ci * it to disk.
37562306a36Sopenharmony_ci */
37662306a36Sopenharmony_civoid
37762306a36Sopenharmony_cixfs_btree_sblock_calc_crc(
37862306a36Sopenharmony_ci	struct xfs_buf		*bp)
37962306a36Sopenharmony_ci{
38062306a36Sopenharmony_ci	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
38162306a36Sopenharmony_ci	struct xfs_buf_log_item	*bip = bp->b_log_item;
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci	if (!xfs_has_crc(bp->b_mount))
38462306a36Sopenharmony_ci		return;
38562306a36Sopenharmony_ci	if (bip)
38662306a36Sopenharmony_ci		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
38762306a36Sopenharmony_ci	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
38862306a36Sopenharmony_ci}
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_cibool
39162306a36Sopenharmony_cixfs_btree_sblock_verify_crc(
39262306a36Sopenharmony_ci	struct xfs_buf		*bp)
39362306a36Sopenharmony_ci{
39462306a36Sopenharmony_ci	struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
39562306a36Sopenharmony_ci	struct xfs_mount	*mp = bp->b_mount;
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	if (xfs_has_crc(mp)) {
39862306a36Sopenharmony_ci		if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
39962306a36Sopenharmony_ci			return false;
40062306a36Sopenharmony_ci		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
40162306a36Sopenharmony_ci	}
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	return true;
40462306a36Sopenharmony_ci}
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_cistatic int
40762306a36Sopenharmony_cixfs_btree_free_block(
40862306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
40962306a36Sopenharmony_ci	struct xfs_buf		*bp)
41062306a36Sopenharmony_ci{
41162306a36Sopenharmony_ci	int			error;
41262306a36Sopenharmony_ci
41362306a36Sopenharmony_ci	error = cur->bc_ops->free_block(cur, bp);
41462306a36Sopenharmony_ci	if (!error) {
41562306a36Sopenharmony_ci		xfs_trans_binval(cur->bc_tp, bp);
41662306a36Sopenharmony_ci		XFS_BTREE_STATS_INC(cur, free);
41762306a36Sopenharmony_ci	}
41862306a36Sopenharmony_ci	return error;
41962306a36Sopenharmony_ci}
42062306a36Sopenharmony_ci
42162306a36Sopenharmony_ci/*
42262306a36Sopenharmony_ci * Delete the btree cursor.
42362306a36Sopenharmony_ci */
42462306a36Sopenharmony_civoid
42562306a36Sopenharmony_cixfs_btree_del_cursor(
42662306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,		/* btree cursor */
42762306a36Sopenharmony_ci	int			error)		/* del because of error */
42862306a36Sopenharmony_ci{
42962306a36Sopenharmony_ci	int			i;		/* btree level */
43062306a36Sopenharmony_ci
43162306a36Sopenharmony_ci	/*
43262306a36Sopenharmony_ci	 * Clear the buffer pointers and release the buffers. If we're doing
43362306a36Sopenharmony_ci	 * this because of an error, inspect all of the entries in the bc_bufs
43462306a36Sopenharmony_ci	 * array for buffers to be unlocked. This is because some of the btree
43562306a36Sopenharmony_ci	 * code works from level n down to 0, and if we get an error along the
43662306a36Sopenharmony_ci	 * way we won't have initialized all the entries down to 0.
43762306a36Sopenharmony_ci	 */
43862306a36Sopenharmony_ci	for (i = 0; i < cur->bc_nlevels; i++) {
43962306a36Sopenharmony_ci		if (cur->bc_levels[i].bp)
44062306a36Sopenharmony_ci			xfs_trans_brelse(cur->bc_tp, cur->bc_levels[i].bp);
44162306a36Sopenharmony_ci		else if (!error)
44262306a36Sopenharmony_ci			break;
44362306a36Sopenharmony_ci	}
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci	/*
44662306a36Sopenharmony_ci	 * If we are doing a BMBT update, the number of unaccounted blocks
44762306a36Sopenharmony_ci	 * allocated during this cursor life time should be zero. If it's not
44862306a36Sopenharmony_ci	 * zero, then we should be shut down or on our way to shutdown due to
44962306a36Sopenharmony_ci	 * cancelling a dirty transaction on error.
45062306a36Sopenharmony_ci	 */
45162306a36Sopenharmony_ci	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
45262306a36Sopenharmony_ci	       xfs_is_shutdown(cur->bc_mp) || error != 0);
45362306a36Sopenharmony_ci	if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
45462306a36Sopenharmony_ci		kmem_free(cur->bc_ops);
45562306a36Sopenharmony_ci	if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag)
45662306a36Sopenharmony_ci		xfs_perag_put(cur->bc_ag.pag);
45762306a36Sopenharmony_ci	kmem_cache_free(cur->bc_cache, cur);
45862306a36Sopenharmony_ci}
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci/*
46162306a36Sopenharmony_ci * Duplicate the btree cursor.
46262306a36Sopenharmony_ci * Allocate a new one, copy the record, re-get the buffers.
46362306a36Sopenharmony_ci */
46462306a36Sopenharmony_ciint					/* error */
46562306a36Sopenharmony_cixfs_btree_dup_cursor(
46662306a36Sopenharmony_ci	struct xfs_btree_cur *cur,		/* input cursor */
46762306a36Sopenharmony_ci	struct xfs_btree_cur **ncur)		/* output cursor */
46862306a36Sopenharmony_ci{
46962306a36Sopenharmony_ci	struct xfs_buf	*bp;		/* btree block's buffer pointer */
47062306a36Sopenharmony_ci	int		error;		/* error return value */
47162306a36Sopenharmony_ci	int		i;		/* level number of btree block */
47262306a36Sopenharmony_ci	xfs_mount_t	*mp;		/* mount structure for filesystem */
47362306a36Sopenharmony_ci	struct xfs_btree_cur *new;		/* new cursor value */
47462306a36Sopenharmony_ci	xfs_trans_t	*tp;		/* transaction pointer, can be NULL */
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci	tp = cur->bc_tp;
47762306a36Sopenharmony_ci	mp = cur->bc_mp;
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci	/*
48062306a36Sopenharmony_ci	 * Allocate a new cursor like the old one.
48162306a36Sopenharmony_ci	 */
48262306a36Sopenharmony_ci	new = cur->bc_ops->dup_cursor(cur);
48362306a36Sopenharmony_ci
48462306a36Sopenharmony_ci	/*
48562306a36Sopenharmony_ci	 * Copy the record currently in the cursor.
48662306a36Sopenharmony_ci	 */
48762306a36Sopenharmony_ci	new->bc_rec = cur->bc_rec;
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci	/*
49062306a36Sopenharmony_ci	 * For each level current, re-get the buffer and copy the ptr value.
49162306a36Sopenharmony_ci	 */
49262306a36Sopenharmony_ci	for (i = 0; i < new->bc_nlevels; i++) {
49362306a36Sopenharmony_ci		new->bc_levels[i].ptr = cur->bc_levels[i].ptr;
49462306a36Sopenharmony_ci		new->bc_levels[i].ra = cur->bc_levels[i].ra;
49562306a36Sopenharmony_ci		bp = cur->bc_levels[i].bp;
49662306a36Sopenharmony_ci		if (bp) {
49762306a36Sopenharmony_ci			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
49862306a36Sopenharmony_ci						   xfs_buf_daddr(bp), mp->m_bsize,
49962306a36Sopenharmony_ci						   0, &bp,
50062306a36Sopenharmony_ci						   cur->bc_ops->buf_ops);
50162306a36Sopenharmony_ci			if (error) {
50262306a36Sopenharmony_ci				xfs_btree_del_cursor(new, error);
50362306a36Sopenharmony_ci				*ncur = NULL;
50462306a36Sopenharmony_ci				return error;
50562306a36Sopenharmony_ci			}
50662306a36Sopenharmony_ci		}
50762306a36Sopenharmony_ci		new->bc_levels[i].bp = bp;
50862306a36Sopenharmony_ci	}
50962306a36Sopenharmony_ci	*ncur = new;
51062306a36Sopenharmony_ci	return 0;
51162306a36Sopenharmony_ci}
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci/*
51462306a36Sopenharmony_ci * XFS btree block layout and addressing:
51562306a36Sopenharmony_ci *
51662306a36Sopenharmony_ci * There are two types of blocks in the btree: leaf and non-leaf blocks.
51762306a36Sopenharmony_ci *
 * A leaf block starts with a header, which is followed by records containing
51962306a36Sopenharmony_ci * the values.  A non-leaf block also starts with the same header, and
52062306a36Sopenharmony_ci * then first contains lookup keys followed by an equal number of pointers
52162306a36Sopenharmony_ci * to the btree blocks at the previous level.
52262306a36Sopenharmony_ci *
52362306a36Sopenharmony_ci *		+--------+-------+-------+-------+-------+-------+-------+
52462306a36Sopenharmony_ci * Leaf:	| header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
52562306a36Sopenharmony_ci *		+--------+-------+-------+-------+-------+-------+-------+
52662306a36Sopenharmony_ci *
52762306a36Sopenharmony_ci *		+--------+-------+-------+-------+-------+-------+-------+
52862306a36Sopenharmony_ci * Non-Leaf:	| header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
52962306a36Sopenharmony_ci *		+--------+-------+-------+-------+-------+-------+-------+
53062306a36Sopenharmony_ci *
53162306a36Sopenharmony_ci * The header is called struct xfs_btree_block for reasons better left unknown
53262306a36Sopenharmony_ci * and comes in different versions for short (32bit) and long (64bit) block
53362306a36Sopenharmony_ci * pointers.  The record and key structures are defined by the btree instances
53462306a36Sopenharmony_ci * and opaque to the btree core.  The block pointers are simple disk endian
53562306a36Sopenharmony_ci * integers, available in a short (32bit) and long (64bit) variant.
53662306a36Sopenharmony_ci *
53762306a36Sopenharmony_ci * The helpers below calculate the offset of a given record, key or pointer
53862306a36Sopenharmony_ci * into a btree block (xfs_btree_*_offset) or return a pointer to the given
53962306a36Sopenharmony_ci * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
54062306a36Sopenharmony_ci * inside the btree block is done using indices starting at one, not zero!
54162306a36Sopenharmony_ci *
54262306a36Sopenharmony_ci * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing
54362306a36Sopenharmony_ci * overlapping intervals.  In such a tree, records are still sorted lowest to
54462306a36Sopenharmony_ci * highest and indexed by the smallest key value that refers to the record.
54562306a36Sopenharmony_ci * However, nodes are different: each pointer has two associated keys -- one
54662306a36Sopenharmony_ci * indexing the lowest key available in the block(s) below (the same behavior
54762306a36Sopenharmony_ci * as the key in a regular btree) and another indexing the highest key
54862306a36Sopenharmony_ci * available in the block(s) below.  Because records are /not/ sorted by the
54962306a36Sopenharmony_ci * highest key, all leaf block updates require us to compute the highest key
55062306a36Sopenharmony_ci * that matches any record in the leaf and to recursively update the high keys
55162306a36Sopenharmony_ci * in the nodes going further up in the tree, if necessary.  Nodes look like
55262306a36Sopenharmony_ci * this:
55362306a36Sopenharmony_ci *
55462306a36Sopenharmony_ci *		+--------+-----+-----+-----+-----+-----+-------+-------+-----+
55562306a36Sopenharmony_ci * Non-Leaf:	| header | lo1 | hi1 | lo2 | hi2 | ... | ptr 1 | ptr 2 | ... |
55662306a36Sopenharmony_ci *		+--------+-----+-----+-----+-----+-----+-------+-------+-----+
55762306a36Sopenharmony_ci *
55862306a36Sopenharmony_ci * To perform an interval query on an overlapped tree, perform the usual
55962306a36Sopenharmony_ci * depth-first search and use the low and high keys to decide if we can skip
56062306a36Sopenharmony_ci * that particular node.  If a leaf node is reached, return the records that
56162306a36Sopenharmony_ci * intersect the interval.  Note that an interval query may return numerous
56262306a36Sopenharmony_ci * entries.  For a non-overlapped tree, simply search for the record associated
56362306a36Sopenharmony_ci * with the lowest key and iterate forward until a non-matching record is
56462306a36Sopenharmony_ci * found.  Section 14.3 ("Interval Trees") of _Introduction to Algorithms_ by
 * Cormen, Leiserson, Rivest, and Stein (2nd or 3rd ed. only) discusses this in
56662306a36Sopenharmony_ci * more detail.
56762306a36Sopenharmony_ci *
56862306a36Sopenharmony_ci * Why do we care about overlapping intervals?  Let's say you have a bunch of
56962306a36Sopenharmony_ci * reverse mapping records on a reflink filesystem:
57062306a36Sopenharmony_ci *
57162306a36Sopenharmony_ci * 1: +- file A startblock B offset C length D -----------+
57262306a36Sopenharmony_ci * 2:      +- file E startblock F offset G length H --------------+
57362306a36Sopenharmony_ci * 3:      +- file I startblock F offset J length K --+
57462306a36Sopenharmony_ci * 4:                                                        +- file L... --+
57562306a36Sopenharmony_ci *
57662306a36Sopenharmony_ci * Now say we want to map block (B+D) into file A at offset (C+D).  Ideally,
57762306a36Sopenharmony_ci * we'd simply increment the length of record 1.  But how do we find the record
57862306a36Sopenharmony_ci * that ends at (B+D-1) (i.e. record 1)?  A LE lookup of (B+D-1) would return
57962306a36Sopenharmony_ci * record 3 because the keys are ordered first by startblock.  An interval
58062306a36Sopenharmony_ci * query would return records 1 and 2 because they both overlap (B+D-1), and
58162306a36Sopenharmony_ci * from that we can pick out record 1 as the appropriate left neighbor.
58262306a36Sopenharmony_ci *
58362306a36Sopenharmony_ci * In the non-overlapped case you can do a LE lookup and decrement the cursor
58462306a36Sopenharmony_ci * because a record's interval must end before the next record.
58562306a36Sopenharmony_ci */
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci/*
58862306a36Sopenharmony_ci * Return size of the btree block header for this btree instance.
58962306a36Sopenharmony_ci */
59062306a36Sopenharmony_cistatic inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
59162306a36Sopenharmony_ci{
59262306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
59362306a36Sopenharmony_ci		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
59462306a36Sopenharmony_ci			return XFS_BTREE_LBLOCK_CRC_LEN;
59562306a36Sopenharmony_ci		return XFS_BTREE_LBLOCK_LEN;
59662306a36Sopenharmony_ci	}
59762306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
59862306a36Sopenharmony_ci		return XFS_BTREE_SBLOCK_CRC_LEN;
59962306a36Sopenharmony_ci	return XFS_BTREE_SBLOCK_LEN;
60062306a36Sopenharmony_ci}
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci/*
60362306a36Sopenharmony_ci * Return size of btree block pointers for this btree instance.
60462306a36Sopenharmony_ci */
60562306a36Sopenharmony_cistatic inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
60662306a36Sopenharmony_ci{
60762306a36Sopenharmony_ci	return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
60862306a36Sopenharmony_ci		sizeof(__be64) : sizeof(__be32);
60962306a36Sopenharmony_ci}
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci/*
61262306a36Sopenharmony_ci * Calculate offset of the n-th record in a btree block.
61362306a36Sopenharmony_ci */
61462306a36Sopenharmony_ciSTATIC size_t
61562306a36Sopenharmony_cixfs_btree_rec_offset(
61662306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
61762306a36Sopenharmony_ci	int			n)
61862306a36Sopenharmony_ci{
61962306a36Sopenharmony_ci	return xfs_btree_block_len(cur) +
62062306a36Sopenharmony_ci		(n - 1) * cur->bc_ops->rec_len;
62162306a36Sopenharmony_ci}
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci/*
62462306a36Sopenharmony_ci * Calculate offset of the n-th key in a btree block.
62562306a36Sopenharmony_ci */
62662306a36Sopenharmony_ciSTATIC size_t
62762306a36Sopenharmony_cixfs_btree_key_offset(
62862306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
62962306a36Sopenharmony_ci	int			n)
63062306a36Sopenharmony_ci{
63162306a36Sopenharmony_ci	return xfs_btree_block_len(cur) +
63262306a36Sopenharmony_ci		(n - 1) * cur->bc_ops->key_len;
63362306a36Sopenharmony_ci}
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci/*
63662306a36Sopenharmony_ci * Calculate offset of the n-th high key in a btree block.
63762306a36Sopenharmony_ci */
63862306a36Sopenharmony_ciSTATIC size_t
63962306a36Sopenharmony_cixfs_btree_high_key_offset(
64062306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
64162306a36Sopenharmony_ci	int			n)
64262306a36Sopenharmony_ci{
64362306a36Sopenharmony_ci	return xfs_btree_block_len(cur) +
64462306a36Sopenharmony_ci		(n - 1) * cur->bc_ops->key_len + (cur->bc_ops->key_len / 2);
64562306a36Sopenharmony_ci}
64662306a36Sopenharmony_ci
64762306a36Sopenharmony_ci/*
64862306a36Sopenharmony_ci * Calculate offset of the n-th block pointer in a btree block.
64962306a36Sopenharmony_ci */
65062306a36Sopenharmony_ciSTATIC size_t
65162306a36Sopenharmony_cixfs_btree_ptr_offset(
65262306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
65362306a36Sopenharmony_ci	int			n,
65462306a36Sopenharmony_ci	int			level)
65562306a36Sopenharmony_ci{
65662306a36Sopenharmony_ci	return xfs_btree_block_len(cur) +
65762306a36Sopenharmony_ci		cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
65862306a36Sopenharmony_ci		(n - 1) * xfs_btree_ptr_len(cur);
65962306a36Sopenharmony_ci}
66062306a36Sopenharmony_ci
/*
 * Address of record number n (numbered from 1) inside the given btree block.
 */
union xfs_btree_rec *
xfs_btree_rec_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	char			*base = (char *)block;

	return (union xfs_btree_rec *)(base + xfs_btree_rec_offset(cur, n));
}
67362306a36Sopenharmony_ci
/*
 * Address of key number n (numbered from 1) inside the given btree block.
 */
union xfs_btree_key *
xfs_btree_key_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	char			*base = (char *)block;

	return (union xfs_btree_key *)(base + xfs_btree_key_offset(cur, n));
}
68662306a36Sopenharmony_ci
/*
 * Address of high key number n (numbered from 1) inside the given btree
 * block.
 */
union xfs_btree_key *
xfs_btree_high_key_addr(
	struct xfs_btree_cur	*cur,
	int			n,
	struct xfs_btree_block	*block)
{
	char			*base = (char *)block;

	return (union xfs_btree_key *)
		(base + xfs_btree_high_key_offset(cur, n));
}
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ci/*
70162306a36Sopenharmony_ci * Return a pointer to the n-th block pointer in the btree block.
70262306a36Sopenharmony_ci */
70362306a36Sopenharmony_ciunion xfs_btree_ptr *
70462306a36Sopenharmony_cixfs_btree_ptr_addr(
70562306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
70662306a36Sopenharmony_ci	int			n,
70762306a36Sopenharmony_ci	struct xfs_btree_block	*block)
70862306a36Sopenharmony_ci{
70962306a36Sopenharmony_ci	int			level = xfs_btree_get_level(block);
71062306a36Sopenharmony_ci
71162306a36Sopenharmony_ci	ASSERT(block->bb_level != 0);
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_ci	return (union xfs_btree_ptr *)
71462306a36Sopenharmony_ci		((char *)block + xfs_btree_ptr_offset(cur, n, level));
71562306a36Sopenharmony_ci}
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_cistruct xfs_ifork *
71862306a36Sopenharmony_cixfs_btree_ifork_ptr(
71962306a36Sopenharmony_ci	struct xfs_btree_cur	*cur)
72062306a36Sopenharmony_ci{
72162306a36Sopenharmony_ci	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_STAGING)
72462306a36Sopenharmony_ci		return cur->bc_ino.ifake->if_fork;
72562306a36Sopenharmony_ci	return xfs_ifork_ptr(cur->bc_ino.ip, cur->bc_ino.whichfork);
72662306a36Sopenharmony_ci}
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_ci/*
72962306a36Sopenharmony_ci * Get the root block which is stored in the inode.
73062306a36Sopenharmony_ci *
73162306a36Sopenharmony_ci * For now this btree implementation assumes the btree root is always
73262306a36Sopenharmony_ci * stored in the if_broot field of an inode fork.
73362306a36Sopenharmony_ci */
73462306a36Sopenharmony_ciSTATIC struct xfs_btree_block *
73562306a36Sopenharmony_cixfs_btree_get_iroot(
73662306a36Sopenharmony_ci	struct xfs_btree_cur	*cur)
73762306a36Sopenharmony_ci{
73862306a36Sopenharmony_ci	struct xfs_ifork	*ifp = xfs_btree_ifork_ptr(cur);
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ci	return (struct xfs_btree_block *)ifp->if_broot;
74162306a36Sopenharmony_ci}
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci/*
74462306a36Sopenharmony_ci * Retrieve the block pointer from the cursor at the given level.
74562306a36Sopenharmony_ci * This may be an inode btree root or from a buffer.
74662306a36Sopenharmony_ci */
74762306a36Sopenharmony_cistruct xfs_btree_block *		/* generic btree block pointer */
74862306a36Sopenharmony_cixfs_btree_get_block(
74962306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
75062306a36Sopenharmony_ci	int			level,	/* level in btree */
75162306a36Sopenharmony_ci	struct xfs_buf		**bpp)	/* buffer containing the block */
75262306a36Sopenharmony_ci{
75362306a36Sopenharmony_ci	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
75462306a36Sopenharmony_ci	    (level == cur->bc_nlevels - 1)) {
75562306a36Sopenharmony_ci		*bpp = NULL;
75662306a36Sopenharmony_ci		return xfs_btree_get_iroot(cur);
75762306a36Sopenharmony_ci	}
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci	*bpp = cur->bc_levels[level].bp;
76062306a36Sopenharmony_ci	return XFS_BUF_TO_BLOCK(*bpp);
76162306a36Sopenharmony_ci}
76262306a36Sopenharmony_ci
76362306a36Sopenharmony_ci/*
76462306a36Sopenharmony_ci * Change the cursor to point to the first record at the given level.
76562306a36Sopenharmony_ci * Other levels are unaffected.
76662306a36Sopenharmony_ci */
76762306a36Sopenharmony_ciSTATIC int				/* success=1, failure=0 */
76862306a36Sopenharmony_cixfs_btree_firstrec(
76962306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
77062306a36Sopenharmony_ci	int			level)	/* level to change */
77162306a36Sopenharmony_ci{
77262306a36Sopenharmony_ci	struct xfs_btree_block	*block;	/* generic btree block pointer */
77362306a36Sopenharmony_ci	struct xfs_buf		*bp;	/* buffer containing block */
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	/*
77662306a36Sopenharmony_ci	 * Get the block pointer for this level.
77762306a36Sopenharmony_ci	 */
77862306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
77962306a36Sopenharmony_ci	if (xfs_btree_check_block(cur, block, level, bp))
78062306a36Sopenharmony_ci		return 0;
78162306a36Sopenharmony_ci	/*
78262306a36Sopenharmony_ci	 * It's empty, there is no such record.
78362306a36Sopenharmony_ci	 */
78462306a36Sopenharmony_ci	if (!block->bb_numrecs)
78562306a36Sopenharmony_ci		return 0;
78662306a36Sopenharmony_ci	/*
78762306a36Sopenharmony_ci	 * Set the ptr value to 1, that's the first record/key.
78862306a36Sopenharmony_ci	 */
78962306a36Sopenharmony_ci	cur->bc_levels[level].ptr = 1;
79062306a36Sopenharmony_ci	return 1;
79162306a36Sopenharmony_ci}
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_ci/*
79462306a36Sopenharmony_ci * Change the cursor to point to the last record in the current block
79562306a36Sopenharmony_ci * at the given level.  Other levels are unaffected.
79662306a36Sopenharmony_ci */
79762306a36Sopenharmony_ciSTATIC int				/* success=1, failure=0 */
79862306a36Sopenharmony_cixfs_btree_lastrec(
79962306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
80062306a36Sopenharmony_ci	int			level)	/* level to change */
80162306a36Sopenharmony_ci{
80262306a36Sopenharmony_ci	struct xfs_btree_block	*block;	/* generic btree block pointer */
80362306a36Sopenharmony_ci	struct xfs_buf		*bp;	/* buffer containing block */
80462306a36Sopenharmony_ci
80562306a36Sopenharmony_ci	/*
80662306a36Sopenharmony_ci	 * Get the block pointer for this level.
80762306a36Sopenharmony_ci	 */
80862306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
80962306a36Sopenharmony_ci	if (xfs_btree_check_block(cur, block, level, bp))
81062306a36Sopenharmony_ci		return 0;
81162306a36Sopenharmony_ci	/*
81262306a36Sopenharmony_ci	 * It's empty, there is no such record.
81362306a36Sopenharmony_ci	 */
81462306a36Sopenharmony_ci	if (!block->bb_numrecs)
81562306a36Sopenharmony_ci		return 0;
81662306a36Sopenharmony_ci	/*
81762306a36Sopenharmony_ci	 * Set the ptr value to numrecs, that's the last record/key.
81862306a36Sopenharmony_ci	 */
81962306a36Sopenharmony_ci	cur->bc_levels[level].ptr = be16_to_cpu(block->bb_numrecs);
82062306a36Sopenharmony_ci	return 1;
82162306a36Sopenharmony_ci}
82262306a36Sopenharmony_ci
/*
 * Translate a bitmask of fields into the byte range those fields cover.
 *
 * offsets[i] is the starting byte offset of the field for bit i, and
 * offsets[i + 1] is where that field ends, so the table must hold one more
 * entry than the highest bit inspected.  *first receives the start of the
 * lowest set field; *last receives the final byte of the highest set field
 * found at or below bit (nbits - 1).  The caller must pass a non-zero mask.
 */
void
xfs_btree_offsets(
	uint32_t	fields,		/* bitmask of fields */
	const short	*offsets,	/* table of field offsets */
	int		nbits,		/* number of bits to inspect */
	int		*first,		/* output: first byte offset */
	int		*last)		/* output: last byte offset */
{
	uint32_t	mask;		/* single-bit probe */
	int		bit;		/* bit number matching mask */

	ASSERT(fields != 0);

	/* Walk up from bit 0 to the lowest set bit: start of the range. */
	bit = 0;
	mask = 1u;
	while (!(mask & fields)) {
		bit++;
		mask <<= 1;
	}
	*first = offsets[bit];

	/* Walk down from the top bit to the highest set bit: end of range. */
	bit = nbits - 1;
	mask = 1u << bit;
	while (!(mask & fields)) {
		bit--;
		mask >>= 1;
	}
	*last = offsets[bit + 1] - 1;
}
85862306a36Sopenharmony_ci
85962306a36Sopenharmony_ci/*
86062306a36Sopenharmony_ci * Get a buffer for the block, return it read in.
86162306a36Sopenharmony_ci * Long-form addressing.
86262306a36Sopenharmony_ci */
86362306a36Sopenharmony_ciint
86462306a36Sopenharmony_cixfs_btree_read_bufl(
86562306a36Sopenharmony_ci	struct xfs_mount	*mp,		/* file system mount point */
86662306a36Sopenharmony_ci	struct xfs_trans	*tp,		/* transaction pointer */
86762306a36Sopenharmony_ci	xfs_fsblock_t		fsbno,		/* file system block number */
86862306a36Sopenharmony_ci	struct xfs_buf		**bpp,		/* buffer for fsbno */
86962306a36Sopenharmony_ci	int			refval,		/* ref count value for buffer */
87062306a36Sopenharmony_ci	const struct xfs_buf_ops *ops)
87162306a36Sopenharmony_ci{
87262306a36Sopenharmony_ci	struct xfs_buf		*bp;		/* return value */
87362306a36Sopenharmony_ci	xfs_daddr_t		d;		/* real disk block address */
87462306a36Sopenharmony_ci	int			error;
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_ci	if (!xfs_verify_fsbno(mp, fsbno))
87762306a36Sopenharmony_ci		return -EFSCORRUPTED;
87862306a36Sopenharmony_ci	d = XFS_FSB_TO_DADDR(mp, fsbno);
87962306a36Sopenharmony_ci	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
88062306a36Sopenharmony_ci				   mp->m_bsize, 0, &bp, ops);
88162306a36Sopenharmony_ci	if (error)
88262306a36Sopenharmony_ci		return error;
88362306a36Sopenharmony_ci	if (bp)
88462306a36Sopenharmony_ci		xfs_buf_set_ref(bp, refval);
88562306a36Sopenharmony_ci	*bpp = bp;
88662306a36Sopenharmony_ci	return 0;
88762306a36Sopenharmony_ci}
88862306a36Sopenharmony_ci
88962306a36Sopenharmony_ci/*
89062306a36Sopenharmony_ci * Read-ahead the block, don't wait for it, don't return a buffer.
89162306a36Sopenharmony_ci * Long-form addressing.
89262306a36Sopenharmony_ci */
89362306a36Sopenharmony_ci/* ARGSUSED */
89462306a36Sopenharmony_civoid
89562306a36Sopenharmony_cixfs_btree_reada_bufl(
89662306a36Sopenharmony_ci	struct xfs_mount	*mp,		/* file system mount point */
89762306a36Sopenharmony_ci	xfs_fsblock_t		fsbno,		/* file system block number */
89862306a36Sopenharmony_ci	xfs_extlen_t		count,		/* count of filesystem blocks */
89962306a36Sopenharmony_ci	const struct xfs_buf_ops *ops)
90062306a36Sopenharmony_ci{
90162306a36Sopenharmony_ci	xfs_daddr_t		d;
90262306a36Sopenharmony_ci
90362306a36Sopenharmony_ci	ASSERT(fsbno != NULLFSBLOCK);
90462306a36Sopenharmony_ci	d = XFS_FSB_TO_DADDR(mp, fsbno);
90562306a36Sopenharmony_ci	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
90662306a36Sopenharmony_ci}
90762306a36Sopenharmony_ci
90862306a36Sopenharmony_ci/*
90962306a36Sopenharmony_ci * Read-ahead the block, don't wait for it, don't return a buffer.
91062306a36Sopenharmony_ci * Short-form addressing.
91162306a36Sopenharmony_ci */
91262306a36Sopenharmony_ci/* ARGSUSED */
91362306a36Sopenharmony_civoid
91462306a36Sopenharmony_cixfs_btree_reada_bufs(
91562306a36Sopenharmony_ci	struct xfs_mount	*mp,		/* file system mount point */
91662306a36Sopenharmony_ci	xfs_agnumber_t		agno,		/* allocation group number */
91762306a36Sopenharmony_ci	xfs_agblock_t		agbno,		/* allocation group block number */
91862306a36Sopenharmony_ci	xfs_extlen_t		count,		/* count of filesystem blocks */
91962306a36Sopenharmony_ci	const struct xfs_buf_ops *ops)
92062306a36Sopenharmony_ci{
92162306a36Sopenharmony_ci	xfs_daddr_t		d;
92262306a36Sopenharmony_ci
92362306a36Sopenharmony_ci	ASSERT(agno != NULLAGNUMBER);
92462306a36Sopenharmony_ci	ASSERT(agbno != NULLAGBLOCK);
92562306a36Sopenharmony_ci	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
92662306a36Sopenharmony_ci	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
92762306a36Sopenharmony_ci}
92862306a36Sopenharmony_ci
92962306a36Sopenharmony_ciSTATIC int
93062306a36Sopenharmony_cixfs_btree_readahead_lblock(
93162306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
93262306a36Sopenharmony_ci	int			lr,
93362306a36Sopenharmony_ci	struct xfs_btree_block	*block)
93462306a36Sopenharmony_ci{
93562306a36Sopenharmony_ci	int			rval = 0;
93662306a36Sopenharmony_ci	xfs_fsblock_t		left = be64_to_cpu(block->bb_u.l.bb_leftsib);
93762306a36Sopenharmony_ci	xfs_fsblock_t		right = be64_to_cpu(block->bb_u.l.bb_rightsib);
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) {
94062306a36Sopenharmony_ci		xfs_btree_reada_bufl(cur->bc_mp, left, 1,
94162306a36Sopenharmony_ci				     cur->bc_ops->buf_ops);
94262306a36Sopenharmony_ci		rval++;
94362306a36Sopenharmony_ci	}
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) {
94662306a36Sopenharmony_ci		xfs_btree_reada_bufl(cur->bc_mp, right, 1,
94762306a36Sopenharmony_ci				     cur->bc_ops->buf_ops);
94862306a36Sopenharmony_ci		rval++;
94962306a36Sopenharmony_ci	}
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_ci	return rval;
95262306a36Sopenharmony_ci}
95362306a36Sopenharmony_ci
95462306a36Sopenharmony_ciSTATIC int
95562306a36Sopenharmony_cixfs_btree_readahead_sblock(
95662306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
95762306a36Sopenharmony_ci	int			lr,
95862306a36Sopenharmony_ci	struct xfs_btree_block *block)
95962306a36Sopenharmony_ci{
96062306a36Sopenharmony_ci	int			rval = 0;
96162306a36Sopenharmony_ci	xfs_agblock_t		left = be32_to_cpu(block->bb_u.s.bb_leftsib);
96262306a36Sopenharmony_ci	xfs_agblock_t		right = be32_to_cpu(block->bb_u.s.bb_rightsib);
96362306a36Sopenharmony_ci
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
96662306a36Sopenharmony_ci		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
96762306a36Sopenharmony_ci				     left, 1, cur->bc_ops->buf_ops);
96862306a36Sopenharmony_ci		rval++;
96962306a36Sopenharmony_ci	}
97062306a36Sopenharmony_ci
97162306a36Sopenharmony_ci	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
97262306a36Sopenharmony_ci		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno,
97362306a36Sopenharmony_ci				     right, 1, cur->bc_ops->buf_ops);
97462306a36Sopenharmony_ci		rval++;
97562306a36Sopenharmony_ci	}
97662306a36Sopenharmony_ci
97762306a36Sopenharmony_ci	return rval;
97862306a36Sopenharmony_ci}
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_ci/*
98162306a36Sopenharmony_ci * Read-ahead btree blocks, at the given level.
98262306a36Sopenharmony_ci * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
98362306a36Sopenharmony_ci */
98462306a36Sopenharmony_ciSTATIC int
98562306a36Sopenharmony_cixfs_btree_readahead(
98662306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,		/* btree cursor */
98762306a36Sopenharmony_ci	int			lev,		/* level in btree */
98862306a36Sopenharmony_ci	int			lr)		/* left/right bits */
98962306a36Sopenharmony_ci{
99062306a36Sopenharmony_ci	struct xfs_btree_block	*block;
99162306a36Sopenharmony_ci
99262306a36Sopenharmony_ci	/*
99362306a36Sopenharmony_ci	 * No readahead needed if we are at the root level and the
99462306a36Sopenharmony_ci	 * btree root is stored in the inode.
99562306a36Sopenharmony_ci	 */
99662306a36Sopenharmony_ci	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
99762306a36Sopenharmony_ci	    (lev == cur->bc_nlevels - 1))
99862306a36Sopenharmony_ci		return 0;
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci	if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra)
100162306a36Sopenharmony_ci		return 0;
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci	cur->bc_levels[lev].ra |= lr;
100462306a36Sopenharmony_ci	block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp);
100562306a36Sopenharmony_ci
100662306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
100762306a36Sopenharmony_ci		return xfs_btree_readahead_lblock(cur, lr, block);
100862306a36Sopenharmony_ci	return xfs_btree_readahead_sblock(cur, lr, block);
100962306a36Sopenharmony_ci}
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ciSTATIC int
101262306a36Sopenharmony_cixfs_btree_ptr_to_daddr(
101362306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
101462306a36Sopenharmony_ci	const union xfs_btree_ptr	*ptr,
101562306a36Sopenharmony_ci	xfs_daddr_t			*daddr)
101662306a36Sopenharmony_ci{
101762306a36Sopenharmony_ci	xfs_fsblock_t		fsbno;
101862306a36Sopenharmony_ci	xfs_agblock_t		agbno;
101962306a36Sopenharmony_ci	int			error;
102062306a36Sopenharmony_ci
102162306a36Sopenharmony_ci	error = xfs_btree_check_ptr(cur, ptr, 0, 1);
102262306a36Sopenharmony_ci	if (error)
102362306a36Sopenharmony_ci		return error;
102462306a36Sopenharmony_ci
102562306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
102662306a36Sopenharmony_ci		fsbno = be64_to_cpu(ptr->l);
102762306a36Sopenharmony_ci		*daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno);
102862306a36Sopenharmony_ci	} else {
102962306a36Sopenharmony_ci		agbno = be32_to_cpu(ptr->s);
103062306a36Sopenharmony_ci		*daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
103162306a36Sopenharmony_ci				agbno);
103262306a36Sopenharmony_ci	}
103362306a36Sopenharmony_ci
103462306a36Sopenharmony_ci	return 0;
103562306a36Sopenharmony_ci}
103662306a36Sopenharmony_ci
103762306a36Sopenharmony_ci/*
103862306a36Sopenharmony_ci * Readahead @count btree blocks at the given @ptr location.
103962306a36Sopenharmony_ci *
104062306a36Sopenharmony_ci * We don't need to care about long or short form btrees here as we have a
104162306a36Sopenharmony_ci * method of converting the ptr directly to a daddr available to us.
104262306a36Sopenharmony_ci */
104362306a36Sopenharmony_ciSTATIC void
104462306a36Sopenharmony_cixfs_btree_readahead_ptr(
104562306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
104662306a36Sopenharmony_ci	union xfs_btree_ptr	*ptr,
104762306a36Sopenharmony_ci	xfs_extlen_t		count)
104862306a36Sopenharmony_ci{
104962306a36Sopenharmony_ci	xfs_daddr_t		daddr;
105062306a36Sopenharmony_ci
105162306a36Sopenharmony_ci	if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr))
105262306a36Sopenharmony_ci		return;
105362306a36Sopenharmony_ci	xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr,
105462306a36Sopenharmony_ci			  cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
105562306a36Sopenharmony_ci}
105662306a36Sopenharmony_ci
105762306a36Sopenharmony_ci/*
105862306a36Sopenharmony_ci * Set the buffer for level "lev" in the cursor to bp, releasing
105962306a36Sopenharmony_ci * any previous buffer.
106062306a36Sopenharmony_ci */
106162306a36Sopenharmony_ciSTATIC void
106262306a36Sopenharmony_cixfs_btree_setbuf(
106362306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
106462306a36Sopenharmony_ci	int			lev,	/* level in btree */
106562306a36Sopenharmony_ci	struct xfs_buf		*bp)	/* new buffer to set */
106662306a36Sopenharmony_ci{
106762306a36Sopenharmony_ci	struct xfs_btree_block	*b;	/* btree block */
106862306a36Sopenharmony_ci
106962306a36Sopenharmony_ci	if (cur->bc_levels[lev].bp)
107062306a36Sopenharmony_ci		xfs_trans_brelse(cur->bc_tp, cur->bc_levels[lev].bp);
107162306a36Sopenharmony_ci	cur->bc_levels[lev].bp = bp;
107262306a36Sopenharmony_ci	cur->bc_levels[lev].ra = 0;
107362306a36Sopenharmony_ci
107462306a36Sopenharmony_ci	b = XFS_BUF_TO_BLOCK(bp);
107562306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
107662306a36Sopenharmony_ci		if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK))
107762306a36Sopenharmony_ci			cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
107862306a36Sopenharmony_ci		if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK))
107962306a36Sopenharmony_ci			cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
108062306a36Sopenharmony_ci	} else {
108162306a36Sopenharmony_ci		if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
108262306a36Sopenharmony_ci			cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA;
108362306a36Sopenharmony_ci		if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
108462306a36Sopenharmony_ci			cur->bc_levels[lev].ra |= XFS_BTCUR_RIGHTRA;
108562306a36Sopenharmony_ci	}
108662306a36Sopenharmony_ci}
108762306a36Sopenharmony_ci
108862306a36Sopenharmony_cibool
108962306a36Sopenharmony_cixfs_btree_ptr_is_null(
109062306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
109162306a36Sopenharmony_ci	const union xfs_btree_ptr	*ptr)
109262306a36Sopenharmony_ci{
109362306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
109462306a36Sopenharmony_ci		return ptr->l == cpu_to_be64(NULLFSBLOCK);
109562306a36Sopenharmony_ci	else
109662306a36Sopenharmony_ci		return ptr->s == cpu_to_be32(NULLAGBLOCK);
109762306a36Sopenharmony_ci}
109862306a36Sopenharmony_ci
109962306a36Sopenharmony_civoid
110062306a36Sopenharmony_cixfs_btree_set_ptr_null(
110162306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
110262306a36Sopenharmony_ci	union xfs_btree_ptr	*ptr)
110362306a36Sopenharmony_ci{
110462306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
110562306a36Sopenharmony_ci		ptr->l = cpu_to_be64(NULLFSBLOCK);
110662306a36Sopenharmony_ci	else
110762306a36Sopenharmony_ci		ptr->s = cpu_to_be32(NULLAGBLOCK);
110862306a36Sopenharmony_ci}
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_ci/*
111162306a36Sopenharmony_ci * Get/set/init sibling pointers
111262306a36Sopenharmony_ci */
111362306a36Sopenharmony_civoid
111462306a36Sopenharmony_cixfs_btree_get_sibling(
111562306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
111662306a36Sopenharmony_ci	struct xfs_btree_block	*block,
111762306a36Sopenharmony_ci	union xfs_btree_ptr	*ptr,
111862306a36Sopenharmony_ci	int			lr)
111962306a36Sopenharmony_ci{
112062306a36Sopenharmony_ci	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
112162306a36Sopenharmony_ci
112262306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
112362306a36Sopenharmony_ci		if (lr == XFS_BB_RIGHTSIB)
112462306a36Sopenharmony_ci			ptr->l = block->bb_u.l.bb_rightsib;
112562306a36Sopenharmony_ci		else
112662306a36Sopenharmony_ci			ptr->l = block->bb_u.l.bb_leftsib;
112762306a36Sopenharmony_ci	} else {
112862306a36Sopenharmony_ci		if (lr == XFS_BB_RIGHTSIB)
112962306a36Sopenharmony_ci			ptr->s = block->bb_u.s.bb_rightsib;
113062306a36Sopenharmony_ci		else
113162306a36Sopenharmony_ci			ptr->s = block->bb_u.s.bb_leftsib;
113262306a36Sopenharmony_ci	}
113362306a36Sopenharmony_ci}
113462306a36Sopenharmony_ci
113562306a36Sopenharmony_civoid
113662306a36Sopenharmony_cixfs_btree_set_sibling(
113762306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
113862306a36Sopenharmony_ci	struct xfs_btree_block		*block,
113962306a36Sopenharmony_ci	const union xfs_btree_ptr	*ptr,
114062306a36Sopenharmony_ci	int				lr)
114162306a36Sopenharmony_ci{
114262306a36Sopenharmony_ci	ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
114562306a36Sopenharmony_ci		if (lr == XFS_BB_RIGHTSIB)
114662306a36Sopenharmony_ci			block->bb_u.l.bb_rightsib = ptr->l;
114762306a36Sopenharmony_ci		else
114862306a36Sopenharmony_ci			block->bb_u.l.bb_leftsib = ptr->l;
114962306a36Sopenharmony_ci	} else {
115062306a36Sopenharmony_ci		if (lr == XFS_BB_RIGHTSIB)
115162306a36Sopenharmony_ci			block->bb_u.s.bb_rightsib = ptr->s;
115262306a36Sopenharmony_ci		else
115362306a36Sopenharmony_ci			block->bb_u.s.bb_leftsib = ptr->s;
115462306a36Sopenharmony_ci	}
115562306a36Sopenharmony_ci}
115662306a36Sopenharmony_ci
115762306a36Sopenharmony_civoid
115862306a36Sopenharmony_cixfs_btree_init_block_int(
115962306a36Sopenharmony_ci	struct xfs_mount	*mp,
116062306a36Sopenharmony_ci	struct xfs_btree_block	*buf,
116162306a36Sopenharmony_ci	xfs_daddr_t		blkno,
116262306a36Sopenharmony_ci	xfs_btnum_t		btnum,
116362306a36Sopenharmony_ci	__u16			level,
116462306a36Sopenharmony_ci	__u16			numrecs,
116562306a36Sopenharmony_ci	__u64			owner,
116662306a36Sopenharmony_ci	unsigned int		flags)
116762306a36Sopenharmony_ci{
116862306a36Sopenharmony_ci	int			crc = xfs_has_crc(mp);
116962306a36Sopenharmony_ci	__u32			magic = xfs_btree_magic(crc, btnum);
117062306a36Sopenharmony_ci
117162306a36Sopenharmony_ci	buf->bb_magic = cpu_to_be32(magic);
117262306a36Sopenharmony_ci	buf->bb_level = cpu_to_be16(level);
117362306a36Sopenharmony_ci	buf->bb_numrecs = cpu_to_be16(numrecs);
117462306a36Sopenharmony_ci
117562306a36Sopenharmony_ci	if (flags & XFS_BTREE_LONG_PTRS) {
117662306a36Sopenharmony_ci		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
117762306a36Sopenharmony_ci		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
117862306a36Sopenharmony_ci		if (crc) {
117962306a36Sopenharmony_ci			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
118062306a36Sopenharmony_ci			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
118162306a36Sopenharmony_ci			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid);
118262306a36Sopenharmony_ci			buf->bb_u.l.bb_pad = 0;
118362306a36Sopenharmony_ci			buf->bb_u.l.bb_lsn = 0;
118462306a36Sopenharmony_ci		}
118562306a36Sopenharmony_ci	} else {
118662306a36Sopenharmony_ci		/* owner is a 32 bit value on short blocks */
118762306a36Sopenharmony_ci		__u32 __owner = (__u32)owner;
118862306a36Sopenharmony_ci
118962306a36Sopenharmony_ci		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
119062306a36Sopenharmony_ci		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
119162306a36Sopenharmony_ci		if (crc) {
119262306a36Sopenharmony_ci			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
119362306a36Sopenharmony_ci			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
119462306a36Sopenharmony_ci			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid);
119562306a36Sopenharmony_ci			buf->bb_u.s.bb_lsn = 0;
119662306a36Sopenharmony_ci		}
119762306a36Sopenharmony_ci	}
119862306a36Sopenharmony_ci}
119962306a36Sopenharmony_ci
120062306a36Sopenharmony_civoid
120162306a36Sopenharmony_cixfs_btree_init_block(
120262306a36Sopenharmony_ci	struct xfs_mount *mp,
120362306a36Sopenharmony_ci	struct xfs_buf	*bp,
120462306a36Sopenharmony_ci	xfs_btnum_t	btnum,
120562306a36Sopenharmony_ci	__u16		level,
120662306a36Sopenharmony_ci	__u16		numrecs,
120762306a36Sopenharmony_ci	__u64		owner)
120862306a36Sopenharmony_ci{
120962306a36Sopenharmony_ci	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), xfs_buf_daddr(bp),
121062306a36Sopenharmony_ci				 btnum, level, numrecs, owner, 0);
121162306a36Sopenharmony_ci}
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_civoid
121462306a36Sopenharmony_cixfs_btree_init_block_cur(
121562306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
121662306a36Sopenharmony_ci	struct xfs_buf		*bp,
121762306a36Sopenharmony_ci	int			level,
121862306a36Sopenharmony_ci	int			numrecs)
121962306a36Sopenharmony_ci{
122062306a36Sopenharmony_ci	__u64			owner;
122162306a36Sopenharmony_ci
122262306a36Sopenharmony_ci	/*
122362306a36Sopenharmony_ci	 * we can pull the owner from the cursor right now as the different
122462306a36Sopenharmony_ci	 * owners align directly with the pointer size of the btree. This may
122562306a36Sopenharmony_ci	 * change in future, but is safe for current users of the generic btree
122662306a36Sopenharmony_ci	 * code.
122762306a36Sopenharmony_ci	 */
122862306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
122962306a36Sopenharmony_ci		owner = cur->bc_ino.ip->i_ino;
123062306a36Sopenharmony_ci	else
123162306a36Sopenharmony_ci		owner = cur->bc_ag.pag->pag_agno;
123262306a36Sopenharmony_ci
123362306a36Sopenharmony_ci	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp),
123462306a36Sopenharmony_ci				xfs_buf_daddr(bp), cur->bc_btnum, level,
123562306a36Sopenharmony_ci				numrecs, owner, cur->bc_flags);
123662306a36Sopenharmony_ci}
123762306a36Sopenharmony_ci
123862306a36Sopenharmony_ci/*
123962306a36Sopenharmony_ci * Return true if ptr is the last record in the btree and
124062306a36Sopenharmony_ci * we need to track updates to this record.  The decision
124162306a36Sopenharmony_ci * will be further refined in the update_lastrec method.
124262306a36Sopenharmony_ci */
124362306a36Sopenharmony_ciSTATIC int
124462306a36Sopenharmony_cixfs_btree_is_lastrec(
124562306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
124662306a36Sopenharmony_ci	struct xfs_btree_block	*block,
124762306a36Sopenharmony_ci	int			level)
124862306a36Sopenharmony_ci{
124962306a36Sopenharmony_ci	union xfs_btree_ptr	ptr;
125062306a36Sopenharmony_ci
125162306a36Sopenharmony_ci	if (level > 0)
125262306a36Sopenharmony_ci		return 0;
125362306a36Sopenharmony_ci	if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
125462306a36Sopenharmony_ci		return 0;
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
125762306a36Sopenharmony_ci	if (!xfs_btree_ptr_is_null(cur, &ptr))
125862306a36Sopenharmony_ci		return 0;
125962306a36Sopenharmony_ci	return 1;
126062306a36Sopenharmony_ci}
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_ciSTATIC void
126362306a36Sopenharmony_cixfs_btree_buf_to_ptr(
126462306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
126562306a36Sopenharmony_ci	struct xfs_buf		*bp,
126662306a36Sopenharmony_ci	union xfs_btree_ptr	*ptr)
126762306a36Sopenharmony_ci{
126862306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
126962306a36Sopenharmony_ci		ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
127062306a36Sopenharmony_ci					xfs_buf_daddr(bp)));
127162306a36Sopenharmony_ci	else {
127262306a36Sopenharmony_ci		ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp,
127362306a36Sopenharmony_ci					xfs_buf_daddr(bp)));
127462306a36Sopenharmony_ci	}
127562306a36Sopenharmony_ci}
127662306a36Sopenharmony_ci
127762306a36Sopenharmony_ciSTATIC void
127862306a36Sopenharmony_cixfs_btree_set_refs(
127962306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
128062306a36Sopenharmony_ci	struct xfs_buf		*bp)
128162306a36Sopenharmony_ci{
128262306a36Sopenharmony_ci	switch (cur->bc_btnum) {
128362306a36Sopenharmony_ci	case XFS_BTNUM_BNO:
128462306a36Sopenharmony_ci	case XFS_BTNUM_CNT:
128562306a36Sopenharmony_ci		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
128662306a36Sopenharmony_ci		break;
128762306a36Sopenharmony_ci	case XFS_BTNUM_INO:
128862306a36Sopenharmony_ci	case XFS_BTNUM_FINO:
128962306a36Sopenharmony_ci		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
129062306a36Sopenharmony_ci		break;
129162306a36Sopenharmony_ci	case XFS_BTNUM_BMAP:
129262306a36Sopenharmony_ci		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
129362306a36Sopenharmony_ci		break;
129462306a36Sopenharmony_ci	case XFS_BTNUM_RMAP:
129562306a36Sopenharmony_ci		xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF);
129662306a36Sopenharmony_ci		break;
129762306a36Sopenharmony_ci	case XFS_BTNUM_REFC:
129862306a36Sopenharmony_ci		xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF);
129962306a36Sopenharmony_ci		break;
130062306a36Sopenharmony_ci	default:
130162306a36Sopenharmony_ci		ASSERT(0);
130262306a36Sopenharmony_ci	}
130362306a36Sopenharmony_ci}
130462306a36Sopenharmony_ci
130562306a36Sopenharmony_ciint
130662306a36Sopenharmony_cixfs_btree_get_buf_block(
130762306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
130862306a36Sopenharmony_ci	const union xfs_btree_ptr	*ptr,
130962306a36Sopenharmony_ci	struct xfs_btree_block		**block,
131062306a36Sopenharmony_ci	struct xfs_buf			**bpp)
131162306a36Sopenharmony_ci{
131262306a36Sopenharmony_ci	struct xfs_mount	*mp = cur->bc_mp;
131362306a36Sopenharmony_ci	xfs_daddr_t		d;
131462306a36Sopenharmony_ci	int			error;
131562306a36Sopenharmony_ci
131662306a36Sopenharmony_ci	error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
131762306a36Sopenharmony_ci	if (error)
131862306a36Sopenharmony_ci		return error;
131962306a36Sopenharmony_ci	error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize,
132062306a36Sopenharmony_ci			0, bpp);
132162306a36Sopenharmony_ci	if (error)
132262306a36Sopenharmony_ci		return error;
132362306a36Sopenharmony_ci
132462306a36Sopenharmony_ci	(*bpp)->b_ops = cur->bc_ops->buf_ops;
132562306a36Sopenharmony_ci	*block = XFS_BUF_TO_BLOCK(*bpp);
132662306a36Sopenharmony_ci	return 0;
132762306a36Sopenharmony_ci}
132862306a36Sopenharmony_ci
132962306a36Sopenharmony_ci/*
133062306a36Sopenharmony_ci * Read in the buffer at the given ptr and return the buffer and
133162306a36Sopenharmony_ci * the block pointer within the buffer.
133262306a36Sopenharmony_ci */
133362306a36Sopenharmony_ciSTATIC int
133462306a36Sopenharmony_cixfs_btree_read_buf_block(
133562306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
133662306a36Sopenharmony_ci	const union xfs_btree_ptr	*ptr,
133762306a36Sopenharmony_ci	int				flags,
133862306a36Sopenharmony_ci	struct xfs_btree_block		**block,
133962306a36Sopenharmony_ci	struct xfs_buf			**bpp)
134062306a36Sopenharmony_ci{
134162306a36Sopenharmony_ci	struct xfs_mount	*mp = cur->bc_mp;
134262306a36Sopenharmony_ci	xfs_daddr_t		d;
134362306a36Sopenharmony_ci	int			error;
134462306a36Sopenharmony_ci
134562306a36Sopenharmony_ci	/* need to sort out how callers deal with failures first */
134662306a36Sopenharmony_ci	ASSERT(!(flags & XBF_TRYLOCK));
134762306a36Sopenharmony_ci
134862306a36Sopenharmony_ci	error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
134962306a36Sopenharmony_ci	if (error)
135062306a36Sopenharmony_ci		return error;
135162306a36Sopenharmony_ci	error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
135262306a36Sopenharmony_ci				   mp->m_bsize, flags, bpp,
135362306a36Sopenharmony_ci				   cur->bc_ops->buf_ops);
135462306a36Sopenharmony_ci	if (error)
135562306a36Sopenharmony_ci		return error;
135662306a36Sopenharmony_ci
135762306a36Sopenharmony_ci	xfs_btree_set_refs(cur, *bpp);
135862306a36Sopenharmony_ci	*block = XFS_BUF_TO_BLOCK(*bpp);
135962306a36Sopenharmony_ci	return 0;
136062306a36Sopenharmony_ci}
136162306a36Sopenharmony_ci
136262306a36Sopenharmony_ci/*
136362306a36Sopenharmony_ci * Copy keys from one btree block to another.
136462306a36Sopenharmony_ci */
136562306a36Sopenharmony_civoid
136662306a36Sopenharmony_cixfs_btree_copy_keys(
136762306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
136862306a36Sopenharmony_ci	union xfs_btree_key		*dst_key,
136962306a36Sopenharmony_ci	const union xfs_btree_key	*src_key,
137062306a36Sopenharmony_ci	int				numkeys)
137162306a36Sopenharmony_ci{
137262306a36Sopenharmony_ci	ASSERT(numkeys >= 0);
137362306a36Sopenharmony_ci	memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
137462306a36Sopenharmony_ci}
137562306a36Sopenharmony_ci
137662306a36Sopenharmony_ci/*
137762306a36Sopenharmony_ci * Copy records from one btree block to another.
137862306a36Sopenharmony_ci */
137962306a36Sopenharmony_ciSTATIC void
138062306a36Sopenharmony_cixfs_btree_copy_recs(
138162306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
138262306a36Sopenharmony_ci	union xfs_btree_rec	*dst_rec,
138362306a36Sopenharmony_ci	union xfs_btree_rec	*src_rec,
138462306a36Sopenharmony_ci	int			numrecs)
138562306a36Sopenharmony_ci{
138662306a36Sopenharmony_ci	ASSERT(numrecs >= 0);
138762306a36Sopenharmony_ci	memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
138862306a36Sopenharmony_ci}
138962306a36Sopenharmony_ci
/*
 * Copy @numptrs block pointers from @src_ptr to @dst_ptr.  The size of one
 * pointer comes from xfs_btree_ptr_len().  memcpy() is used, so the two
 * ranges must not overlap.
 */
void
xfs_btree_copy_ptrs(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*dst_ptr,
	const union xfs_btree_ptr *src_ptr,
	int			numptrs)
{
	size_t			nbytes;

	ASSERT(numptrs >= 0);

	nbytes = numptrs * xfs_btree_ptr_len(cur);
	memcpy(dst_ptr, src_ptr, nbytes);
}
140362306a36Sopenharmony_ci
140462306a36Sopenharmony_ci/*
140562306a36Sopenharmony_ci * Shift keys one index left/right inside a single btree block.
140662306a36Sopenharmony_ci */
140762306a36Sopenharmony_ciSTATIC void
140862306a36Sopenharmony_cixfs_btree_shift_keys(
140962306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
141062306a36Sopenharmony_ci	union xfs_btree_key	*key,
141162306a36Sopenharmony_ci	int			dir,
141262306a36Sopenharmony_ci	int			numkeys)
141362306a36Sopenharmony_ci{
141462306a36Sopenharmony_ci	char			*dst_key;
141562306a36Sopenharmony_ci
141662306a36Sopenharmony_ci	ASSERT(numkeys >= 0);
141762306a36Sopenharmony_ci	ASSERT(dir == 1 || dir == -1);
141862306a36Sopenharmony_ci
141962306a36Sopenharmony_ci	dst_key = (char *)key + (dir * cur->bc_ops->key_len);
142062306a36Sopenharmony_ci	memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
142162306a36Sopenharmony_ci}
142262306a36Sopenharmony_ci
142362306a36Sopenharmony_ci/*
142462306a36Sopenharmony_ci * Shift records one index left/right inside a single btree block.
142562306a36Sopenharmony_ci */
142662306a36Sopenharmony_ciSTATIC void
142762306a36Sopenharmony_cixfs_btree_shift_recs(
142862306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
142962306a36Sopenharmony_ci	union xfs_btree_rec	*rec,
143062306a36Sopenharmony_ci	int			dir,
143162306a36Sopenharmony_ci	int			numrecs)
143262306a36Sopenharmony_ci{
143362306a36Sopenharmony_ci	char			*dst_rec;
143462306a36Sopenharmony_ci
143562306a36Sopenharmony_ci	ASSERT(numrecs >= 0);
143662306a36Sopenharmony_ci	ASSERT(dir == 1 || dir == -1);
143762306a36Sopenharmony_ci
143862306a36Sopenharmony_ci	dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
143962306a36Sopenharmony_ci	memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
144062306a36Sopenharmony_ci}
144162306a36Sopenharmony_ci
144262306a36Sopenharmony_ci/*
144362306a36Sopenharmony_ci * Shift block pointers one index left/right inside a single btree block.
144462306a36Sopenharmony_ci */
144562306a36Sopenharmony_ciSTATIC void
144662306a36Sopenharmony_cixfs_btree_shift_ptrs(
144762306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
144862306a36Sopenharmony_ci	union xfs_btree_ptr	*ptr,
144962306a36Sopenharmony_ci	int			dir,
145062306a36Sopenharmony_ci	int			numptrs)
145162306a36Sopenharmony_ci{
145262306a36Sopenharmony_ci	char			*dst_ptr;
145362306a36Sopenharmony_ci
145462306a36Sopenharmony_ci	ASSERT(numptrs >= 0);
145562306a36Sopenharmony_ci	ASSERT(dir == 1 || dir == -1);
145662306a36Sopenharmony_ci
145762306a36Sopenharmony_ci	dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
145862306a36Sopenharmony_ci	memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
145962306a36Sopenharmony_ci}
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_ci/*
146262306a36Sopenharmony_ci * Log key values from the btree block.
146362306a36Sopenharmony_ci */
146462306a36Sopenharmony_ciSTATIC void
146562306a36Sopenharmony_cixfs_btree_log_keys(
146662306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
146762306a36Sopenharmony_ci	struct xfs_buf		*bp,
146862306a36Sopenharmony_ci	int			first,
146962306a36Sopenharmony_ci	int			last)
147062306a36Sopenharmony_ci{
147162306a36Sopenharmony_ci
147262306a36Sopenharmony_ci	if (bp) {
147362306a36Sopenharmony_ci		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
147462306a36Sopenharmony_ci		xfs_trans_log_buf(cur->bc_tp, bp,
147562306a36Sopenharmony_ci				  xfs_btree_key_offset(cur, first),
147662306a36Sopenharmony_ci				  xfs_btree_key_offset(cur, last + 1) - 1);
147762306a36Sopenharmony_ci	} else {
147862306a36Sopenharmony_ci		xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
147962306a36Sopenharmony_ci				xfs_ilog_fbroot(cur->bc_ino.whichfork));
148062306a36Sopenharmony_ci	}
148162306a36Sopenharmony_ci}
148262306a36Sopenharmony_ci
148362306a36Sopenharmony_ci/*
148462306a36Sopenharmony_ci * Log record values from the btree block.
148562306a36Sopenharmony_ci */
148662306a36Sopenharmony_civoid
148762306a36Sopenharmony_cixfs_btree_log_recs(
148862306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
148962306a36Sopenharmony_ci	struct xfs_buf		*bp,
149062306a36Sopenharmony_ci	int			first,
149162306a36Sopenharmony_ci	int			last)
149262306a36Sopenharmony_ci{
149362306a36Sopenharmony_ci
149462306a36Sopenharmony_ci	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
149562306a36Sopenharmony_ci	xfs_trans_log_buf(cur->bc_tp, bp,
149662306a36Sopenharmony_ci			  xfs_btree_rec_offset(cur, first),
149762306a36Sopenharmony_ci			  xfs_btree_rec_offset(cur, last + 1) - 1);
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_ci}
150062306a36Sopenharmony_ci
150162306a36Sopenharmony_ci/*
150262306a36Sopenharmony_ci * Log block pointer fields from a btree block (nonleaf).
150362306a36Sopenharmony_ci */
150462306a36Sopenharmony_ciSTATIC void
150562306a36Sopenharmony_cixfs_btree_log_ptrs(
150662306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
150762306a36Sopenharmony_ci	struct xfs_buf		*bp,	/* buffer containing btree block */
150862306a36Sopenharmony_ci	int			first,	/* index of first pointer to log */
150962306a36Sopenharmony_ci	int			last)	/* index of last pointer to log */
151062306a36Sopenharmony_ci{
151162306a36Sopenharmony_ci
151262306a36Sopenharmony_ci	if (bp) {
151362306a36Sopenharmony_ci		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
151462306a36Sopenharmony_ci		int			level = xfs_btree_get_level(block);
151562306a36Sopenharmony_ci
151662306a36Sopenharmony_ci		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
151762306a36Sopenharmony_ci		xfs_trans_log_buf(cur->bc_tp, bp,
151862306a36Sopenharmony_ci				xfs_btree_ptr_offset(cur, first, level),
151962306a36Sopenharmony_ci				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
152062306a36Sopenharmony_ci	} else {
152162306a36Sopenharmony_ci		xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
152262306a36Sopenharmony_ci			xfs_ilog_fbroot(cur->bc_ino.whichfork));
152362306a36Sopenharmony_ci	}
152462306a36Sopenharmony_ci
152562306a36Sopenharmony_ci}
152662306a36Sopenharmony_ci
152762306a36Sopenharmony_ci/*
152862306a36Sopenharmony_ci * Log fields from a btree block header.
152962306a36Sopenharmony_ci */
153062306a36Sopenharmony_civoid
153162306a36Sopenharmony_cixfs_btree_log_block(
153262306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
153362306a36Sopenharmony_ci	struct xfs_buf		*bp,	/* buffer containing btree block */
153462306a36Sopenharmony_ci	uint32_t		fields)	/* mask of fields: XFS_BB_... */
153562306a36Sopenharmony_ci{
153662306a36Sopenharmony_ci	int			first;	/* first byte offset logged */
153762306a36Sopenharmony_ci	int			last;	/* last byte offset logged */
153862306a36Sopenharmony_ci	static const short	soffsets[] = {	/* table of offsets (short) */
153962306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_magic),
154062306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_level),
154162306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_numrecs),
154262306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
154362306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
154462306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
154562306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
154662306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
154762306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
154862306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
154962306a36Sopenharmony_ci		XFS_BTREE_SBLOCK_CRC_LEN
155062306a36Sopenharmony_ci	};
155162306a36Sopenharmony_ci	static const short	loffsets[] = {	/* table of offsets (long) */
155262306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_magic),
155362306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_level),
155462306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_numrecs),
155562306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
155662306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
155762306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
155862306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
155962306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
156062306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
156162306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
156262306a36Sopenharmony_ci		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
156362306a36Sopenharmony_ci		XFS_BTREE_LBLOCK_CRC_LEN
156462306a36Sopenharmony_ci	};
156562306a36Sopenharmony_ci
156662306a36Sopenharmony_ci	if (bp) {
156762306a36Sopenharmony_ci		int nbits;
156862306a36Sopenharmony_ci
156962306a36Sopenharmony_ci		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
157062306a36Sopenharmony_ci			/*
157162306a36Sopenharmony_ci			 * We don't log the CRC when updating a btree
157262306a36Sopenharmony_ci			 * block but instead recreate it during log
157362306a36Sopenharmony_ci			 * recovery.  As the log buffers have checksums
157462306a36Sopenharmony_ci			 * of their own this is safe and avoids logging a crc
157562306a36Sopenharmony_ci			 * update in a lot of places.
157662306a36Sopenharmony_ci			 */
157762306a36Sopenharmony_ci			if (fields == XFS_BB_ALL_BITS)
157862306a36Sopenharmony_ci				fields = XFS_BB_ALL_BITS_CRC;
157962306a36Sopenharmony_ci			nbits = XFS_BB_NUM_BITS_CRC;
158062306a36Sopenharmony_ci		} else {
158162306a36Sopenharmony_ci			nbits = XFS_BB_NUM_BITS;
158262306a36Sopenharmony_ci		}
158362306a36Sopenharmony_ci		xfs_btree_offsets(fields,
158462306a36Sopenharmony_ci				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
158562306a36Sopenharmony_ci					loffsets : soffsets,
158662306a36Sopenharmony_ci				  nbits, &first, &last);
158762306a36Sopenharmony_ci		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
158862306a36Sopenharmony_ci		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
158962306a36Sopenharmony_ci	} else {
159062306a36Sopenharmony_ci		xfs_trans_log_inode(cur->bc_tp, cur->bc_ino.ip,
159162306a36Sopenharmony_ci			xfs_ilog_fbroot(cur->bc_ino.whichfork));
159262306a36Sopenharmony_ci	}
159362306a36Sopenharmony_ci}
159462306a36Sopenharmony_ci
159562306a36Sopenharmony_ci/*
159662306a36Sopenharmony_ci * Increment cursor by one record at the level.
159762306a36Sopenharmony_ci * For nonzero levels the leaf-ward information is untouched.
159862306a36Sopenharmony_ci */
159962306a36Sopenharmony_ciint						/* error */
160062306a36Sopenharmony_cixfs_btree_increment(
160162306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
160262306a36Sopenharmony_ci	int			level,
160362306a36Sopenharmony_ci	int			*stat)		/* success/failure */
160462306a36Sopenharmony_ci{
160562306a36Sopenharmony_ci	struct xfs_btree_block	*block;
160662306a36Sopenharmony_ci	union xfs_btree_ptr	ptr;
160762306a36Sopenharmony_ci	struct xfs_buf		*bp;
160862306a36Sopenharmony_ci	int			error;		/* error return value */
160962306a36Sopenharmony_ci	int			lev;
161062306a36Sopenharmony_ci
161162306a36Sopenharmony_ci	ASSERT(level < cur->bc_nlevels);
161262306a36Sopenharmony_ci
161362306a36Sopenharmony_ci	/* Read-ahead to the right at this level. */
161462306a36Sopenharmony_ci	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_ci	/* Get a pointer to the btree block. */
161762306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci#ifdef DEBUG
162062306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, block, level, bp);
162162306a36Sopenharmony_ci	if (error)
162262306a36Sopenharmony_ci		goto error0;
162362306a36Sopenharmony_ci#endif
162462306a36Sopenharmony_ci
162562306a36Sopenharmony_ci	/* We're done if we remain in the block after the increment. */
162662306a36Sopenharmony_ci	if (++cur->bc_levels[level].ptr <= xfs_btree_get_numrecs(block))
162762306a36Sopenharmony_ci		goto out1;
162862306a36Sopenharmony_ci
162962306a36Sopenharmony_ci	/* Fail if we just went off the right edge of the tree. */
163062306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
163162306a36Sopenharmony_ci	if (xfs_btree_ptr_is_null(cur, &ptr))
163262306a36Sopenharmony_ci		goto out0;
163362306a36Sopenharmony_ci
163462306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, increment);
163562306a36Sopenharmony_ci
163662306a36Sopenharmony_ci	/*
163762306a36Sopenharmony_ci	 * March up the tree incrementing pointers.
163862306a36Sopenharmony_ci	 * Stop when we don't go off the right edge of a block.
163962306a36Sopenharmony_ci	 */
164062306a36Sopenharmony_ci	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
164162306a36Sopenharmony_ci		block = xfs_btree_get_block(cur, lev, &bp);
164262306a36Sopenharmony_ci
164362306a36Sopenharmony_ci#ifdef DEBUG
164462306a36Sopenharmony_ci		error = xfs_btree_check_block(cur, block, lev, bp);
164562306a36Sopenharmony_ci		if (error)
164662306a36Sopenharmony_ci			goto error0;
164762306a36Sopenharmony_ci#endif
164862306a36Sopenharmony_ci
164962306a36Sopenharmony_ci		if (++cur->bc_levels[lev].ptr <= xfs_btree_get_numrecs(block))
165062306a36Sopenharmony_ci			break;
165162306a36Sopenharmony_ci
165262306a36Sopenharmony_ci		/* Read-ahead the right block for the next loop. */
165362306a36Sopenharmony_ci		xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
165462306a36Sopenharmony_ci	}
165562306a36Sopenharmony_ci
165662306a36Sopenharmony_ci	/*
165762306a36Sopenharmony_ci	 * If we went off the root then we are either seriously
165862306a36Sopenharmony_ci	 * confused or have the tree root in an inode.
165962306a36Sopenharmony_ci	 */
166062306a36Sopenharmony_ci	if (lev == cur->bc_nlevels) {
166162306a36Sopenharmony_ci		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
166262306a36Sopenharmony_ci			goto out0;
166362306a36Sopenharmony_ci		ASSERT(0);
166462306a36Sopenharmony_ci		error = -EFSCORRUPTED;
166562306a36Sopenharmony_ci		goto error0;
166662306a36Sopenharmony_ci	}
166762306a36Sopenharmony_ci	ASSERT(lev < cur->bc_nlevels);
166862306a36Sopenharmony_ci
166962306a36Sopenharmony_ci	/*
167062306a36Sopenharmony_ci	 * Now walk back down the tree, fixing up the cursor's buffer
167162306a36Sopenharmony_ci	 * pointers and key numbers.
167262306a36Sopenharmony_ci	 */
167362306a36Sopenharmony_ci	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
167462306a36Sopenharmony_ci		union xfs_btree_ptr	*ptrp;
167562306a36Sopenharmony_ci
167662306a36Sopenharmony_ci		ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
167762306a36Sopenharmony_ci		--lev;
167862306a36Sopenharmony_ci		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
167962306a36Sopenharmony_ci		if (error)
168062306a36Sopenharmony_ci			goto error0;
168162306a36Sopenharmony_ci
168262306a36Sopenharmony_ci		xfs_btree_setbuf(cur, lev, bp);
168362306a36Sopenharmony_ci		cur->bc_levels[lev].ptr = 1;
168462306a36Sopenharmony_ci	}
168562306a36Sopenharmony_ciout1:
168662306a36Sopenharmony_ci	*stat = 1;
168762306a36Sopenharmony_ci	return 0;
168862306a36Sopenharmony_ci
168962306a36Sopenharmony_ciout0:
169062306a36Sopenharmony_ci	*stat = 0;
169162306a36Sopenharmony_ci	return 0;
169262306a36Sopenharmony_ci
169362306a36Sopenharmony_cierror0:
169462306a36Sopenharmony_ci	return error;
169562306a36Sopenharmony_ci}
169662306a36Sopenharmony_ci
169762306a36Sopenharmony_ci/*
169862306a36Sopenharmony_ci * Decrement cursor by one record at the level.
169962306a36Sopenharmony_ci * For nonzero levels the leaf-ward information is untouched.
170062306a36Sopenharmony_ci */
170162306a36Sopenharmony_ciint						/* error */
170262306a36Sopenharmony_cixfs_btree_decrement(
170362306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
170462306a36Sopenharmony_ci	int			level,
170562306a36Sopenharmony_ci	int			*stat)		/* success/failure */
170662306a36Sopenharmony_ci{
170762306a36Sopenharmony_ci	struct xfs_btree_block	*block;
170862306a36Sopenharmony_ci	struct xfs_buf		*bp;
170962306a36Sopenharmony_ci	int			error;		/* error return value */
171062306a36Sopenharmony_ci	int			lev;
171162306a36Sopenharmony_ci	union xfs_btree_ptr	ptr;
171262306a36Sopenharmony_ci
171362306a36Sopenharmony_ci	ASSERT(level < cur->bc_nlevels);
171462306a36Sopenharmony_ci
171562306a36Sopenharmony_ci	/* Read-ahead to the left at this level. */
171662306a36Sopenharmony_ci	xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
171762306a36Sopenharmony_ci
171862306a36Sopenharmony_ci	/* We're done if we remain in the block after the decrement. */
171962306a36Sopenharmony_ci	if (--cur->bc_levels[level].ptr > 0)
172062306a36Sopenharmony_ci		goto out1;
172162306a36Sopenharmony_ci
172262306a36Sopenharmony_ci	/* Get a pointer to the btree block. */
172362306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
172462306a36Sopenharmony_ci
172562306a36Sopenharmony_ci#ifdef DEBUG
172662306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, block, level, bp);
172762306a36Sopenharmony_ci	if (error)
172862306a36Sopenharmony_ci		goto error0;
172962306a36Sopenharmony_ci#endif
173062306a36Sopenharmony_ci
173162306a36Sopenharmony_ci	/* Fail if we just went off the left edge of the tree. */
173262306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
173362306a36Sopenharmony_ci	if (xfs_btree_ptr_is_null(cur, &ptr))
173462306a36Sopenharmony_ci		goto out0;
173562306a36Sopenharmony_ci
173662306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, decrement);
173762306a36Sopenharmony_ci
173862306a36Sopenharmony_ci	/*
173962306a36Sopenharmony_ci	 * March up the tree decrementing pointers.
174062306a36Sopenharmony_ci	 * Stop when we don't go off the left edge of a block.
174162306a36Sopenharmony_ci	 */
174262306a36Sopenharmony_ci	for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
174362306a36Sopenharmony_ci		if (--cur->bc_levels[lev].ptr > 0)
174462306a36Sopenharmony_ci			break;
174562306a36Sopenharmony_ci		/* Read-ahead the left block for the next loop. */
174662306a36Sopenharmony_ci		xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
174762306a36Sopenharmony_ci	}
174862306a36Sopenharmony_ci
174962306a36Sopenharmony_ci	/*
175062306a36Sopenharmony_ci	 * If we went off the root then we are seriously confused.
175162306a36Sopenharmony_ci	 * or the root of the tree is in an inode.
175262306a36Sopenharmony_ci	 */
175362306a36Sopenharmony_ci	if (lev == cur->bc_nlevels) {
175462306a36Sopenharmony_ci		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
175562306a36Sopenharmony_ci			goto out0;
175662306a36Sopenharmony_ci		ASSERT(0);
175762306a36Sopenharmony_ci		error = -EFSCORRUPTED;
175862306a36Sopenharmony_ci		goto error0;
175962306a36Sopenharmony_ci	}
176062306a36Sopenharmony_ci	ASSERT(lev < cur->bc_nlevels);
176162306a36Sopenharmony_ci
176262306a36Sopenharmony_ci	/*
176362306a36Sopenharmony_ci	 * Now walk back down the tree, fixing up the cursor's buffer
176462306a36Sopenharmony_ci	 * pointers and key numbers.
176562306a36Sopenharmony_ci	 */
176662306a36Sopenharmony_ci	for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
176762306a36Sopenharmony_ci		union xfs_btree_ptr	*ptrp;
176862306a36Sopenharmony_ci
176962306a36Sopenharmony_ci		ptrp = xfs_btree_ptr_addr(cur, cur->bc_levels[lev].ptr, block);
177062306a36Sopenharmony_ci		--lev;
177162306a36Sopenharmony_ci		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
177262306a36Sopenharmony_ci		if (error)
177362306a36Sopenharmony_ci			goto error0;
177462306a36Sopenharmony_ci		xfs_btree_setbuf(cur, lev, bp);
177562306a36Sopenharmony_ci		cur->bc_levels[lev].ptr = xfs_btree_get_numrecs(block);
177662306a36Sopenharmony_ci	}
177762306a36Sopenharmony_ciout1:
177862306a36Sopenharmony_ci	*stat = 1;
177962306a36Sopenharmony_ci	return 0;
178062306a36Sopenharmony_ci
178162306a36Sopenharmony_ciout0:
178262306a36Sopenharmony_ci	*stat = 0;
178362306a36Sopenharmony_ci	return 0;
178462306a36Sopenharmony_ci
178562306a36Sopenharmony_cierror0:
178662306a36Sopenharmony_ci	return error;
178762306a36Sopenharmony_ci}
178862306a36Sopenharmony_ci
178962306a36Sopenharmony_ciint
179062306a36Sopenharmony_cixfs_btree_lookup_get_block(
179162306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,	/* btree cursor */
179262306a36Sopenharmony_ci	int				level,	/* level in the btree */
179362306a36Sopenharmony_ci	const union xfs_btree_ptr	*pp,	/* ptr to btree block */
179462306a36Sopenharmony_ci	struct xfs_btree_block		**blkp) /* return btree block */
179562306a36Sopenharmony_ci{
179662306a36Sopenharmony_ci	struct xfs_buf		*bp;	/* buffer pointer for btree block */
179762306a36Sopenharmony_ci	xfs_daddr_t		daddr;
179862306a36Sopenharmony_ci	int			error = 0;
179962306a36Sopenharmony_ci
180062306a36Sopenharmony_ci	/* special case the root block if in an inode */
180162306a36Sopenharmony_ci	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
180262306a36Sopenharmony_ci	    (level == cur->bc_nlevels - 1)) {
180362306a36Sopenharmony_ci		*blkp = xfs_btree_get_iroot(cur);
180462306a36Sopenharmony_ci		return 0;
180562306a36Sopenharmony_ci	}
180662306a36Sopenharmony_ci
180762306a36Sopenharmony_ci	/*
180862306a36Sopenharmony_ci	 * If the old buffer at this level for the disk address we are
180962306a36Sopenharmony_ci	 * looking for re-use it.
181062306a36Sopenharmony_ci	 *
181162306a36Sopenharmony_ci	 * Otherwise throw it away and get a new one.
181262306a36Sopenharmony_ci	 */
181362306a36Sopenharmony_ci	bp = cur->bc_levels[level].bp;
181462306a36Sopenharmony_ci	error = xfs_btree_ptr_to_daddr(cur, pp, &daddr);
181562306a36Sopenharmony_ci	if (error)
181662306a36Sopenharmony_ci		return error;
181762306a36Sopenharmony_ci	if (bp && xfs_buf_daddr(bp) == daddr) {
181862306a36Sopenharmony_ci		*blkp = XFS_BUF_TO_BLOCK(bp);
181962306a36Sopenharmony_ci		return 0;
182062306a36Sopenharmony_ci	}
182162306a36Sopenharmony_ci
182262306a36Sopenharmony_ci	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
182362306a36Sopenharmony_ci	if (error)
182462306a36Sopenharmony_ci		return error;
182562306a36Sopenharmony_ci
182662306a36Sopenharmony_ci	/* Check the inode owner since the verifiers don't. */
182762306a36Sopenharmony_ci	if (xfs_has_crc(cur->bc_mp) &&
182862306a36Sopenharmony_ci	    !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) &&
182962306a36Sopenharmony_ci	    (cur->bc_flags & XFS_BTREE_LONG_PTRS) &&
183062306a36Sopenharmony_ci	    be64_to_cpu((*blkp)->bb_u.l.bb_owner) !=
183162306a36Sopenharmony_ci			cur->bc_ino.ip->i_ino)
183262306a36Sopenharmony_ci		goto out_bad;
183362306a36Sopenharmony_ci
183462306a36Sopenharmony_ci	/* Did we get the level we were looking for? */
183562306a36Sopenharmony_ci	if (be16_to_cpu((*blkp)->bb_level) != level)
183662306a36Sopenharmony_ci		goto out_bad;
183762306a36Sopenharmony_ci
183862306a36Sopenharmony_ci	/* Check that internal nodes have at least one record. */
183962306a36Sopenharmony_ci	if (level != 0 && be16_to_cpu((*blkp)->bb_numrecs) == 0)
184062306a36Sopenharmony_ci		goto out_bad;
184162306a36Sopenharmony_ci
184262306a36Sopenharmony_ci	xfs_btree_setbuf(cur, level, bp);
184362306a36Sopenharmony_ci	return 0;
184462306a36Sopenharmony_ci
184562306a36Sopenharmony_ciout_bad:
184662306a36Sopenharmony_ci	*blkp = NULL;
184762306a36Sopenharmony_ci	xfs_buf_mark_corrupt(bp);
184862306a36Sopenharmony_ci	xfs_trans_brelse(cur->bc_tp, bp);
184962306a36Sopenharmony_ci	return -EFSCORRUPTED;
185062306a36Sopenharmony_ci}
185162306a36Sopenharmony_ci
185262306a36Sopenharmony_ci/*
185362306a36Sopenharmony_ci * Get current search key.  For level 0 we don't actually have a key
185462306a36Sopenharmony_ci * structure so we make one up from the record.  For all other levels
185562306a36Sopenharmony_ci * we just return the right key.
185662306a36Sopenharmony_ci */
185762306a36Sopenharmony_ciSTATIC union xfs_btree_key *
185862306a36Sopenharmony_cixfs_lookup_get_search_key(
185962306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
186062306a36Sopenharmony_ci	int			level,
186162306a36Sopenharmony_ci	int			keyno,
186262306a36Sopenharmony_ci	struct xfs_btree_block	*block,
186362306a36Sopenharmony_ci	union xfs_btree_key	*kp)
186462306a36Sopenharmony_ci{
186562306a36Sopenharmony_ci	if (level == 0) {
186662306a36Sopenharmony_ci		cur->bc_ops->init_key_from_rec(kp,
186762306a36Sopenharmony_ci				xfs_btree_rec_addr(cur, keyno, block));
186862306a36Sopenharmony_ci		return kp;
186962306a36Sopenharmony_ci	}
187062306a36Sopenharmony_ci
187162306a36Sopenharmony_ci	return xfs_btree_key_addr(cur, keyno, block);
187262306a36Sopenharmony_ci}
187362306a36Sopenharmony_ci
187462306a36Sopenharmony_ci/*
187562306a36Sopenharmony_ci * Lookup the record.  The cursor is made to point to it, based on dir.
187662306a36Sopenharmony_ci * stat is set to 0 if can't find any such record, 1 for success.
187762306a36Sopenharmony_ci */
187862306a36Sopenharmony_ciint					/* error */
187962306a36Sopenharmony_cixfs_btree_lookup(
188062306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
188162306a36Sopenharmony_ci	xfs_lookup_t		dir,	/* <=, ==, or >= */
188262306a36Sopenharmony_ci	int			*stat)	/* success/failure */
188362306a36Sopenharmony_ci{
188462306a36Sopenharmony_ci	struct xfs_btree_block	*block;	/* current btree block */
188562306a36Sopenharmony_ci	int64_t			diff;	/* difference for the current key */
188662306a36Sopenharmony_ci	int			error;	/* error return value */
188762306a36Sopenharmony_ci	int			keyno;	/* current key number */
188862306a36Sopenharmony_ci	int			level;	/* level in the btree */
188962306a36Sopenharmony_ci	union xfs_btree_ptr	*pp;	/* ptr to btree block */
189062306a36Sopenharmony_ci	union xfs_btree_ptr	ptr;	/* ptr to btree block */
189162306a36Sopenharmony_ci
189262306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, lookup);
189362306a36Sopenharmony_ci
189462306a36Sopenharmony_ci	/* No such thing as a zero-level tree. */
189562306a36Sopenharmony_ci	if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0))
189662306a36Sopenharmony_ci		return -EFSCORRUPTED;
189762306a36Sopenharmony_ci
189862306a36Sopenharmony_ci	block = NULL;
189962306a36Sopenharmony_ci	keyno = 0;
190062306a36Sopenharmony_ci
190162306a36Sopenharmony_ci	/* initialise start pointer from cursor */
190262306a36Sopenharmony_ci	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
190362306a36Sopenharmony_ci	pp = &ptr;
190462306a36Sopenharmony_ci
190562306a36Sopenharmony_ci	/*
190662306a36Sopenharmony_ci	 * Iterate over each level in the btree, starting at the root.
190762306a36Sopenharmony_ci	 * For each level above the leaves, find the key we need, based
190862306a36Sopenharmony_ci	 * on the lookup record, then follow the corresponding block
190962306a36Sopenharmony_ci	 * pointer down to the next level.
191062306a36Sopenharmony_ci	 */
191162306a36Sopenharmony_ci	for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
191262306a36Sopenharmony_ci		/* Get the block we need to do the lookup on. */
191362306a36Sopenharmony_ci		error = xfs_btree_lookup_get_block(cur, level, pp, &block);
191462306a36Sopenharmony_ci		if (error)
191562306a36Sopenharmony_ci			goto error0;
191662306a36Sopenharmony_ci
191762306a36Sopenharmony_ci		if (diff == 0) {
191862306a36Sopenharmony_ci			/*
191962306a36Sopenharmony_ci			 * If we already had a key match at a higher level, we
192062306a36Sopenharmony_ci			 * know we need to use the first entry in this block.
192162306a36Sopenharmony_ci			 */
192262306a36Sopenharmony_ci			keyno = 1;
192362306a36Sopenharmony_ci		} else {
192462306a36Sopenharmony_ci			/* Otherwise search this block. Do a binary search. */
192562306a36Sopenharmony_ci
192662306a36Sopenharmony_ci			int	high;	/* high entry number */
192762306a36Sopenharmony_ci			int	low;	/* low entry number */
192862306a36Sopenharmony_ci
192962306a36Sopenharmony_ci			/* Set low and high entry numbers, 1-based. */
193062306a36Sopenharmony_ci			low = 1;
193162306a36Sopenharmony_ci			high = xfs_btree_get_numrecs(block);
193262306a36Sopenharmony_ci			if (!high) {
193362306a36Sopenharmony_ci				/* Block is empty, must be an empty leaf. */
193462306a36Sopenharmony_ci				if (level != 0 || cur->bc_nlevels != 1) {
193562306a36Sopenharmony_ci					XFS_CORRUPTION_ERROR(__func__,
193662306a36Sopenharmony_ci							XFS_ERRLEVEL_LOW,
193762306a36Sopenharmony_ci							cur->bc_mp, block,
193862306a36Sopenharmony_ci							sizeof(*block));
193962306a36Sopenharmony_ci					return -EFSCORRUPTED;
194062306a36Sopenharmony_ci				}
194162306a36Sopenharmony_ci
194262306a36Sopenharmony_ci				cur->bc_levels[0].ptr = dir != XFS_LOOKUP_LE;
194362306a36Sopenharmony_ci				*stat = 0;
194462306a36Sopenharmony_ci				return 0;
194562306a36Sopenharmony_ci			}
194662306a36Sopenharmony_ci
194762306a36Sopenharmony_ci			/* Binary search the block. */
194862306a36Sopenharmony_ci			while (low <= high) {
194962306a36Sopenharmony_ci				union xfs_btree_key	key;
195062306a36Sopenharmony_ci				union xfs_btree_key	*kp;
195162306a36Sopenharmony_ci
195262306a36Sopenharmony_ci				XFS_BTREE_STATS_INC(cur, compare);
195362306a36Sopenharmony_ci
195462306a36Sopenharmony_ci				/* keyno is average of low and high. */
195562306a36Sopenharmony_ci				keyno = (low + high) >> 1;
195662306a36Sopenharmony_ci
195762306a36Sopenharmony_ci				/* Get current search key */
195862306a36Sopenharmony_ci				kp = xfs_lookup_get_search_key(cur, level,
195962306a36Sopenharmony_ci						keyno, block, &key);
196062306a36Sopenharmony_ci
196162306a36Sopenharmony_ci				/*
196262306a36Sopenharmony_ci				 * Compute difference to get next direction:
196362306a36Sopenharmony_ci				 *  - less than, move right
196462306a36Sopenharmony_ci				 *  - greater than, move left
196562306a36Sopenharmony_ci				 *  - equal, we're done
196662306a36Sopenharmony_ci				 */
196762306a36Sopenharmony_ci				diff = cur->bc_ops->key_diff(cur, kp);
196862306a36Sopenharmony_ci				if (diff < 0)
196962306a36Sopenharmony_ci					low = keyno + 1;
197062306a36Sopenharmony_ci				else if (diff > 0)
197162306a36Sopenharmony_ci					high = keyno - 1;
197262306a36Sopenharmony_ci				else
197362306a36Sopenharmony_ci					break;
197462306a36Sopenharmony_ci			}
197562306a36Sopenharmony_ci		}
197662306a36Sopenharmony_ci
197762306a36Sopenharmony_ci		/*
197862306a36Sopenharmony_ci		 * If there are more levels, set up for the next level
197962306a36Sopenharmony_ci		 * by getting the block number and filling in the cursor.
198062306a36Sopenharmony_ci		 */
198162306a36Sopenharmony_ci		if (level > 0) {
198262306a36Sopenharmony_ci			/*
198362306a36Sopenharmony_ci			 * If we moved left, need the previous key number,
198462306a36Sopenharmony_ci			 * unless there isn't one.
198562306a36Sopenharmony_ci			 */
198662306a36Sopenharmony_ci			if (diff > 0 && --keyno < 1)
198762306a36Sopenharmony_ci				keyno = 1;
198862306a36Sopenharmony_ci			pp = xfs_btree_ptr_addr(cur, keyno, block);
198962306a36Sopenharmony_ci
199062306a36Sopenharmony_ci			error = xfs_btree_debug_check_ptr(cur, pp, 0, level);
199162306a36Sopenharmony_ci			if (error)
199262306a36Sopenharmony_ci				goto error0;
199362306a36Sopenharmony_ci
199462306a36Sopenharmony_ci			cur->bc_levels[level].ptr = keyno;
199562306a36Sopenharmony_ci		}
199662306a36Sopenharmony_ci	}
199762306a36Sopenharmony_ci
199862306a36Sopenharmony_ci	/* Done with the search. See if we need to adjust the results. */
199962306a36Sopenharmony_ci	if (dir != XFS_LOOKUP_LE && diff < 0) {
200062306a36Sopenharmony_ci		keyno++;
200162306a36Sopenharmony_ci		/*
200262306a36Sopenharmony_ci		 * If ge search and we went off the end of the block, but it's
200362306a36Sopenharmony_ci		 * not the last block, we're in the wrong block.
200462306a36Sopenharmony_ci		 */
200562306a36Sopenharmony_ci		xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
200662306a36Sopenharmony_ci		if (dir == XFS_LOOKUP_GE &&
200762306a36Sopenharmony_ci		    keyno > xfs_btree_get_numrecs(block) &&
200862306a36Sopenharmony_ci		    !xfs_btree_ptr_is_null(cur, &ptr)) {
200962306a36Sopenharmony_ci			int	i;
201062306a36Sopenharmony_ci
201162306a36Sopenharmony_ci			cur->bc_levels[0].ptr = keyno;
201262306a36Sopenharmony_ci			error = xfs_btree_increment(cur, 0, &i);
201362306a36Sopenharmony_ci			if (error)
201462306a36Sopenharmony_ci				goto error0;
201562306a36Sopenharmony_ci			if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
201662306a36Sopenharmony_ci				return -EFSCORRUPTED;
201762306a36Sopenharmony_ci			*stat = 1;
201862306a36Sopenharmony_ci			return 0;
201962306a36Sopenharmony_ci		}
202062306a36Sopenharmony_ci	} else if (dir == XFS_LOOKUP_LE && diff > 0)
202162306a36Sopenharmony_ci		keyno--;
202262306a36Sopenharmony_ci	cur->bc_levels[0].ptr = keyno;
202362306a36Sopenharmony_ci
202462306a36Sopenharmony_ci	/* Return if we succeeded or not. */
202562306a36Sopenharmony_ci	if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
202662306a36Sopenharmony_ci		*stat = 0;
202762306a36Sopenharmony_ci	else if (dir != XFS_LOOKUP_EQ || diff == 0)
202862306a36Sopenharmony_ci		*stat = 1;
202962306a36Sopenharmony_ci	else
203062306a36Sopenharmony_ci		*stat = 0;
203162306a36Sopenharmony_ci	return 0;
203262306a36Sopenharmony_ci
203362306a36Sopenharmony_cierror0:
203462306a36Sopenharmony_ci	return error;
203562306a36Sopenharmony_ci}
203662306a36Sopenharmony_ci
203762306a36Sopenharmony_ci/* Find the high key storage area from a regular key. */
203862306a36Sopenharmony_ciunion xfs_btree_key *
203962306a36Sopenharmony_cixfs_btree_high_key_from_key(
204062306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
204162306a36Sopenharmony_ci	union xfs_btree_key	*key)
204262306a36Sopenharmony_ci{
204362306a36Sopenharmony_ci	ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
204462306a36Sopenharmony_ci	return (union xfs_btree_key *)((char *)key +
204562306a36Sopenharmony_ci			(cur->bc_ops->key_len / 2));
204662306a36Sopenharmony_ci}
204762306a36Sopenharmony_ci
204862306a36Sopenharmony_ci/* Determine the low (and high if overlapped) keys of a leaf block */
204962306a36Sopenharmony_ciSTATIC void
205062306a36Sopenharmony_cixfs_btree_get_leaf_keys(
205162306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
205262306a36Sopenharmony_ci	struct xfs_btree_block	*block,
205362306a36Sopenharmony_ci	union xfs_btree_key	*key)
205462306a36Sopenharmony_ci{
205562306a36Sopenharmony_ci	union xfs_btree_key	max_hkey;
205662306a36Sopenharmony_ci	union xfs_btree_key	hkey;
205762306a36Sopenharmony_ci	union xfs_btree_rec	*rec;
205862306a36Sopenharmony_ci	union xfs_btree_key	*high;
205962306a36Sopenharmony_ci	int			n;
206062306a36Sopenharmony_ci
206162306a36Sopenharmony_ci	rec = xfs_btree_rec_addr(cur, 1, block);
206262306a36Sopenharmony_ci	cur->bc_ops->init_key_from_rec(key, rec);
206362306a36Sopenharmony_ci
206462306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
206562306a36Sopenharmony_ci
206662306a36Sopenharmony_ci		cur->bc_ops->init_high_key_from_rec(&max_hkey, rec);
206762306a36Sopenharmony_ci		for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
206862306a36Sopenharmony_ci			rec = xfs_btree_rec_addr(cur, n, block);
206962306a36Sopenharmony_ci			cur->bc_ops->init_high_key_from_rec(&hkey, rec);
207062306a36Sopenharmony_ci			if (xfs_btree_keycmp_gt(cur, &hkey, &max_hkey))
207162306a36Sopenharmony_ci				max_hkey = hkey;
207262306a36Sopenharmony_ci		}
207362306a36Sopenharmony_ci
207462306a36Sopenharmony_ci		high = xfs_btree_high_key_from_key(cur, key);
207562306a36Sopenharmony_ci		memcpy(high, &max_hkey, cur->bc_ops->key_len / 2);
207662306a36Sopenharmony_ci	}
207762306a36Sopenharmony_ci}
207862306a36Sopenharmony_ci
207962306a36Sopenharmony_ci/* Determine the low (and high if overlapped) keys of a node block */
208062306a36Sopenharmony_ciSTATIC void
208162306a36Sopenharmony_cixfs_btree_get_node_keys(
208262306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
208362306a36Sopenharmony_ci	struct xfs_btree_block	*block,
208462306a36Sopenharmony_ci	union xfs_btree_key	*key)
208562306a36Sopenharmony_ci{
208662306a36Sopenharmony_ci	union xfs_btree_key	*hkey;
208762306a36Sopenharmony_ci	union xfs_btree_key	*max_hkey;
208862306a36Sopenharmony_ci	union xfs_btree_key	*high;
208962306a36Sopenharmony_ci	int			n;
209062306a36Sopenharmony_ci
209162306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
209262306a36Sopenharmony_ci		memcpy(key, xfs_btree_key_addr(cur, 1, block),
209362306a36Sopenharmony_ci				cur->bc_ops->key_len / 2);
209462306a36Sopenharmony_ci
209562306a36Sopenharmony_ci		max_hkey = xfs_btree_high_key_addr(cur, 1, block);
209662306a36Sopenharmony_ci		for (n = 2; n <= xfs_btree_get_numrecs(block); n++) {
209762306a36Sopenharmony_ci			hkey = xfs_btree_high_key_addr(cur, n, block);
209862306a36Sopenharmony_ci			if (xfs_btree_keycmp_gt(cur, hkey, max_hkey))
209962306a36Sopenharmony_ci				max_hkey = hkey;
210062306a36Sopenharmony_ci		}
210162306a36Sopenharmony_ci
210262306a36Sopenharmony_ci		high = xfs_btree_high_key_from_key(cur, key);
210362306a36Sopenharmony_ci		memcpy(high, max_hkey, cur->bc_ops->key_len / 2);
210462306a36Sopenharmony_ci	} else {
210562306a36Sopenharmony_ci		memcpy(key, xfs_btree_key_addr(cur, 1, block),
210662306a36Sopenharmony_ci				cur->bc_ops->key_len);
210762306a36Sopenharmony_ci	}
210862306a36Sopenharmony_ci}
210962306a36Sopenharmony_ci
211062306a36Sopenharmony_ci/* Derive the keys for any btree block. */
211162306a36Sopenharmony_civoid
211262306a36Sopenharmony_cixfs_btree_get_keys(
211362306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
211462306a36Sopenharmony_ci	struct xfs_btree_block	*block,
211562306a36Sopenharmony_ci	union xfs_btree_key	*key)
211662306a36Sopenharmony_ci{
211762306a36Sopenharmony_ci	if (be16_to_cpu(block->bb_level) == 0)
211862306a36Sopenharmony_ci		xfs_btree_get_leaf_keys(cur, block, key);
211962306a36Sopenharmony_ci	else
212062306a36Sopenharmony_ci		xfs_btree_get_node_keys(cur, block, key);
212162306a36Sopenharmony_ci}
212262306a36Sopenharmony_ci
212362306a36Sopenharmony_ci/*
212462306a36Sopenharmony_ci * Decide if we need to update the parent keys of a btree block.  For
212562306a36Sopenharmony_ci * a standard btree this is only necessary if we're updating the first
212662306a36Sopenharmony_ci * record/key.  For an overlapping btree, we must always update the
212762306a36Sopenharmony_ci * keys because the highest key can be in any of the records or keys
212862306a36Sopenharmony_ci * in the block.
212962306a36Sopenharmony_ci */
213062306a36Sopenharmony_cistatic inline bool
213162306a36Sopenharmony_cixfs_btree_needs_key_update(
213262306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
213362306a36Sopenharmony_ci	int			ptr)
213462306a36Sopenharmony_ci{
213562306a36Sopenharmony_ci	return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1;
213662306a36Sopenharmony_ci}
213762306a36Sopenharmony_ci
213862306a36Sopenharmony_ci/*
213962306a36Sopenharmony_ci * Update the low and high parent keys of the given level, progressing
214062306a36Sopenharmony_ci * towards the root.  If force_all is false, stop if the keys for a given
214162306a36Sopenharmony_ci * level do not need updating.
214262306a36Sopenharmony_ci */
214362306a36Sopenharmony_ciSTATIC int
214462306a36Sopenharmony_ci__xfs_btree_updkeys(
214562306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
214662306a36Sopenharmony_ci	int			level,
214762306a36Sopenharmony_ci	struct xfs_btree_block	*block,
214862306a36Sopenharmony_ci	struct xfs_buf		*bp0,
214962306a36Sopenharmony_ci	bool			force_all)
215062306a36Sopenharmony_ci{
215162306a36Sopenharmony_ci	union xfs_btree_key	key;	/* keys from current level */
215262306a36Sopenharmony_ci	union xfs_btree_key	*lkey;	/* keys from the next level up */
215362306a36Sopenharmony_ci	union xfs_btree_key	*hkey;
215462306a36Sopenharmony_ci	union xfs_btree_key	*nlkey;	/* keys from the next level up */
215562306a36Sopenharmony_ci	union xfs_btree_key	*nhkey;
215662306a36Sopenharmony_ci	struct xfs_buf		*bp;
215762306a36Sopenharmony_ci	int			ptr;
215862306a36Sopenharmony_ci
215962306a36Sopenharmony_ci	ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING);
216062306a36Sopenharmony_ci
216162306a36Sopenharmony_ci	/* Exit if there aren't any parent levels to update. */
216262306a36Sopenharmony_ci	if (level + 1 >= cur->bc_nlevels)
216362306a36Sopenharmony_ci		return 0;
216462306a36Sopenharmony_ci
216562306a36Sopenharmony_ci	trace_xfs_btree_updkeys(cur, level, bp0);
216662306a36Sopenharmony_ci
216762306a36Sopenharmony_ci	lkey = &key;
216862306a36Sopenharmony_ci	hkey = xfs_btree_high_key_from_key(cur, lkey);
216962306a36Sopenharmony_ci	xfs_btree_get_keys(cur, block, lkey);
217062306a36Sopenharmony_ci	for (level++; level < cur->bc_nlevels; level++) {
217162306a36Sopenharmony_ci#ifdef DEBUG
217262306a36Sopenharmony_ci		int		error;
217362306a36Sopenharmony_ci#endif
217462306a36Sopenharmony_ci		block = xfs_btree_get_block(cur, level, &bp);
217562306a36Sopenharmony_ci		trace_xfs_btree_updkeys(cur, level, bp);
217662306a36Sopenharmony_ci#ifdef DEBUG
217762306a36Sopenharmony_ci		error = xfs_btree_check_block(cur, block, level, bp);
217862306a36Sopenharmony_ci		if (error)
217962306a36Sopenharmony_ci			return error;
218062306a36Sopenharmony_ci#endif
218162306a36Sopenharmony_ci		ptr = cur->bc_levels[level].ptr;
218262306a36Sopenharmony_ci		nlkey = xfs_btree_key_addr(cur, ptr, block);
218362306a36Sopenharmony_ci		nhkey = xfs_btree_high_key_addr(cur, ptr, block);
218462306a36Sopenharmony_ci		if (!force_all &&
218562306a36Sopenharmony_ci		    xfs_btree_keycmp_eq(cur, nlkey, lkey) &&
218662306a36Sopenharmony_ci		    xfs_btree_keycmp_eq(cur, nhkey, hkey))
218762306a36Sopenharmony_ci			break;
218862306a36Sopenharmony_ci		xfs_btree_copy_keys(cur, nlkey, lkey, 1);
218962306a36Sopenharmony_ci		xfs_btree_log_keys(cur, bp, ptr, ptr);
219062306a36Sopenharmony_ci		if (level + 1 >= cur->bc_nlevels)
219162306a36Sopenharmony_ci			break;
219262306a36Sopenharmony_ci		xfs_btree_get_node_keys(cur, block, lkey);
219362306a36Sopenharmony_ci	}
219462306a36Sopenharmony_ci
219562306a36Sopenharmony_ci	return 0;
219662306a36Sopenharmony_ci}
219762306a36Sopenharmony_ci
219862306a36Sopenharmony_ci/* Update all the keys from some level in cursor back to the root. */
219962306a36Sopenharmony_ciSTATIC int
220062306a36Sopenharmony_cixfs_btree_updkeys_force(
220162306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
220262306a36Sopenharmony_ci	int			level)
220362306a36Sopenharmony_ci{
220462306a36Sopenharmony_ci	struct xfs_buf		*bp;
220562306a36Sopenharmony_ci	struct xfs_btree_block	*block;
220662306a36Sopenharmony_ci
220762306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
220862306a36Sopenharmony_ci	return __xfs_btree_updkeys(cur, level, block, bp, true);
220962306a36Sopenharmony_ci}
221062306a36Sopenharmony_ci
221162306a36Sopenharmony_ci/*
221262306a36Sopenharmony_ci * Update the parent keys of the given level, progressing towards the root.
221362306a36Sopenharmony_ci */
221462306a36Sopenharmony_ciSTATIC int
221562306a36Sopenharmony_cixfs_btree_update_keys(
221662306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
221762306a36Sopenharmony_ci	int			level)
221862306a36Sopenharmony_ci{
221962306a36Sopenharmony_ci	struct xfs_btree_block	*block;
222062306a36Sopenharmony_ci	struct xfs_buf		*bp;
222162306a36Sopenharmony_ci	union xfs_btree_key	*kp;
222262306a36Sopenharmony_ci	union xfs_btree_key	key;
222362306a36Sopenharmony_ci	int			ptr;
222462306a36Sopenharmony_ci
222562306a36Sopenharmony_ci	ASSERT(level >= 0);
222662306a36Sopenharmony_ci
222762306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
222862306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_OVERLAPPING)
222962306a36Sopenharmony_ci		return __xfs_btree_updkeys(cur, level, block, bp, false);
223062306a36Sopenharmony_ci
223162306a36Sopenharmony_ci	/*
223262306a36Sopenharmony_ci	 * Go up the tree from this level toward the root.
223362306a36Sopenharmony_ci	 * At each level, update the key value to the value input.
223462306a36Sopenharmony_ci	 * Stop when we reach a level where the cursor isn't pointing
223562306a36Sopenharmony_ci	 * at the first entry in the block.
223662306a36Sopenharmony_ci	 */
223762306a36Sopenharmony_ci	xfs_btree_get_keys(cur, block, &key);
223862306a36Sopenharmony_ci	for (level++, ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
223962306a36Sopenharmony_ci#ifdef DEBUG
224062306a36Sopenharmony_ci		int		error;
224162306a36Sopenharmony_ci#endif
224262306a36Sopenharmony_ci		block = xfs_btree_get_block(cur, level, &bp);
224362306a36Sopenharmony_ci#ifdef DEBUG
224462306a36Sopenharmony_ci		error = xfs_btree_check_block(cur, block, level, bp);
224562306a36Sopenharmony_ci		if (error)
224662306a36Sopenharmony_ci			return error;
224762306a36Sopenharmony_ci#endif
224862306a36Sopenharmony_ci		ptr = cur->bc_levels[level].ptr;
224962306a36Sopenharmony_ci		kp = xfs_btree_key_addr(cur, ptr, block);
225062306a36Sopenharmony_ci		xfs_btree_copy_keys(cur, kp, &key, 1);
225162306a36Sopenharmony_ci		xfs_btree_log_keys(cur, bp, ptr, ptr);
225262306a36Sopenharmony_ci	}
225362306a36Sopenharmony_ci
225462306a36Sopenharmony_ci	return 0;
225562306a36Sopenharmony_ci}
225662306a36Sopenharmony_ci
225762306a36Sopenharmony_ci/*
225862306a36Sopenharmony_ci * Update the record referred to by cur to the value in the
225962306a36Sopenharmony_ci * given record. This either works (return 0) or gets an
226062306a36Sopenharmony_ci * EFSCORRUPTED error.
226162306a36Sopenharmony_ci */
226262306a36Sopenharmony_ciint
226362306a36Sopenharmony_cixfs_btree_update(
226462306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
226562306a36Sopenharmony_ci	union xfs_btree_rec	*rec)
226662306a36Sopenharmony_ci{
226762306a36Sopenharmony_ci	struct xfs_btree_block	*block;
226862306a36Sopenharmony_ci	struct xfs_buf		*bp;
226962306a36Sopenharmony_ci	int			error;
227062306a36Sopenharmony_ci	int			ptr;
227162306a36Sopenharmony_ci	union xfs_btree_rec	*rp;
227262306a36Sopenharmony_ci
227362306a36Sopenharmony_ci	/* Pick up the current block. */
227462306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, 0, &bp);
227562306a36Sopenharmony_ci
227662306a36Sopenharmony_ci#ifdef DEBUG
227762306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, block, 0, bp);
227862306a36Sopenharmony_ci	if (error)
227962306a36Sopenharmony_ci		goto error0;
228062306a36Sopenharmony_ci#endif
228162306a36Sopenharmony_ci	/* Get the address of the rec to be updated. */
228262306a36Sopenharmony_ci	ptr = cur->bc_levels[0].ptr;
228362306a36Sopenharmony_ci	rp = xfs_btree_rec_addr(cur, ptr, block);
228462306a36Sopenharmony_ci
228562306a36Sopenharmony_ci	/* Fill in the new contents and log them. */
228662306a36Sopenharmony_ci	xfs_btree_copy_recs(cur, rp, rec, 1);
228762306a36Sopenharmony_ci	xfs_btree_log_recs(cur, bp, ptr, ptr);
228862306a36Sopenharmony_ci
228962306a36Sopenharmony_ci	/*
229062306a36Sopenharmony_ci	 * If we are tracking the last record in the tree and
229162306a36Sopenharmony_ci	 * we are at the far right edge of the tree, update it.
229262306a36Sopenharmony_ci	 */
229362306a36Sopenharmony_ci	if (xfs_btree_is_lastrec(cur, block, 0)) {
229462306a36Sopenharmony_ci		cur->bc_ops->update_lastrec(cur, block, rec,
229562306a36Sopenharmony_ci					    ptr, LASTREC_UPDATE);
229662306a36Sopenharmony_ci	}
229762306a36Sopenharmony_ci
229862306a36Sopenharmony_ci	/* Pass new key value up to our parent. */
229962306a36Sopenharmony_ci	if (xfs_btree_needs_key_update(cur, ptr)) {
230062306a36Sopenharmony_ci		error = xfs_btree_update_keys(cur, 0);
230162306a36Sopenharmony_ci		if (error)
230262306a36Sopenharmony_ci			goto error0;
230362306a36Sopenharmony_ci	}
230462306a36Sopenharmony_ci
230562306a36Sopenharmony_ci	return 0;
230662306a36Sopenharmony_ci
230762306a36Sopenharmony_cierror0:
230862306a36Sopenharmony_ci	return error;
230962306a36Sopenharmony_ci}
231062306a36Sopenharmony_ci
231162306a36Sopenharmony_ci/*
231262306a36Sopenharmony_ci * Move 1 record left from cur/level if possible.
231362306a36Sopenharmony_ci * Update cur to reflect the new path.
231462306a36Sopenharmony_ci */
231562306a36Sopenharmony_ciSTATIC int					/* error */
231662306a36Sopenharmony_cixfs_btree_lshift(
231762306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
231862306a36Sopenharmony_ci	int			level,
231962306a36Sopenharmony_ci	int			*stat)		/* success/failure */
232062306a36Sopenharmony_ci{
232162306a36Sopenharmony_ci	struct xfs_buf		*lbp;		/* left buffer pointer */
232262306a36Sopenharmony_ci	struct xfs_btree_block	*left;		/* left btree block */
232362306a36Sopenharmony_ci	int			lrecs;		/* left record count */
232462306a36Sopenharmony_ci	struct xfs_buf		*rbp;		/* right buffer pointer */
232562306a36Sopenharmony_ci	struct xfs_btree_block	*right;		/* right btree block */
232662306a36Sopenharmony_ci	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
232762306a36Sopenharmony_ci	int			rrecs;		/* right record count */
232862306a36Sopenharmony_ci	union xfs_btree_ptr	lptr;		/* left btree pointer */
232962306a36Sopenharmony_ci	union xfs_btree_key	*rkp = NULL;	/* right btree key */
233062306a36Sopenharmony_ci	union xfs_btree_ptr	*rpp = NULL;	/* right address pointer */
233162306a36Sopenharmony_ci	union xfs_btree_rec	*rrp = NULL;	/* right record pointer */
233262306a36Sopenharmony_ci	int			error;		/* error return value */
233362306a36Sopenharmony_ci	int			i;
233462306a36Sopenharmony_ci
233562306a36Sopenharmony_ci	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
233662306a36Sopenharmony_ci	    level == cur->bc_nlevels - 1)
233762306a36Sopenharmony_ci		goto out0;
233862306a36Sopenharmony_ci
233962306a36Sopenharmony_ci	/* Set up variables for this block as "right". */
234062306a36Sopenharmony_ci	right = xfs_btree_get_block(cur, level, &rbp);
234162306a36Sopenharmony_ci
234262306a36Sopenharmony_ci#ifdef DEBUG
234362306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, right, level, rbp);
234462306a36Sopenharmony_ci	if (error)
234562306a36Sopenharmony_ci		goto error0;
234662306a36Sopenharmony_ci#endif
234762306a36Sopenharmony_ci
234862306a36Sopenharmony_ci	/* If we've got no left sibling then we can't shift an entry left. */
234962306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
235062306a36Sopenharmony_ci	if (xfs_btree_ptr_is_null(cur, &lptr))
235162306a36Sopenharmony_ci		goto out0;
235262306a36Sopenharmony_ci
235362306a36Sopenharmony_ci	/*
235462306a36Sopenharmony_ci	 * If the cursor entry is the one that would be moved, don't
235562306a36Sopenharmony_ci	 * do it... it's too complicated.
235662306a36Sopenharmony_ci	 */
235762306a36Sopenharmony_ci	if (cur->bc_levels[level].ptr <= 1)
235862306a36Sopenharmony_ci		goto out0;
235962306a36Sopenharmony_ci
236062306a36Sopenharmony_ci	/* Set up the left neighbor as "left". */
236162306a36Sopenharmony_ci	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
236262306a36Sopenharmony_ci	if (error)
236362306a36Sopenharmony_ci		goto error0;
236462306a36Sopenharmony_ci
236562306a36Sopenharmony_ci	/* If it's full, it can't take another entry. */
236662306a36Sopenharmony_ci	lrecs = xfs_btree_get_numrecs(left);
236762306a36Sopenharmony_ci	if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
236862306a36Sopenharmony_ci		goto out0;
236962306a36Sopenharmony_ci
237062306a36Sopenharmony_ci	rrecs = xfs_btree_get_numrecs(right);
237162306a36Sopenharmony_ci
237262306a36Sopenharmony_ci	/*
237362306a36Sopenharmony_ci	 * We add one entry to the left side and remove one for the right side.
237462306a36Sopenharmony_ci	 * Account for it here, the changes will be updated on disk and logged
237562306a36Sopenharmony_ci	 * later.
237662306a36Sopenharmony_ci	 */
237762306a36Sopenharmony_ci	lrecs++;
237862306a36Sopenharmony_ci	rrecs--;
237962306a36Sopenharmony_ci
238062306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, lshift);
238162306a36Sopenharmony_ci	XFS_BTREE_STATS_ADD(cur, moves, 1);
238262306a36Sopenharmony_ci
238362306a36Sopenharmony_ci	/*
238462306a36Sopenharmony_ci	 * If non-leaf, copy a key and a ptr to the left block.
238562306a36Sopenharmony_ci	 * Log the changes to the left block.
238662306a36Sopenharmony_ci	 */
238762306a36Sopenharmony_ci	if (level > 0) {
238862306a36Sopenharmony_ci		/* It's a non-leaf.  Move keys and pointers. */
238962306a36Sopenharmony_ci		union xfs_btree_key	*lkp;	/* left btree key */
239062306a36Sopenharmony_ci		union xfs_btree_ptr	*lpp;	/* left address pointer */
239162306a36Sopenharmony_ci
239262306a36Sopenharmony_ci		lkp = xfs_btree_key_addr(cur, lrecs, left);
239362306a36Sopenharmony_ci		rkp = xfs_btree_key_addr(cur, 1, right);
239462306a36Sopenharmony_ci
239562306a36Sopenharmony_ci		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
239662306a36Sopenharmony_ci		rpp = xfs_btree_ptr_addr(cur, 1, right);
239762306a36Sopenharmony_ci
239862306a36Sopenharmony_ci		error = xfs_btree_debug_check_ptr(cur, rpp, 0, level);
239962306a36Sopenharmony_ci		if (error)
240062306a36Sopenharmony_ci			goto error0;
240162306a36Sopenharmony_ci
240262306a36Sopenharmony_ci		xfs_btree_copy_keys(cur, lkp, rkp, 1);
240362306a36Sopenharmony_ci		xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
240462306a36Sopenharmony_ci
240562306a36Sopenharmony_ci		xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
240662306a36Sopenharmony_ci		xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
240762306a36Sopenharmony_ci
240862306a36Sopenharmony_ci		ASSERT(cur->bc_ops->keys_inorder(cur,
240962306a36Sopenharmony_ci			xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
241062306a36Sopenharmony_ci	} else {
241162306a36Sopenharmony_ci		/* It's a leaf.  Move records.  */
241262306a36Sopenharmony_ci		union xfs_btree_rec	*lrp;	/* left record pointer */
241362306a36Sopenharmony_ci
241462306a36Sopenharmony_ci		lrp = xfs_btree_rec_addr(cur, lrecs, left);
241562306a36Sopenharmony_ci		rrp = xfs_btree_rec_addr(cur, 1, right);
241662306a36Sopenharmony_ci
241762306a36Sopenharmony_ci		xfs_btree_copy_recs(cur, lrp, rrp, 1);
241862306a36Sopenharmony_ci		xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
241962306a36Sopenharmony_ci
242062306a36Sopenharmony_ci		ASSERT(cur->bc_ops->recs_inorder(cur,
242162306a36Sopenharmony_ci			xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
242262306a36Sopenharmony_ci	}
242362306a36Sopenharmony_ci
242462306a36Sopenharmony_ci	xfs_btree_set_numrecs(left, lrecs);
242562306a36Sopenharmony_ci	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
242662306a36Sopenharmony_ci
242762306a36Sopenharmony_ci	xfs_btree_set_numrecs(right, rrecs);
242862306a36Sopenharmony_ci	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
242962306a36Sopenharmony_ci
243062306a36Sopenharmony_ci	/*
243162306a36Sopenharmony_ci	 * Slide the contents of right down one entry.
243262306a36Sopenharmony_ci	 */
243362306a36Sopenharmony_ci	XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
243462306a36Sopenharmony_ci	if (level > 0) {
243562306a36Sopenharmony_ci		/* It's a nonleaf. operate on keys and ptrs */
243662306a36Sopenharmony_ci		for (i = 0; i < rrecs; i++) {
243762306a36Sopenharmony_ci			error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
243862306a36Sopenharmony_ci			if (error)
243962306a36Sopenharmony_ci				goto error0;
244062306a36Sopenharmony_ci		}
244162306a36Sopenharmony_ci
244262306a36Sopenharmony_ci		xfs_btree_shift_keys(cur,
244362306a36Sopenharmony_ci				xfs_btree_key_addr(cur, 2, right),
244462306a36Sopenharmony_ci				-1, rrecs);
244562306a36Sopenharmony_ci		xfs_btree_shift_ptrs(cur,
244662306a36Sopenharmony_ci				xfs_btree_ptr_addr(cur, 2, right),
244762306a36Sopenharmony_ci				-1, rrecs);
244862306a36Sopenharmony_ci
244962306a36Sopenharmony_ci		xfs_btree_log_keys(cur, rbp, 1, rrecs);
245062306a36Sopenharmony_ci		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
245162306a36Sopenharmony_ci	} else {
245262306a36Sopenharmony_ci		/* It's a leaf. operate on records */
245362306a36Sopenharmony_ci		xfs_btree_shift_recs(cur,
245462306a36Sopenharmony_ci			xfs_btree_rec_addr(cur, 2, right),
245562306a36Sopenharmony_ci			-1, rrecs);
245662306a36Sopenharmony_ci		xfs_btree_log_recs(cur, rbp, 1, rrecs);
245762306a36Sopenharmony_ci	}
245862306a36Sopenharmony_ci
245962306a36Sopenharmony_ci	/*
246062306a36Sopenharmony_ci	 * Using a temporary cursor, update the parent key values of the
246162306a36Sopenharmony_ci	 * block on the left.
246262306a36Sopenharmony_ci	 */
246362306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
246462306a36Sopenharmony_ci		error = xfs_btree_dup_cursor(cur, &tcur);
246562306a36Sopenharmony_ci		if (error)
246662306a36Sopenharmony_ci			goto error0;
246762306a36Sopenharmony_ci		i = xfs_btree_firstrec(tcur, level);
246862306a36Sopenharmony_ci		if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
246962306a36Sopenharmony_ci			error = -EFSCORRUPTED;
247062306a36Sopenharmony_ci			goto error0;
247162306a36Sopenharmony_ci		}
247262306a36Sopenharmony_ci
247362306a36Sopenharmony_ci		error = xfs_btree_decrement(tcur, level, &i);
247462306a36Sopenharmony_ci		if (error)
247562306a36Sopenharmony_ci			goto error1;
247662306a36Sopenharmony_ci
247762306a36Sopenharmony_ci		/* Update the parent high keys of the left block, if needed. */
247862306a36Sopenharmony_ci		error = xfs_btree_update_keys(tcur, level);
247962306a36Sopenharmony_ci		if (error)
248062306a36Sopenharmony_ci			goto error1;
248162306a36Sopenharmony_ci
248262306a36Sopenharmony_ci		xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
248362306a36Sopenharmony_ci	}
248462306a36Sopenharmony_ci
248562306a36Sopenharmony_ci	/* Update the parent keys of the right block. */
248662306a36Sopenharmony_ci	error = xfs_btree_update_keys(cur, level);
248762306a36Sopenharmony_ci	if (error)
248862306a36Sopenharmony_ci		goto error0;
248962306a36Sopenharmony_ci
249062306a36Sopenharmony_ci	/* Slide the cursor value left one. */
249162306a36Sopenharmony_ci	cur->bc_levels[level].ptr--;
249262306a36Sopenharmony_ci
249362306a36Sopenharmony_ci	*stat = 1;
249462306a36Sopenharmony_ci	return 0;
249562306a36Sopenharmony_ci
249662306a36Sopenharmony_ciout0:
249762306a36Sopenharmony_ci	*stat = 0;
249862306a36Sopenharmony_ci	return 0;
249962306a36Sopenharmony_ci
250062306a36Sopenharmony_cierror0:
250162306a36Sopenharmony_ci	return error;
250262306a36Sopenharmony_ci
250362306a36Sopenharmony_cierror1:
250462306a36Sopenharmony_ci	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
250562306a36Sopenharmony_ci	return error;
250662306a36Sopenharmony_ci}
250762306a36Sopenharmony_ci
250862306a36Sopenharmony_ci/*
250962306a36Sopenharmony_ci * Move 1 record right from cur/level if possible.
251062306a36Sopenharmony_ci * Update cur to reflect the new path.
251162306a36Sopenharmony_ci */
251262306a36Sopenharmony_ciSTATIC int					/* error */
251362306a36Sopenharmony_cixfs_btree_rshift(
251462306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
251562306a36Sopenharmony_ci	int			level,
251662306a36Sopenharmony_ci	int			*stat)		/* success/failure */
251762306a36Sopenharmony_ci{
251862306a36Sopenharmony_ci	struct xfs_buf		*lbp;		/* left buffer pointer */
251962306a36Sopenharmony_ci	struct xfs_btree_block	*left;		/* left btree block */
252062306a36Sopenharmony_ci	struct xfs_buf		*rbp;		/* right buffer pointer */
252162306a36Sopenharmony_ci	struct xfs_btree_block	*right;		/* right btree block */
252262306a36Sopenharmony_ci	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
252362306a36Sopenharmony_ci	union xfs_btree_ptr	rptr;		/* right block pointer */
252462306a36Sopenharmony_ci	union xfs_btree_key	*rkp;		/* right btree key */
252562306a36Sopenharmony_ci	int			rrecs;		/* right record count */
252662306a36Sopenharmony_ci	int			lrecs;		/* left record count */
252762306a36Sopenharmony_ci	int			error;		/* error return value */
252862306a36Sopenharmony_ci	int			i;		/* loop counter */
252962306a36Sopenharmony_ci
253062306a36Sopenharmony_ci	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
253162306a36Sopenharmony_ci	    (level == cur->bc_nlevels - 1))
253262306a36Sopenharmony_ci		goto out0;
253362306a36Sopenharmony_ci
253462306a36Sopenharmony_ci	/* Set up variables for this block as "left". */
253562306a36Sopenharmony_ci	left = xfs_btree_get_block(cur, level, &lbp);
253662306a36Sopenharmony_ci
253762306a36Sopenharmony_ci#ifdef DEBUG
253862306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, left, level, lbp);
253962306a36Sopenharmony_ci	if (error)
254062306a36Sopenharmony_ci		goto error0;
254162306a36Sopenharmony_ci#endif
254262306a36Sopenharmony_ci
254362306a36Sopenharmony_ci	/* If we've got no right sibling then we can't shift an entry right. */
254462306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
254562306a36Sopenharmony_ci	if (xfs_btree_ptr_is_null(cur, &rptr))
254662306a36Sopenharmony_ci		goto out0;
254762306a36Sopenharmony_ci
254862306a36Sopenharmony_ci	/*
254962306a36Sopenharmony_ci	 * If the cursor entry is the one that would be moved, don't
255062306a36Sopenharmony_ci	 * do it... it's too complicated.
255162306a36Sopenharmony_ci	 */
255262306a36Sopenharmony_ci	lrecs = xfs_btree_get_numrecs(left);
255362306a36Sopenharmony_ci	if (cur->bc_levels[level].ptr >= lrecs)
255462306a36Sopenharmony_ci		goto out0;
255562306a36Sopenharmony_ci
255662306a36Sopenharmony_ci	/* Set up the right neighbor as "right". */
255762306a36Sopenharmony_ci	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
255862306a36Sopenharmony_ci	if (error)
255962306a36Sopenharmony_ci		goto error0;
256062306a36Sopenharmony_ci
256162306a36Sopenharmony_ci	/* If it's full, it can't take another entry. */
256262306a36Sopenharmony_ci	rrecs = xfs_btree_get_numrecs(right);
256362306a36Sopenharmony_ci	if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
256462306a36Sopenharmony_ci		goto out0;
256562306a36Sopenharmony_ci
256662306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, rshift);
256762306a36Sopenharmony_ci	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
256862306a36Sopenharmony_ci
256962306a36Sopenharmony_ci	/*
257062306a36Sopenharmony_ci	 * Make a hole at the start of the right neighbor block, then
257162306a36Sopenharmony_ci	 * copy the last left block entry to the hole.
257262306a36Sopenharmony_ci	 */
257362306a36Sopenharmony_ci	if (level > 0) {
257462306a36Sopenharmony_ci		/* It's a nonleaf. make a hole in the keys and ptrs */
257562306a36Sopenharmony_ci		union xfs_btree_key	*lkp;
257662306a36Sopenharmony_ci		union xfs_btree_ptr	*lpp;
257762306a36Sopenharmony_ci		union xfs_btree_ptr	*rpp;
257862306a36Sopenharmony_ci
257962306a36Sopenharmony_ci		lkp = xfs_btree_key_addr(cur, lrecs, left);
258062306a36Sopenharmony_ci		lpp = xfs_btree_ptr_addr(cur, lrecs, left);
258162306a36Sopenharmony_ci		rkp = xfs_btree_key_addr(cur, 1, right);
258262306a36Sopenharmony_ci		rpp = xfs_btree_ptr_addr(cur, 1, right);
258362306a36Sopenharmony_ci
258462306a36Sopenharmony_ci		for (i = rrecs - 1; i >= 0; i--) {
258562306a36Sopenharmony_ci			error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
258662306a36Sopenharmony_ci			if (error)
258762306a36Sopenharmony_ci				goto error0;
258862306a36Sopenharmony_ci		}
258962306a36Sopenharmony_ci
259062306a36Sopenharmony_ci		xfs_btree_shift_keys(cur, rkp, 1, rrecs);
259162306a36Sopenharmony_ci		xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
259262306a36Sopenharmony_ci
259362306a36Sopenharmony_ci		error = xfs_btree_debug_check_ptr(cur, lpp, 0, level);
259462306a36Sopenharmony_ci		if (error)
259562306a36Sopenharmony_ci			goto error0;
259662306a36Sopenharmony_ci
259762306a36Sopenharmony_ci		/* Now put the new data in, and log it. */
259862306a36Sopenharmony_ci		xfs_btree_copy_keys(cur, rkp, lkp, 1);
259962306a36Sopenharmony_ci		xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
260062306a36Sopenharmony_ci
260162306a36Sopenharmony_ci		xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
260262306a36Sopenharmony_ci		xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
260362306a36Sopenharmony_ci
260462306a36Sopenharmony_ci		ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
260562306a36Sopenharmony_ci			xfs_btree_key_addr(cur, 2, right)));
260662306a36Sopenharmony_ci	} else {
260762306a36Sopenharmony_ci		/* It's a leaf. make a hole in the records */
260862306a36Sopenharmony_ci		union xfs_btree_rec	*lrp;
260962306a36Sopenharmony_ci		union xfs_btree_rec	*rrp;
261062306a36Sopenharmony_ci
261162306a36Sopenharmony_ci		lrp = xfs_btree_rec_addr(cur, lrecs, left);
261262306a36Sopenharmony_ci		rrp = xfs_btree_rec_addr(cur, 1, right);
261362306a36Sopenharmony_ci
261462306a36Sopenharmony_ci		xfs_btree_shift_recs(cur, rrp, 1, rrecs);
261562306a36Sopenharmony_ci
261662306a36Sopenharmony_ci		/* Now put the new data in, and log it. */
261762306a36Sopenharmony_ci		xfs_btree_copy_recs(cur, rrp, lrp, 1);
261862306a36Sopenharmony_ci		xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
261962306a36Sopenharmony_ci	}
262062306a36Sopenharmony_ci
262162306a36Sopenharmony_ci	/*
262262306a36Sopenharmony_ci	 * Decrement and log left's numrecs, bump and log right's numrecs.
262362306a36Sopenharmony_ci	 */
262462306a36Sopenharmony_ci	xfs_btree_set_numrecs(left, --lrecs);
262562306a36Sopenharmony_ci	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
262662306a36Sopenharmony_ci
262762306a36Sopenharmony_ci	xfs_btree_set_numrecs(right, ++rrecs);
262862306a36Sopenharmony_ci	xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
262962306a36Sopenharmony_ci
263062306a36Sopenharmony_ci	/*
263162306a36Sopenharmony_ci	 * Using a temporary cursor, update the parent key values of the
263262306a36Sopenharmony_ci	 * block on the right.
263362306a36Sopenharmony_ci	 */
263462306a36Sopenharmony_ci	error = xfs_btree_dup_cursor(cur, &tcur);
263562306a36Sopenharmony_ci	if (error)
263662306a36Sopenharmony_ci		goto error0;
263762306a36Sopenharmony_ci	i = xfs_btree_lastrec(tcur, level);
263862306a36Sopenharmony_ci	if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
263962306a36Sopenharmony_ci		error = -EFSCORRUPTED;
264062306a36Sopenharmony_ci		goto error0;
264162306a36Sopenharmony_ci	}
264262306a36Sopenharmony_ci
264362306a36Sopenharmony_ci	error = xfs_btree_increment(tcur, level, &i);
264462306a36Sopenharmony_ci	if (error)
264562306a36Sopenharmony_ci		goto error1;
264662306a36Sopenharmony_ci
264762306a36Sopenharmony_ci	/* Update the parent high keys of the left block, if needed. */
264862306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
264962306a36Sopenharmony_ci		error = xfs_btree_update_keys(cur, level);
265062306a36Sopenharmony_ci		if (error)
265162306a36Sopenharmony_ci			goto error1;
265262306a36Sopenharmony_ci	}
265362306a36Sopenharmony_ci
265462306a36Sopenharmony_ci	/* Update the parent keys of the right block. */
265562306a36Sopenharmony_ci	error = xfs_btree_update_keys(tcur, level);
265662306a36Sopenharmony_ci	if (error)
265762306a36Sopenharmony_ci		goto error1;
265862306a36Sopenharmony_ci
265962306a36Sopenharmony_ci	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
266062306a36Sopenharmony_ci
266162306a36Sopenharmony_ci	*stat = 1;
266262306a36Sopenharmony_ci	return 0;
266362306a36Sopenharmony_ci
266462306a36Sopenharmony_ciout0:
266562306a36Sopenharmony_ci	*stat = 0;
266662306a36Sopenharmony_ci	return 0;
266762306a36Sopenharmony_ci
266862306a36Sopenharmony_cierror0:
266962306a36Sopenharmony_ci	return error;
267062306a36Sopenharmony_ci
267162306a36Sopenharmony_cierror1:
267262306a36Sopenharmony_ci	xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
267362306a36Sopenharmony_ci	return error;
267462306a36Sopenharmony_ci}
267562306a36Sopenharmony_ci
267662306a36Sopenharmony_ci/*
267762306a36Sopenharmony_ci * Split cur/level block in half.
267862306a36Sopenharmony_ci * Return new block number and the key to its first
267962306a36Sopenharmony_ci * record (to be inserted into parent).
268062306a36Sopenharmony_ci */
268162306a36Sopenharmony_ciSTATIC int					/* error */
268262306a36Sopenharmony_ci__xfs_btree_split(
268362306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
268462306a36Sopenharmony_ci	int			level,
268562306a36Sopenharmony_ci	union xfs_btree_ptr	*ptrp,
268662306a36Sopenharmony_ci	union xfs_btree_key	*key,
268762306a36Sopenharmony_ci	struct xfs_btree_cur	**curp,
268862306a36Sopenharmony_ci	int			*stat)		/* success/failure */
268962306a36Sopenharmony_ci{
269062306a36Sopenharmony_ci	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
269162306a36Sopenharmony_ci	struct xfs_buf		*lbp;		/* left buffer pointer */
269262306a36Sopenharmony_ci	struct xfs_btree_block	*left;		/* left btree block */
269362306a36Sopenharmony_ci	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
269462306a36Sopenharmony_ci	struct xfs_buf		*rbp;		/* right buffer pointer */
269562306a36Sopenharmony_ci	struct xfs_btree_block	*right;		/* right btree block */
269662306a36Sopenharmony_ci	union xfs_btree_ptr	rrptr;		/* right-right sibling ptr */
269762306a36Sopenharmony_ci	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
269862306a36Sopenharmony_ci	struct xfs_btree_block	*rrblock;	/* right-right btree block */
269962306a36Sopenharmony_ci	int			lrecs;
270062306a36Sopenharmony_ci	int			rrecs;
270162306a36Sopenharmony_ci	int			src_index;
270262306a36Sopenharmony_ci	int			error;		/* error return value */
270362306a36Sopenharmony_ci	int			i;
270462306a36Sopenharmony_ci
270562306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, split);
270662306a36Sopenharmony_ci
270762306a36Sopenharmony_ci	/* Set up left block (current one). */
270862306a36Sopenharmony_ci	left = xfs_btree_get_block(cur, level, &lbp);
270962306a36Sopenharmony_ci
271062306a36Sopenharmony_ci#ifdef DEBUG
271162306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, left, level, lbp);
271262306a36Sopenharmony_ci	if (error)
271362306a36Sopenharmony_ci		goto error0;
271462306a36Sopenharmony_ci#endif
271562306a36Sopenharmony_ci
271662306a36Sopenharmony_ci	xfs_btree_buf_to_ptr(cur, lbp, &lptr);
271762306a36Sopenharmony_ci
271862306a36Sopenharmony_ci	/* Allocate the new block. If we can't do it, we're toast. Give up. */
271962306a36Sopenharmony_ci	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
272062306a36Sopenharmony_ci	if (error)
272162306a36Sopenharmony_ci		goto error0;
272262306a36Sopenharmony_ci	if (*stat == 0)
272362306a36Sopenharmony_ci		goto out0;
272462306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, alloc);
272562306a36Sopenharmony_ci
272662306a36Sopenharmony_ci	/* Set up the new block as "right". */
272762306a36Sopenharmony_ci	error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
272862306a36Sopenharmony_ci	if (error)
272962306a36Sopenharmony_ci		goto error0;
273062306a36Sopenharmony_ci
273162306a36Sopenharmony_ci	/* Fill in the btree header for the new right block. */
273262306a36Sopenharmony_ci	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
273362306a36Sopenharmony_ci
273462306a36Sopenharmony_ci	/*
273562306a36Sopenharmony_ci	 * Split the entries between the old and the new block evenly.
273662306a36Sopenharmony_ci	 * Make sure that if there's an odd number of entries now, that
273762306a36Sopenharmony_ci	 * each new block will have the same number of entries.
273862306a36Sopenharmony_ci	 */
273962306a36Sopenharmony_ci	lrecs = xfs_btree_get_numrecs(left);
274062306a36Sopenharmony_ci	rrecs = lrecs / 2;
274162306a36Sopenharmony_ci	if ((lrecs & 1) && cur->bc_levels[level].ptr <= rrecs + 1)
274262306a36Sopenharmony_ci		rrecs++;
274362306a36Sopenharmony_ci	src_index = (lrecs - rrecs + 1);
274462306a36Sopenharmony_ci
274562306a36Sopenharmony_ci	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
274662306a36Sopenharmony_ci
274762306a36Sopenharmony_ci	/* Adjust numrecs for the later get_*_keys() calls. */
274862306a36Sopenharmony_ci	lrecs -= rrecs;
274962306a36Sopenharmony_ci	xfs_btree_set_numrecs(left, lrecs);
275062306a36Sopenharmony_ci	xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
275162306a36Sopenharmony_ci
275262306a36Sopenharmony_ci	/*
275362306a36Sopenharmony_ci	 * Copy btree block entries from the left block over to the
275462306a36Sopenharmony_ci	 * new block, the right. Update the right block and log the
275562306a36Sopenharmony_ci	 * changes.
275662306a36Sopenharmony_ci	 */
275762306a36Sopenharmony_ci	if (level > 0) {
275862306a36Sopenharmony_ci		/* It's a non-leaf.  Move keys and pointers. */
275962306a36Sopenharmony_ci		union xfs_btree_key	*lkp;	/* left btree key */
276062306a36Sopenharmony_ci		union xfs_btree_ptr	*lpp;	/* left address pointer */
276162306a36Sopenharmony_ci		union xfs_btree_key	*rkp;	/* right btree key */
276262306a36Sopenharmony_ci		union xfs_btree_ptr	*rpp;	/* right address pointer */
276362306a36Sopenharmony_ci
276462306a36Sopenharmony_ci		lkp = xfs_btree_key_addr(cur, src_index, left);
276562306a36Sopenharmony_ci		lpp = xfs_btree_ptr_addr(cur, src_index, left);
276662306a36Sopenharmony_ci		rkp = xfs_btree_key_addr(cur, 1, right);
276762306a36Sopenharmony_ci		rpp = xfs_btree_ptr_addr(cur, 1, right);
276862306a36Sopenharmony_ci
276962306a36Sopenharmony_ci		for (i = src_index; i < rrecs; i++) {
277062306a36Sopenharmony_ci			error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
277162306a36Sopenharmony_ci			if (error)
277262306a36Sopenharmony_ci				goto error0;
277362306a36Sopenharmony_ci		}
277462306a36Sopenharmony_ci
277562306a36Sopenharmony_ci		/* Copy the keys & pointers to the new block. */
277662306a36Sopenharmony_ci		xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
277762306a36Sopenharmony_ci		xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
277862306a36Sopenharmony_ci
277962306a36Sopenharmony_ci		xfs_btree_log_keys(cur, rbp, 1, rrecs);
278062306a36Sopenharmony_ci		xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
278162306a36Sopenharmony_ci
278262306a36Sopenharmony_ci		/* Stash the keys of the new block for later insertion. */
278362306a36Sopenharmony_ci		xfs_btree_get_node_keys(cur, right, key);
278462306a36Sopenharmony_ci	} else {
278562306a36Sopenharmony_ci		/* It's a leaf.  Move records.  */
278662306a36Sopenharmony_ci		union xfs_btree_rec	*lrp;	/* left record pointer */
278762306a36Sopenharmony_ci		union xfs_btree_rec	*rrp;	/* right record pointer */
278862306a36Sopenharmony_ci
278962306a36Sopenharmony_ci		lrp = xfs_btree_rec_addr(cur, src_index, left);
279062306a36Sopenharmony_ci		rrp = xfs_btree_rec_addr(cur, 1, right);
279162306a36Sopenharmony_ci
279262306a36Sopenharmony_ci		/* Copy records to the new block. */
279362306a36Sopenharmony_ci		xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
279462306a36Sopenharmony_ci		xfs_btree_log_recs(cur, rbp, 1, rrecs);
279562306a36Sopenharmony_ci
279662306a36Sopenharmony_ci		/* Stash the keys of the new block for later insertion. */
279762306a36Sopenharmony_ci		xfs_btree_get_leaf_keys(cur, right, key);
279862306a36Sopenharmony_ci	}
279962306a36Sopenharmony_ci
280062306a36Sopenharmony_ci	/*
280162306a36Sopenharmony_ci	 * Find the left block number by looking in the buffer.
280262306a36Sopenharmony_ci	 * Adjust sibling pointers.
280362306a36Sopenharmony_ci	 */
280462306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
280562306a36Sopenharmony_ci	xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
280662306a36Sopenharmony_ci	xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
280762306a36Sopenharmony_ci	xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
280862306a36Sopenharmony_ci
280962306a36Sopenharmony_ci	xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
281062306a36Sopenharmony_ci	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
281162306a36Sopenharmony_ci
281262306a36Sopenharmony_ci	/*
281362306a36Sopenharmony_ci	 * If there's a block to the new block's right, make that block
281462306a36Sopenharmony_ci	 * point back to right instead of to left.
281562306a36Sopenharmony_ci	 */
281662306a36Sopenharmony_ci	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
281762306a36Sopenharmony_ci		error = xfs_btree_read_buf_block(cur, &rrptr,
281862306a36Sopenharmony_ci							0, &rrblock, &rrbp);
281962306a36Sopenharmony_ci		if (error)
282062306a36Sopenharmony_ci			goto error0;
282162306a36Sopenharmony_ci		xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
282262306a36Sopenharmony_ci		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
282362306a36Sopenharmony_ci	}
282462306a36Sopenharmony_ci
282562306a36Sopenharmony_ci	/* Update the parent high keys of the left block, if needed. */
282662306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_OVERLAPPING) {
282762306a36Sopenharmony_ci		error = xfs_btree_update_keys(cur, level);
282862306a36Sopenharmony_ci		if (error)
282962306a36Sopenharmony_ci			goto error0;
283062306a36Sopenharmony_ci	}
283162306a36Sopenharmony_ci
283262306a36Sopenharmony_ci	/*
283362306a36Sopenharmony_ci	 * If the cursor is really in the right block, move it there.
283462306a36Sopenharmony_ci	 * If it's just pointing past the last entry in left, then we'll
283562306a36Sopenharmony_ci	 * insert there, so don't change anything in that case.
283662306a36Sopenharmony_ci	 */
283762306a36Sopenharmony_ci	if (cur->bc_levels[level].ptr > lrecs + 1) {
283862306a36Sopenharmony_ci		xfs_btree_setbuf(cur, level, rbp);
283962306a36Sopenharmony_ci		cur->bc_levels[level].ptr -= lrecs;
284062306a36Sopenharmony_ci	}
284162306a36Sopenharmony_ci	/*
284262306a36Sopenharmony_ci	 * If there are more levels, we'll need another cursor which refers
284362306a36Sopenharmony_ci	 * the right block, no matter where this cursor was.
284462306a36Sopenharmony_ci	 */
284562306a36Sopenharmony_ci	if (level + 1 < cur->bc_nlevels) {
284662306a36Sopenharmony_ci		error = xfs_btree_dup_cursor(cur, curp);
284762306a36Sopenharmony_ci		if (error)
284862306a36Sopenharmony_ci			goto error0;
284962306a36Sopenharmony_ci		(*curp)->bc_levels[level + 1].ptr++;
285062306a36Sopenharmony_ci	}
285162306a36Sopenharmony_ci	*ptrp = rptr;
285262306a36Sopenharmony_ci	*stat = 1;
285362306a36Sopenharmony_ci	return 0;
285462306a36Sopenharmony_ciout0:
285562306a36Sopenharmony_ci	*stat = 0;
285662306a36Sopenharmony_ci	return 0;
285762306a36Sopenharmony_ci
285862306a36Sopenharmony_cierror0:
285962306a36Sopenharmony_ci	return error;
286062306a36Sopenharmony_ci}
286162306a36Sopenharmony_ci
286262306a36Sopenharmony_ci#ifdef __KERNEL__
/*
 * Argument bundle used to hand a btree split to a workqueue worker.
 * xfs_btree_split() fills one in on its own stack, queues @work and waits
 * on @done; xfs_btree_split_worker() unpacks it.
 */
struct xfs_btree_split_args {
	/* The six arguments passed through to __xfs_btree_split(). */
	struct xfs_btree_cur	*cur;
	int			level;
	union xfs_btree_ptr	*ptrp;
	union xfs_btree_key	*key;
	struct xfs_btree_cur	**curp;
	int			*stat;		/* success/failure */
	/* Return value of __xfs_btree_split(), stored by the worker. */
	int			result;
	bool			kswapd;	/* allocation in kswapd context */
	/* Completed by the worker once @result has been stored. */
	struct completion	*done;
	/* Work item embedded here so the worker can container_of() it. */
	struct work_struct	work;
};
287562306a36Sopenharmony_ci
287662306a36Sopenharmony_ci/*
287762306a36Sopenharmony_ci * Stack switching interfaces for allocation
287862306a36Sopenharmony_ci */
287962306a36Sopenharmony_cistatic void
288062306a36Sopenharmony_cixfs_btree_split_worker(
288162306a36Sopenharmony_ci	struct work_struct	*work)
288262306a36Sopenharmony_ci{
288362306a36Sopenharmony_ci	struct xfs_btree_split_args	*args = container_of(work,
288462306a36Sopenharmony_ci						struct xfs_btree_split_args, work);
288562306a36Sopenharmony_ci	unsigned long		pflags;
288662306a36Sopenharmony_ci	unsigned long		new_pflags = 0;
288762306a36Sopenharmony_ci
288862306a36Sopenharmony_ci	/*
288962306a36Sopenharmony_ci	 * we are in a transaction context here, but may also be doing work
289062306a36Sopenharmony_ci	 * in kswapd context, and hence we may need to inherit that state
289162306a36Sopenharmony_ci	 * temporarily to ensure that we don't block waiting for memory reclaim
289262306a36Sopenharmony_ci	 * in any way.
289362306a36Sopenharmony_ci	 */
289462306a36Sopenharmony_ci	if (args->kswapd)
289562306a36Sopenharmony_ci		new_pflags |= PF_MEMALLOC | PF_KSWAPD;
289662306a36Sopenharmony_ci
289762306a36Sopenharmony_ci	current_set_flags_nested(&pflags, new_pflags);
289862306a36Sopenharmony_ci	xfs_trans_set_context(args->cur->bc_tp);
289962306a36Sopenharmony_ci
290062306a36Sopenharmony_ci	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
290162306a36Sopenharmony_ci					 args->key, args->curp, args->stat);
290262306a36Sopenharmony_ci
290362306a36Sopenharmony_ci	xfs_trans_clear_context(args->cur->bc_tp);
290462306a36Sopenharmony_ci	current_restore_flags_nested(&pflags, new_pflags);
290562306a36Sopenharmony_ci
290662306a36Sopenharmony_ci	/*
290762306a36Sopenharmony_ci	 * Do not access args after complete() has run here. We don't own args
290862306a36Sopenharmony_ci	 * and the owner may run and free args before we return here.
290962306a36Sopenharmony_ci	 */
291062306a36Sopenharmony_ci	complete(args->done);
291162306a36Sopenharmony_ci
291262306a36Sopenharmony_ci}
291362306a36Sopenharmony_ci
291462306a36Sopenharmony_ci/*
291562306a36Sopenharmony_ci * BMBT split requests often come in with little stack to work on so we push
291662306a36Sopenharmony_ci * them off to a worker thread so there is lots of stack to use. For the other
291762306a36Sopenharmony_ci * btree types, just call directly to avoid the context switch overhead here.
291862306a36Sopenharmony_ci *
291962306a36Sopenharmony_ci * Care must be taken here - the work queue rescuer thread introduces potential
292062306a36Sopenharmony_ci * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new
292162306a36Sopenharmony_ci * AGFs to allocate blocks. A task being run by the rescuer could attempt to
292262306a36Sopenharmony_ci * lock an AGF that is already locked by a task queued to run by the rescuer,
292362306a36Sopenharmony_ci * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to
292462306a36Sopenharmony_ci * release it until the current thread it is running gains the lock.
292562306a36Sopenharmony_ci *
292662306a36Sopenharmony_ci * To avoid this issue, we only ever queue BMBT splits that don't have an AGF
292762306a36Sopenharmony_ci * already locked to allocate from. The only place that doesn't hold an AGF
292862306a36Sopenharmony_ci * locked is unwritten extent conversion at IO completion, but that has already
292962306a36Sopenharmony_ci * been offloaded to a worker thread and hence has no stack consumption issues
293062306a36Sopenharmony_ci * we have to worry about.
293162306a36Sopenharmony_ci */
293262306a36Sopenharmony_ciSTATIC int					/* error */
293362306a36Sopenharmony_cixfs_btree_split(
293462306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
293562306a36Sopenharmony_ci	int			level,
293662306a36Sopenharmony_ci	union xfs_btree_ptr	*ptrp,
293762306a36Sopenharmony_ci	union xfs_btree_key	*key,
293862306a36Sopenharmony_ci	struct xfs_btree_cur	**curp,
293962306a36Sopenharmony_ci	int			*stat)		/* success/failure */
294062306a36Sopenharmony_ci{
294162306a36Sopenharmony_ci	struct xfs_btree_split_args	args;
294262306a36Sopenharmony_ci	DECLARE_COMPLETION_ONSTACK(done);
294362306a36Sopenharmony_ci
294462306a36Sopenharmony_ci	if (cur->bc_btnum != XFS_BTNUM_BMAP ||
294562306a36Sopenharmony_ci	    cur->bc_tp->t_highest_agno == NULLAGNUMBER)
294662306a36Sopenharmony_ci		return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
294762306a36Sopenharmony_ci
294862306a36Sopenharmony_ci	args.cur = cur;
294962306a36Sopenharmony_ci	args.level = level;
295062306a36Sopenharmony_ci	args.ptrp = ptrp;
295162306a36Sopenharmony_ci	args.key = key;
295262306a36Sopenharmony_ci	args.curp = curp;
295362306a36Sopenharmony_ci	args.stat = stat;
295462306a36Sopenharmony_ci	args.done = &done;
295562306a36Sopenharmony_ci	args.kswapd = current_is_kswapd();
295662306a36Sopenharmony_ci	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
295762306a36Sopenharmony_ci	queue_work(xfs_alloc_wq, &args.work);
295862306a36Sopenharmony_ci	wait_for_completion(&done);
295962306a36Sopenharmony_ci	destroy_work_on_stack(&args.work);
296062306a36Sopenharmony_ci	return args.result;
296162306a36Sopenharmony_ci}
296262306a36Sopenharmony_ci#else
296362306a36Sopenharmony_ci#define xfs_btree_split	__xfs_btree_split
296462306a36Sopenharmony_ci#endif /* __KERNEL__ */
296562306a36Sopenharmony_ci
296662306a36Sopenharmony_ci
296762306a36Sopenharmony_ci/*
296862306a36Sopenharmony_ci * Copy the old inode root contents into a real block and make the
296962306a36Sopenharmony_ci * broot point to it.
297062306a36Sopenharmony_ci */
297162306a36Sopenharmony_ciint						/* error */
297262306a36Sopenharmony_cixfs_btree_new_iroot(
297362306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,		/* btree cursor */
297462306a36Sopenharmony_ci	int			*logflags,	/* logging flags for inode */
297562306a36Sopenharmony_ci	int			*stat)		/* return status - 0 fail */
297662306a36Sopenharmony_ci{
297762306a36Sopenharmony_ci	struct xfs_buf		*cbp;		/* buffer for cblock */
297862306a36Sopenharmony_ci	struct xfs_btree_block	*block;		/* btree block */
297962306a36Sopenharmony_ci	struct xfs_btree_block	*cblock;	/* child btree block */
298062306a36Sopenharmony_ci	union xfs_btree_key	*ckp;		/* child key pointer */
298162306a36Sopenharmony_ci	union xfs_btree_ptr	*cpp;		/* child ptr pointer */
298262306a36Sopenharmony_ci	union xfs_btree_key	*kp;		/* pointer to btree key */
298362306a36Sopenharmony_ci	union xfs_btree_ptr	*pp;		/* pointer to block addr */
298462306a36Sopenharmony_ci	union xfs_btree_ptr	nptr;		/* new block addr */
298562306a36Sopenharmony_ci	int			level;		/* btree level */
298662306a36Sopenharmony_ci	int			error;		/* error return code */
298762306a36Sopenharmony_ci	int			i;		/* loop counter */
298862306a36Sopenharmony_ci
298962306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, newroot);
299062306a36Sopenharmony_ci
299162306a36Sopenharmony_ci	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
299262306a36Sopenharmony_ci
299362306a36Sopenharmony_ci	level = cur->bc_nlevels - 1;
299462306a36Sopenharmony_ci
299562306a36Sopenharmony_ci	block = xfs_btree_get_iroot(cur);
299662306a36Sopenharmony_ci	pp = xfs_btree_ptr_addr(cur, 1, block);
299762306a36Sopenharmony_ci
299862306a36Sopenharmony_ci	/* Allocate the new block. If we can't do it, we're toast. Give up. */
299962306a36Sopenharmony_ci	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
300062306a36Sopenharmony_ci	if (error)
300162306a36Sopenharmony_ci		goto error0;
300262306a36Sopenharmony_ci	if (*stat == 0)
300362306a36Sopenharmony_ci		return 0;
300462306a36Sopenharmony_ci
300562306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, alloc);
300662306a36Sopenharmony_ci
300762306a36Sopenharmony_ci	/* Copy the root into a real block. */
300862306a36Sopenharmony_ci	error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
300962306a36Sopenharmony_ci	if (error)
301062306a36Sopenharmony_ci		goto error0;
301162306a36Sopenharmony_ci
301262306a36Sopenharmony_ci	/*
301362306a36Sopenharmony_ci	 * we can't just memcpy() the root in for CRC enabled btree blocks.
301462306a36Sopenharmony_ci	 * In that case have to also ensure the blkno remains correct
301562306a36Sopenharmony_ci	 */
301662306a36Sopenharmony_ci	memcpy(cblock, block, xfs_btree_block_len(cur));
301762306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
301862306a36Sopenharmony_ci		__be64 bno = cpu_to_be64(xfs_buf_daddr(cbp));
301962306a36Sopenharmony_ci		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
302062306a36Sopenharmony_ci			cblock->bb_u.l.bb_blkno = bno;
302162306a36Sopenharmony_ci		else
302262306a36Sopenharmony_ci			cblock->bb_u.s.bb_blkno = bno;
302362306a36Sopenharmony_ci	}
302462306a36Sopenharmony_ci
302562306a36Sopenharmony_ci	be16_add_cpu(&block->bb_level, 1);
302662306a36Sopenharmony_ci	xfs_btree_set_numrecs(block, 1);
302762306a36Sopenharmony_ci	cur->bc_nlevels++;
302862306a36Sopenharmony_ci	ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
302962306a36Sopenharmony_ci	cur->bc_levels[level + 1].ptr = 1;
303062306a36Sopenharmony_ci
303162306a36Sopenharmony_ci	kp = xfs_btree_key_addr(cur, 1, block);
303262306a36Sopenharmony_ci	ckp = xfs_btree_key_addr(cur, 1, cblock);
303362306a36Sopenharmony_ci	xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
303462306a36Sopenharmony_ci
303562306a36Sopenharmony_ci	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
303662306a36Sopenharmony_ci	for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
303762306a36Sopenharmony_ci		error = xfs_btree_debug_check_ptr(cur, pp, i, level);
303862306a36Sopenharmony_ci		if (error)
303962306a36Sopenharmony_ci			goto error0;
304062306a36Sopenharmony_ci	}
304162306a36Sopenharmony_ci
304262306a36Sopenharmony_ci	xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
304362306a36Sopenharmony_ci
304462306a36Sopenharmony_ci	error = xfs_btree_debug_check_ptr(cur, &nptr, 0, level);
304562306a36Sopenharmony_ci	if (error)
304662306a36Sopenharmony_ci		goto error0;
304762306a36Sopenharmony_ci
304862306a36Sopenharmony_ci	xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
304962306a36Sopenharmony_ci
305062306a36Sopenharmony_ci	xfs_iroot_realloc(cur->bc_ino.ip,
305162306a36Sopenharmony_ci			  1 - xfs_btree_get_numrecs(cblock),
305262306a36Sopenharmony_ci			  cur->bc_ino.whichfork);
305362306a36Sopenharmony_ci
305462306a36Sopenharmony_ci	xfs_btree_setbuf(cur, level, cbp);
305562306a36Sopenharmony_ci
305662306a36Sopenharmony_ci	/*
305762306a36Sopenharmony_ci	 * Do all this logging at the end so that
305862306a36Sopenharmony_ci	 * the root is at the right level.
305962306a36Sopenharmony_ci	 */
306062306a36Sopenharmony_ci	xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
306162306a36Sopenharmony_ci	xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
306262306a36Sopenharmony_ci	xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
306362306a36Sopenharmony_ci
306462306a36Sopenharmony_ci	*logflags |=
306562306a36Sopenharmony_ci		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork);
306662306a36Sopenharmony_ci	*stat = 1;
306762306a36Sopenharmony_ci	return 0;
306862306a36Sopenharmony_cierror0:
306962306a36Sopenharmony_ci	return error;
307062306a36Sopenharmony_ci}
307162306a36Sopenharmony_ci
307262306a36Sopenharmony_ci/*
307362306a36Sopenharmony_ci * Allocate a new root block, fill it in.
307462306a36Sopenharmony_ci */
307562306a36Sopenharmony_ciSTATIC int				/* error */
307662306a36Sopenharmony_cixfs_btree_new_root(
307762306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
307862306a36Sopenharmony_ci	int			*stat)	/* success/failure */
307962306a36Sopenharmony_ci{
308062306a36Sopenharmony_ci	struct xfs_btree_block	*block;	/* one half of the old root block */
308162306a36Sopenharmony_ci	struct xfs_buf		*bp;	/* buffer containing block */
308262306a36Sopenharmony_ci	int			error;	/* error return value */
308362306a36Sopenharmony_ci	struct xfs_buf		*lbp;	/* left buffer pointer */
308462306a36Sopenharmony_ci	struct xfs_btree_block	*left;	/* left btree block */
308562306a36Sopenharmony_ci	struct xfs_buf		*nbp;	/* new (root) buffer */
308662306a36Sopenharmony_ci	struct xfs_btree_block	*new;	/* new (root) btree block */
308762306a36Sopenharmony_ci	int			nptr;	/* new value for key index, 1 or 2 */
308862306a36Sopenharmony_ci	struct xfs_buf		*rbp;	/* right buffer pointer */
308962306a36Sopenharmony_ci	struct xfs_btree_block	*right;	/* right btree block */
309062306a36Sopenharmony_ci	union xfs_btree_ptr	rptr;
309162306a36Sopenharmony_ci	union xfs_btree_ptr	lptr;
309262306a36Sopenharmony_ci
309362306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, newroot);
309462306a36Sopenharmony_ci
309562306a36Sopenharmony_ci	/* initialise our start point from the cursor */
309662306a36Sopenharmony_ci	cur->bc_ops->init_ptr_from_cur(cur, &rptr);
309762306a36Sopenharmony_ci
309862306a36Sopenharmony_ci	/* Allocate the new block. If we can't do it, we're toast. Give up. */
309962306a36Sopenharmony_ci	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
310062306a36Sopenharmony_ci	if (error)
310162306a36Sopenharmony_ci		goto error0;
310262306a36Sopenharmony_ci	if (*stat == 0)
310362306a36Sopenharmony_ci		goto out0;
310462306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, alloc);
310562306a36Sopenharmony_ci
310662306a36Sopenharmony_ci	/* Set up the new block. */
310762306a36Sopenharmony_ci	error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
310862306a36Sopenharmony_ci	if (error)
310962306a36Sopenharmony_ci		goto error0;
311062306a36Sopenharmony_ci
311162306a36Sopenharmony_ci	/* Set the root in the holding structure  increasing the level by 1. */
311262306a36Sopenharmony_ci	cur->bc_ops->set_root(cur, &lptr, 1);
311362306a36Sopenharmony_ci
311462306a36Sopenharmony_ci	/*
311562306a36Sopenharmony_ci	 * At the previous root level there are now two blocks: the old root,
311662306a36Sopenharmony_ci	 * and the new block generated when it was split.  We don't know which
311762306a36Sopenharmony_ci	 * one the cursor is pointing at, so we set up variables "left" and
311862306a36Sopenharmony_ci	 * "right" for each case.
311962306a36Sopenharmony_ci	 */
312062306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
312162306a36Sopenharmony_ci
312262306a36Sopenharmony_ci#ifdef DEBUG
312362306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
312462306a36Sopenharmony_ci	if (error)
312562306a36Sopenharmony_ci		goto error0;
312662306a36Sopenharmony_ci#endif
312762306a36Sopenharmony_ci
312862306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
312962306a36Sopenharmony_ci	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
313062306a36Sopenharmony_ci		/* Our block is left, pick up the right block. */
313162306a36Sopenharmony_ci		lbp = bp;
313262306a36Sopenharmony_ci		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
313362306a36Sopenharmony_ci		left = block;
313462306a36Sopenharmony_ci		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
313562306a36Sopenharmony_ci		if (error)
313662306a36Sopenharmony_ci			goto error0;
313762306a36Sopenharmony_ci		bp = rbp;
313862306a36Sopenharmony_ci		nptr = 1;
313962306a36Sopenharmony_ci	} else {
314062306a36Sopenharmony_ci		/* Our block is right, pick up the left block. */
314162306a36Sopenharmony_ci		rbp = bp;
314262306a36Sopenharmony_ci		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
314362306a36Sopenharmony_ci		right = block;
314462306a36Sopenharmony_ci		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
314562306a36Sopenharmony_ci		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
314662306a36Sopenharmony_ci		if (error)
314762306a36Sopenharmony_ci			goto error0;
314862306a36Sopenharmony_ci		bp = lbp;
314962306a36Sopenharmony_ci		nptr = 2;
315062306a36Sopenharmony_ci	}
315162306a36Sopenharmony_ci
315262306a36Sopenharmony_ci	/* Fill in the new block's btree header and log it. */
315362306a36Sopenharmony_ci	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
315462306a36Sopenharmony_ci	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
315562306a36Sopenharmony_ci	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
315662306a36Sopenharmony_ci			!xfs_btree_ptr_is_null(cur, &rptr));
315762306a36Sopenharmony_ci
315862306a36Sopenharmony_ci	/* Fill in the key data in the new root. */
315962306a36Sopenharmony_ci	if (xfs_btree_get_level(left) > 0) {
316062306a36Sopenharmony_ci		/*
316162306a36Sopenharmony_ci		 * Get the keys for the left block's keys and put them directly
316262306a36Sopenharmony_ci		 * in the parent block.  Do the same for the right block.
316362306a36Sopenharmony_ci		 */
316462306a36Sopenharmony_ci		xfs_btree_get_node_keys(cur, left,
316562306a36Sopenharmony_ci				xfs_btree_key_addr(cur, 1, new));
316662306a36Sopenharmony_ci		xfs_btree_get_node_keys(cur, right,
316762306a36Sopenharmony_ci				xfs_btree_key_addr(cur, 2, new));
316862306a36Sopenharmony_ci	} else {
316962306a36Sopenharmony_ci		/*
317062306a36Sopenharmony_ci		 * Get the keys for the left block's records and put them
317162306a36Sopenharmony_ci		 * directly in the parent block.  Do the same for the right
317262306a36Sopenharmony_ci		 * block.
317362306a36Sopenharmony_ci		 */
317462306a36Sopenharmony_ci		xfs_btree_get_leaf_keys(cur, left,
317562306a36Sopenharmony_ci			xfs_btree_key_addr(cur, 1, new));
317662306a36Sopenharmony_ci		xfs_btree_get_leaf_keys(cur, right,
317762306a36Sopenharmony_ci			xfs_btree_key_addr(cur, 2, new));
317862306a36Sopenharmony_ci	}
317962306a36Sopenharmony_ci	xfs_btree_log_keys(cur, nbp, 1, 2);
318062306a36Sopenharmony_ci
318162306a36Sopenharmony_ci	/* Fill in the pointer data in the new root. */
318262306a36Sopenharmony_ci	xfs_btree_copy_ptrs(cur,
318362306a36Sopenharmony_ci		xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
318462306a36Sopenharmony_ci	xfs_btree_copy_ptrs(cur,
318562306a36Sopenharmony_ci		xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
318662306a36Sopenharmony_ci	xfs_btree_log_ptrs(cur, nbp, 1, 2);
318762306a36Sopenharmony_ci
318862306a36Sopenharmony_ci	/* Fix up the cursor. */
318962306a36Sopenharmony_ci	xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
319062306a36Sopenharmony_ci	cur->bc_levels[cur->bc_nlevels].ptr = nptr;
319162306a36Sopenharmony_ci	cur->bc_nlevels++;
319262306a36Sopenharmony_ci	ASSERT(cur->bc_nlevels <= cur->bc_maxlevels);
319362306a36Sopenharmony_ci	*stat = 1;
319462306a36Sopenharmony_ci	return 0;
319562306a36Sopenharmony_cierror0:
319662306a36Sopenharmony_ci	return error;
319762306a36Sopenharmony_ciout0:
319862306a36Sopenharmony_ci	*stat = 0;
319962306a36Sopenharmony_ci	return 0;
320062306a36Sopenharmony_ci}
320162306a36Sopenharmony_ci
320262306a36Sopenharmony_ciSTATIC int
320362306a36Sopenharmony_cixfs_btree_make_block_unfull(
320462306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
320562306a36Sopenharmony_ci	int			level,	/* btree level */
320662306a36Sopenharmony_ci	int			numrecs,/* # of recs in block */
320762306a36Sopenharmony_ci	int			*oindex,/* old tree index */
320862306a36Sopenharmony_ci	int			*index,	/* new tree index */
320962306a36Sopenharmony_ci	union xfs_btree_ptr	*nptr,	/* new btree ptr */
321062306a36Sopenharmony_ci	struct xfs_btree_cur	**ncur,	/* new btree cursor */
321162306a36Sopenharmony_ci	union xfs_btree_key	*key,	/* key of new block */
321262306a36Sopenharmony_ci	int			*stat)
321362306a36Sopenharmony_ci{
321462306a36Sopenharmony_ci	int			error = 0;
321562306a36Sopenharmony_ci
321662306a36Sopenharmony_ci	if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
321762306a36Sopenharmony_ci	    level == cur->bc_nlevels - 1) {
321862306a36Sopenharmony_ci		struct xfs_inode *ip = cur->bc_ino.ip;
321962306a36Sopenharmony_ci
322062306a36Sopenharmony_ci		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
322162306a36Sopenharmony_ci			/* A root block that can be made bigger. */
322262306a36Sopenharmony_ci			xfs_iroot_realloc(ip, 1, cur->bc_ino.whichfork);
322362306a36Sopenharmony_ci			*stat = 1;
322462306a36Sopenharmony_ci		} else {
322562306a36Sopenharmony_ci			/* A root block that needs replacing */
322662306a36Sopenharmony_ci			int	logflags = 0;
322762306a36Sopenharmony_ci
322862306a36Sopenharmony_ci			error = xfs_btree_new_iroot(cur, &logflags, stat);
322962306a36Sopenharmony_ci			if (error || *stat == 0)
323062306a36Sopenharmony_ci				return error;
323162306a36Sopenharmony_ci
323262306a36Sopenharmony_ci			xfs_trans_log_inode(cur->bc_tp, ip, logflags);
323362306a36Sopenharmony_ci		}
323462306a36Sopenharmony_ci
323562306a36Sopenharmony_ci		return 0;
323662306a36Sopenharmony_ci	}
323762306a36Sopenharmony_ci
323862306a36Sopenharmony_ci	/* First, try shifting an entry to the right neighbor. */
323962306a36Sopenharmony_ci	error = xfs_btree_rshift(cur, level, stat);
324062306a36Sopenharmony_ci	if (error || *stat)
324162306a36Sopenharmony_ci		return error;
324262306a36Sopenharmony_ci
324362306a36Sopenharmony_ci	/* Next, try shifting an entry to the left neighbor. */
324462306a36Sopenharmony_ci	error = xfs_btree_lshift(cur, level, stat);
324562306a36Sopenharmony_ci	if (error)
324662306a36Sopenharmony_ci		return error;
324762306a36Sopenharmony_ci
324862306a36Sopenharmony_ci	if (*stat) {
324962306a36Sopenharmony_ci		*oindex = *index = cur->bc_levels[level].ptr;
325062306a36Sopenharmony_ci		return 0;
325162306a36Sopenharmony_ci	}
325262306a36Sopenharmony_ci
325362306a36Sopenharmony_ci	/*
325462306a36Sopenharmony_ci	 * Next, try splitting the current block in half.
325562306a36Sopenharmony_ci	 *
325662306a36Sopenharmony_ci	 * If this works we have to re-set our variables because we
325762306a36Sopenharmony_ci	 * could be in a different block now.
325862306a36Sopenharmony_ci	 */
325962306a36Sopenharmony_ci	error = xfs_btree_split(cur, level, nptr, key, ncur, stat);
326062306a36Sopenharmony_ci	if (error || *stat == 0)
326162306a36Sopenharmony_ci		return error;
326262306a36Sopenharmony_ci
326362306a36Sopenharmony_ci
326462306a36Sopenharmony_ci	*index = cur->bc_levels[level].ptr;
326562306a36Sopenharmony_ci	return 0;
326662306a36Sopenharmony_ci}
326762306a36Sopenharmony_ci
326862306a36Sopenharmony_ci/*
326962306a36Sopenharmony_ci * Insert one record/level.  Return information to the caller
327062306a36Sopenharmony_ci * allowing the next level up to proceed if necessary.
327162306a36Sopenharmony_ci */
327262306a36Sopenharmony_ciSTATIC int
327362306a36Sopenharmony_cixfs_btree_insrec(
327462306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
327562306a36Sopenharmony_ci	int			level,	/* level to insert record at */
327662306a36Sopenharmony_ci	union xfs_btree_ptr	*ptrp,	/* i/o: block number inserted */
327762306a36Sopenharmony_ci	union xfs_btree_rec	*rec,	/* record to insert */
327862306a36Sopenharmony_ci	union xfs_btree_key	*key,	/* i/o: block key for ptrp */
327962306a36Sopenharmony_ci	struct xfs_btree_cur	**curp,	/* output: new cursor replacing cur */
328062306a36Sopenharmony_ci	int			*stat)	/* success/failure */
328162306a36Sopenharmony_ci{
328262306a36Sopenharmony_ci	struct xfs_btree_block	*block;	/* btree block */
328362306a36Sopenharmony_ci	struct xfs_buf		*bp;	/* buffer for block */
328462306a36Sopenharmony_ci	union xfs_btree_ptr	nptr;	/* new block ptr */
328562306a36Sopenharmony_ci	struct xfs_btree_cur	*ncur = NULL;	/* new btree cursor */
328662306a36Sopenharmony_ci	union xfs_btree_key	nkey;	/* new block key */
328762306a36Sopenharmony_ci	union xfs_btree_key	*lkey;
328862306a36Sopenharmony_ci	int			optr;	/* old key/record index */
328962306a36Sopenharmony_ci	int			ptr;	/* key/record index */
329062306a36Sopenharmony_ci	int			numrecs;/* number of records */
329162306a36Sopenharmony_ci	int			error;	/* error return value */
329262306a36Sopenharmony_ci	int			i;
329362306a36Sopenharmony_ci	xfs_daddr_t		old_bn;
329462306a36Sopenharmony_ci
329562306a36Sopenharmony_ci	ncur = NULL;
329662306a36Sopenharmony_ci	lkey = &nkey;
329762306a36Sopenharmony_ci
329862306a36Sopenharmony_ci	/*
329962306a36Sopenharmony_ci	 * If we have an external root pointer, and we've made it to the
330062306a36Sopenharmony_ci	 * root level, allocate a new root block and we're done.
330162306a36Sopenharmony_ci	 */
330262306a36Sopenharmony_ci	if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
330362306a36Sopenharmony_ci	    (level >= cur->bc_nlevels)) {
330462306a36Sopenharmony_ci		error = xfs_btree_new_root(cur, stat);
330562306a36Sopenharmony_ci		xfs_btree_set_ptr_null(cur, ptrp);
330662306a36Sopenharmony_ci
330762306a36Sopenharmony_ci		return error;
330862306a36Sopenharmony_ci	}
330962306a36Sopenharmony_ci
331062306a36Sopenharmony_ci	/* If we're off the left edge, return failure. */
331162306a36Sopenharmony_ci	ptr = cur->bc_levels[level].ptr;
331262306a36Sopenharmony_ci	if (ptr == 0) {
331362306a36Sopenharmony_ci		*stat = 0;
331462306a36Sopenharmony_ci		return 0;
331562306a36Sopenharmony_ci	}
331662306a36Sopenharmony_ci
331762306a36Sopenharmony_ci	optr = ptr;
331862306a36Sopenharmony_ci
331962306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, insrec);
332062306a36Sopenharmony_ci
332162306a36Sopenharmony_ci	/* Get pointers to the btree buffer and block. */
332262306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
332362306a36Sopenharmony_ci	old_bn = bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL;
332462306a36Sopenharmony_ci	numrecs = xfs_btree_get_numrecs(block);
332562306a36Sopenharmony_ci
332662306a36Sopenharmony_ci#ifdef DEBUG
332762306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, block, level, bp);
332862306a36Sopenharmony_ci	if (error)
332962306a36Sopenharmony_ci		goto error0;
333062306a36Sopenharmony_ci
333162306a36Sopenharmony_ci	/* Check that the new entry is being inserted in the right place. */
333262306a36Sopenharmony_ci	if (ptr <= numrecs) {
333362306a36Sopenharmony_ci		if (level == 0) {
333462306a36Sopenharmony_ci			ASSERT(cur->bc_ops->recs_inorder(cur, rec,
333562306a36Sopenharmony_ci				xfs_btree_rec_addr(cur, ptr, block)));
333662306a36Sopenharmony_ci		} else {
333762306a36Sopenharmony_ci			ASSERT(cur->bc_ops->keys_inorder(cur, key,
333862306a36Sopenharmony_ci				xfs_btree_key_addr(cur, ptr, block)));
333962306a36Sopenharmony_ci		}
334062306a36Sopenharmony_ci	}
334162306a36Sopenharmony_ci#endif
334262306a36Sopenharmony_ci
334362306a36Sopenharmony_ci	/*
334462306a36Sopenharmony_ci	 * If the block is full, we can't insert the new entry until we
334562306a36Sopenharmony_ci	 * make the block un-full.
334662306a36Sopenharmony_ci	 */
334762306a36Sopenharmony_ci	xfs_btree_set_ptr_null(cur, &nptr);
334862306a36Sopenharmony_ci	if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
334962306a36Sopenharmony_ci		error = xfs_btree_make_block_unfull(cur, level, numrecs,
335062306a36Sopenharmony_ci					&optr, &ptr, &nptr, &ncur, lkey, stat);
335162306a36Sopenharmony_ci		if (error || *stat == 0)
335262306a36Sopenharmony_ci			goto error0;
335362306a36Sopenharmony_ci	}
335462306a36Sopenharmony_ci
335562306a36Sopenharmony_ci	/*
335662306a36Sopenharmony_ci	 * The current block may have changed if the block was
335762306a36Sopenharmony_ci	 * previously full and we have just made space in it.
335862306a36Sopenharmony_ci	 */
335962306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
336062306a36Sopenharmony_ci	numrecs = xfs_btree_get_numrecs(block);
336162306a36Sopenharmony_ci
336262306a36Sopenharmony_ci#ifdef DEBUG
336362306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, block, level, bp);
336462306a36Sopenharmony_ci	if (error)
336562306a36Sopenharmony_ci		goto error0;
336662306a36Sopenharmony_ci#endif
336762306a36Sopenharmony_ci
336862306a36Sopenharmony_ci	/*
336962306a36Sopenharmony_ci	 * At this point we know there's room for our new entry in the block
337062306a36Sopenharmony_ci	 * we're pointing at.
337162306a36Sopenharmony_ci	 */
337262306a36Sopenharmony_ci	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
337362306a36Sopenharmony_ci
337462306a36Sopenharmony_ci	if (level > 0) {
337562306a36Sopenharmony_ci		/* It's a nonleaf. make a hole in the keys and ptrs */
337662306a36Sopenharmony_ci		union xfs_btree_key	*kp;
337762306a36Sopenharmony_ci		union xfs_btree_ptr	*pp;
337862306a36Sopenharmony_ci
337962306a36Sopenharmony_ci		kp = xfs_btree_key_addr(cur, ptr, block);
338062306a36Sopenharmony_ci		pp = xfs_btree_ptr_addr(cur, ptr, block);
338162306a36Sopenharmony_ci
338262306a36Sopenharmony_ci		for (i = numrecs - ptr; i >= 0; i--) {
338362306a36Sopenharmony_ci			error = xfs_btree_debug_check_ptr(cur, pp, i, level);
338462306a36Sopenharmony_ci			if (error)
338562306a36Sopenharmony_ci				goto error0;
338662306a36Sopenharmony_ci		}
338762306a36Sopenharmony_ci
338862306a36Sopenharmony_ci		xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
338962306a36Sopenharmony_ci		xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
339062306a36Sopenharmony_ci
339162306a36Sopenharmony_ci		error = xfs_btree_debug_check_ptr(cur, ptrp, 0, level);
339262306a36Sopenharmony_ci		if (error)
339362306a36Sopenharmony_ci			goto error0;
339462306a36Sopenharmony_ci
339562306a36Sopenharmony_ci		/* Now put the new data in, bump numrecs and log it. */
339662306a36Sopenharmony_ci		xfs_btree_copy_keys(cur, kp, key, 1);
339762306a36Sopenharmony_ci		xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
339862306a36Sopenharmony_ci		numrecs++;
339962306a36Sopenharmony_ci		xfs_btree_set_numrecs(block, numrecs);
340062306a36Sopenharmony_ci		xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
340162306a36Sopenharmony_ci		xfs_btree_log_keys(cur, bp, ptr, numrecs);
340262306a36Sopenharmony_ci#ifdef DEBUG
340362306a36Sopenharmony_ci		if (ptr < numrecs) {
340462306a36Sopenharmony_ci			ASSERT(cur->bc_ops->keys_inorder(cur, kp,
340562306a36Sopenharmony_ci				xfs_btree_key_addr(cur, ptr + 1, block)));
340662306a36Sopenharmony_ci		}
340762306a36Sopenharmony_ci#endif
340862306a36Sopenharmony_ci	} else {
340962306a36Sopenharmony_ci		/* It's a leaf. make a hole in the records */
341062306a36Sopenharmony_ci		union xfs_btree_rec             *rp;
341162306a36Sopenharmony_ci
341262306a36Sopenharmony_ci		rp = xfs_btree_rec_addr(cur, ptr, block);
341362306a36Sopenharmony_ci
341462306a36Sopenharmony_ci		xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
341562306a36Sopenharmony_ci
341662306a36Sopenharmony_ci		/* Now put the new data in, bump numrecs and log it. */
341762306a36Sopenharmony_ci		xfs_btree_copy_recs(cur, rp, rec, 1);
341862306a36Sopenharmony_ci		xfs_btree_set_numrecs(block, ++numrecs);
341962306a36Sopenharmony_ci		xfs_btree_log_recs(cur, bp, ptr, numrecs);
342062306a36Sopenharmony_ci#ifdef DEBUG
342162306a36Sopenharmony_ci		if (ptr < numrecs) {
342262306a36Sopenharmony_ci			ASSERT(cur->bc_ops->recs_inorder(cur, rp,
342362306a36Sopenharmony_ci				xfs_btree_rec_addr(cur, ptr + 1, block)));
342462306a36Sopenharmony_ci		}
342562306a36Sopenharmony_ci#endif
342662306a36Sopenharmony_ci	}
342762306a36Sopenharmony_ci
342862306a36Sopenharmony_ci	/* Log the new number of records in the btree header. */
342962306a36Sopenharmony_ci	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
343062306a36Sopenharmony_ci
343162306a36Sopenharmony_ci	/*
343262306a36Sopenharmony_ci	 * If we just inserted into a new tree block, we have to
343362306a36Sopenharmony_ci	 * recalculate nkey here because nkey is out of date.
343462306a36Sopenharmony_ci	 *
343562306a36Sopenharmony_ci	 * Otherwise we're just updating an existing block (having shoved
343662306a36Sopenharmony_ci	 * some records into the new tree block), so use the regular key
343762306a36Sopenharmony_ci	 * update mechanism.
343862306a36Sopenharmony_ci	 */
343962306a36Sopenharmony_ci	if (bp && xfs_buf_daddr(bp) != old_bn) {
344062306a36Sopenharmony_ci		xfs_btree_get_keys(cur, block, lkey);
344162306a36Sopenharmony_ci	} else if (xfs_btree_needs_key_update(cur, optr)) {
344262306a36Sopenharmony_ci		error = xfs_btree_update_keys(cur, level);
344362306a36Sopenharmony_ci		if (error)
344462306a36Sopenharmony_ci			goto error0;
344562306a36Sopenharmony_ci	}
344662306a36Sopenharmony_ci
344762306a36Sopenharmony_ci	/*
344862306a36Sopenharmony_ci	 * If we are tracking the last record in the tree and
344962306a36Sopenharmony_ci	 * we are at the far right edge of the tree, update it.
345062306a36Sopenharmony_ci	 */
345162306a36Sopenharmony_ci	if (xfs_btree_is_lastrec(cur, block, level)) {
345262306a36Sopenharmony_ci		cur->bc_ops->update_lastrec(cur, block, rec,
345362306a36Sopenharmony_ci					    ptr, LASTREC_INSREC);
345462306a36Sopenharmony_ci	}
345562306a36Sopenharmony_ci
345662306a36Sopenharmony_ci	/*
345762306a36Sopenharmony_ci	 * Return the new block number, if any.
345862306a36Sopenharmony_ci	 * If there is one, give back a record value and a cursor too.
345962306a36Sopenharmony_ci	 */
346062306a36Sopenharmony_ci	*ptrp = nptr;
346162306a36Sopenharmony_ci	if (!xfs_btree_ptr_is_null(cur, &nptr)) {
346262306a36Sopenharmony_ci		xfs_btree_copy_keys(cur, key, lkey, 1);
346362306a36Sopenharmony_ci		*curp = ncur;
346462306a36Sopenharmony_ci	}
346562306a36Sopenharmony_ci
346662306a36Sopenharmony_ci	*stat = 1;
346762306a36Sopenharmony_ci	return 0;
346862306a36Sopenharmony_ci
346962306a36Sopenharmony_cierror0:
347062306a36Sopenharmony_ci	if (ncur)
347162306a36Sopenharmony_ci		xfs_btree_del_cursor(ncur, error);
347262306a36Sopenharmony_ci	return error;
347362306a36Sopenharmony_ci}
347462306a36Sopenharmony_ci
347562306a36Sopenharmony_ci/*
347662306a36Sopenharmony_ci * Insert the record at the point referenced by cur.
347762306a36Sopenharmony_ci *
347862306a36Sopenharmony_ci * A multi-level split of the tree on insert will invalidate the original
347962306a36Sopenharmony_ci * cursor.  All callers of this function should assume that the cursor is
348062306a36Sopenharmony_ci * no longer valid and revalidate it.
348162306a36Sopenharmony_ci */
348262306a36Sopenharmony_ciint
348362306a36Sopenharmony_cixfs_btree_insert(
348462306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
348562306a36Sopenharmony_ci	int			*stat)
348662306a36Sopenharmony_ci{
348762306a36Sopenharmony_ci	int			error;	/* error return value */
348862306a36Sopenharmony_ci	int			i;	/* result value, 0 for failure */
348962306a36Sopenharmony_ci	int			level;	/* current level number in btree */
349062306a36Sopenharmony_ci	union xfs_btree_ptr	nptr;	/* new block number (split result) */
349162306a36Sopenharmony_ci	struct xfs_btree_cur	*ncur;	/* new cursor (split result) */
349262306a36Sopenharmony_ci	struct xfs_btree_cur	*pcur;	/* previous level's cursor */
349362306a36Sopenharmony_ci	union xfs_btree_key	bkey;	/* key of block to insert */
349462306a36Sopenharmony_ci	union xfs_btree_key	*key;
349562306a36Sopenharmony_ci	union xfs_btree_rec	rec;	/* record to insert */
349662306a36Sopenharmony_ci
349762306a36Sopenharmony_ci	level = 0;
349862306a36Sopenharmony_ci	ncur = NULL;
349962306a36Sopenharmony_ci	pcur = cur;
350062306a36Sopenharmony_ci	key = &bkey;
350162306a36Sopenharmony_ci
350262306a36Sopenharmony_ci	xfs_btree_set_ptr_null(cur, &nptr);
350362306a36Sopenharmony_ci
350462306a36Sopenharmony_ci	/* Make a key out of the record data to be inserted, and save it. */
350562306a36Sopenharmony_ci	cur->bc_ops->init_rec_from_cur(cur, &rec);
350662306a36Sopenharmony_ci	cur->bc_ops->init_key_from_rec(key, &rec);
350762306a36Sopenharmony_ci
350862306a36Sopenharmony_ci	/*
350962306a36Sopenharmony_ci	 * Loop going up the tree, starting at the leaf level.
351062306a36Sopenharmony_ci	 * Stop when we don't get a split block, that must mean that
351162306a36Sopenharmony_ci	 * the insert is finished with this level.
351262306a36Sopenharmony_ci	 */
351362306a36Sopenharmony_ci	do {
351462306a36Sopenharmony_ci		/*
351562306a36Sopenharmony_ci		 * Insert nrec/nptr into this level of the tree.
351662306a36Sopenharmony_ci		 * Note if we fail, nptr will be null.
351762306a36Sopenharmony_ci		 */
351862306a36Sopenharmony_ci		error = xfs_btree_insrec(pcur, level, &nptr, &rec, key,
351962306a36Sopenharmony_ci				&ncur, &i);
352062306a36Sopenharmony_ci		if (error) {
352162306a36Sopenharmony_ci			if (pcur != cur)
352262306a36Sopenharmony_ci				xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
352362306a36Sopenharmony_ci			goto error0;
352462306a36Sopenharmony_ci		}
352562306a36Sopenharmony_ci
352662306a36Sopenharmony_ci		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
352762306a36Sopenharmony_ci			error = -EFSCORRUPTED;
352862306a36Sopenharmony_ci			goto error0;
352962306a36Sopenharmony_ci		}
353062306a36Sopenharmony_ci		level++;
353162306a36Sopenharmony_ci
353262306a36Sopenharmony_ci		/*
353362306a36Sopenharmony_ci		 * See if the cursor we just used is trash.
353462306a36Sopenharmony_ci		 * Can't trash the caller's cursor, but otherwise we should
353562306a36Sopenharmony_ci		 * if ncur is a new cursor or we're about to be done.
353662306a36Sopenharmony_ci		 */
353762306a36Sopenharmony_ci		if (pcur != cur &&
353862306a36Sopenharmony_ci		    (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
353962306a36Sopenharmony_ci			/* Save the state from the cursor before we trash it */
354062306a36Sopenharmony_ci			if (cur->bc_ops->update_cursor)
354162306a36Sopenharmony_ci				cur->bc_ops->update_cursor(pcur, cur);
354262306a36Sopenharmony_ci			cur->bc_nlevels = pcur->bc_nlevels;
354362306a36Sopenharmony_ci			xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
354462306a36Sopenharmony_ci		}
354562306a36Sopenharmony_ci		/* If we got a new cursor, switch to it. */
354662306a36Sopenharmony_ci		if (ncur) {
354762306a36Sopenharmony_ci			pcur = ncur;
354862306a36Sopenharmony_ci			ncur = NULL;
354962306a36Sopenharmony_ci		}
355062306a36Sopenharmony_ci	} while (!xfs_btree_ptr_is_null(cur, &nptr));
355162306a36Sopenharmony_ci
355262306a36Sopenharmony_ci	*stat = i;
355362306a36Sopenharmony_ci	return 0;
355462306a36Sopenharmony_cierror0:
355562306a36Sopenharmony_ci	return error;
355662306a36Sopenharmony_ci}
355762306a36Sopenharmony_ci
/*
 * Try to merge a non-leaf block back into the inode root.
 *
 * Note: the killroot name comes from the fact that we're effectively
 * killing the old root block.  But because we can't just delete the
 * inode we have to copy the single block it was pointing to into the
 * inode.
 */
356662306a36Sopenharmony_ciSTATIC int
356762306a36Sopenharmony_cixfs_btree_kill_iroot(
356862306a36Sopenharmony_ci	struct xfs_btree_cur	*cur)
356962306a36Sopenharmony_ci{
357062306a36Sopenharmony_ci	int			whichfork = cur->bc_ino.whichfork;
357162306a36Sopenharmony_ci	struct xfs_inode	*ip = cur->bc_ino.ip;
357262306a36Sopenharmony_ci	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
357362306a36Sopenharmony_ci	struct xfs_btree_block	*block;
357462306a36Sopenharmony_ci	struct xfs_btree_block	*cblock;
357562306a36Sopenharmony_ci	union xfs_btree_key	*kp;
357662306a36Sopenharmony_ci	union xfs_btree_key	*ckp;
357762306a36Sopenharmony_ci	union xfs_btree_ptr	*pp;
357862306a36Sopenharmony_ci	union xfs_btree_ptr	*cpp;
357962306a36Sopenharmony_ci	struct xfs_buf		*cbp;
358062306a36Sopenharmony_ci	int			level;
358162306a36Sopenharmony_ci	int			index;
358262306a36Sopenharmony_ci	int			numrecs;
358362306a36Sopenharmony_ci	int			error;
358462306a36Sopenharmony_ci#ifdef DEBUG
358562306a36Sopenharmony_ci	union xfs_btree_ptr	ptr;
358662306a36Sopenharmony_ci#endif
358762306a36Sopenharmony_ci	int			i;
358862306a36Sopenharmony_ci
358962306a36Sopenharmony_ci	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
359062306a36Sopenharmony_ci	ASSERT(cur->bc_nlevels > 1);
359162306a36Sopenharmony_ci
359262306a36Sopenharmony_ci	/*
359362306a36Sopenharmony_ci	 * Don't deal with the root block needs to be a leaf case.
359462306a36Sopenharmony_ci	 * We're just going to turn the thing back into extents anyway.
359562306a36Sopenharmony_ci	 */
359662306a36Sopenharmony_ci	level = cur->bc_nlevels - 1;
359762306a36Sopenharmony_ci	if (level == 1)
359862306a36Sopenharmony_ci		goto out0;
359962306a36Sopenharmony_ci
360062306a36Sopenharmony_ci	/*
360162306a36Sopenharmony_ci	 * Give up if the root has multiple children.
360262306a36Sopenharmony_ci	 */
360362306a36Sopenharmony_ci	block = xfs_btree_get_iroot(cur);
360462306a36Sopenharmony_ci	if (xfs_btree_get_numrecs(block) != 1)
360562306a36Sopenharmony_ci		goto out0;
360662306a36Sopenharmony_ci
360762306a36Sopenharmony_ci	cblock = xfs_btree_get_block(cur, level - 1, &cbp);
360862306a36Sopenharmony_ci	numrecs = xfs_btree_get_numrecs(cblock);
360962306a36Sopenharmony_ci
361062306a36Sopenharmony_ci	/*
361162306a36Sopenharmony_ci	 * Only do this if the next level will fit.
361262306a36Sopenharmony_ci	 * Then the data must be copied up to the inode,
361362306a36Sopenharmony_ci	 * instead of freeing the root you free the next level.
361462306a36Sopenharmony_ci	 */
361562306a36Sopenharmony_ci	if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
361662306a36Sopenharmony_ci		goto out0;
361762306a36Sopenharmony_ci
361862306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, killroot);
361962306a36Sopenharmony_ci
362062306a36Sopenharmony_ci#ifdef DEBUG
362162306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
362262306a36Sopenharmony_ci	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
362362306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
362462306a36Sopenharmony_ci	ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
362562306a36Sopenharmony_ci#endif
362662306a36Sopenharmony_ci
362762306a36Sopenharmony_ci	index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
362862306a36Sopenharmony_ci	if (index) {
362962306a36Sopenharmony_ci		xfs_iroot_realloc(cur->bc_ino.ip, index,
363062306a36Sopenharmony_ci				  cur->bc_ino.whichfork);
363162306a36Sopenharmony_ci		block = ifp->if_broot;
363262306a36Sopenharmony_ci	}
363362306a36Sopenharmony_ci
363462306a36Sopenharmony_ci	be16_add_cpu(&block->bb_numrecs, index);
363562306a36Sopenharmony_ci	ASSERT(block->bb_numrecs == cblock->bb_numrecs);
363662306a36Sopenharmony_ci
363762306a36Sopenharmony_ci	kp = xfs_btree_key_addr(cur, 1, block);
363862306a36Sopenharmony_ci	ckp = xfs_btree_key_addr(cur, 1, cblock);
363962306a36Sopenharmony_ci	xfs_btree_copy_keys(cur, kp, ckp, numrecs);
364062306a36Sopenharmony_ci
364162306a36Sopenharmony_ci	pp = xfs_btree_ptr_addr(cur, 1, block);
364262306a36Sopenharmony_ci	cpp = xfs_btree_ptr_addr(cur, 1, cblock);
364362306a36Sopenharmony_ci
364462306a36Sopenharmony_ci	for (i = 0; i < numrecs; i++) {
364562306a36Sopenharmony_ci		error = xfs_btree_debug_check_ptr(cur, cpp, i, level - 1);
364662306a36Sopenharmony_ci		if (error)
364762306a36Sopenharmony_ci			return error;
364862306a36Sopenharmony_ci	}
364962306a36Sopenharmony_ci
365062306a36Sopenharmony_ci	xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
365162306a36Sopenharmony_ci
365262306a36Sopenharmony_ci	error = xfs_btree_free_block(cur, cbp);
365362306a36Sopenharmony_ci	if (error)
365462306a36Sopenharmony_ci		return error;
365562306a36Sopenharmony_ci
365662306a36Sopenharmony_ci	cur->bc_levels[level - 1].bp = NULL;
365762306a36Sopenharmony_ci	be16_add_cpu(&block->bb_level, -1);
365862306a36Sopenharmony_ci	xfs_trans_log_inode(cur->bc_tp, ip,
365962306a36Sopenharmony_ci		XFS_ILOG_CORE | xfs_ilog_fbroot(cur->bc_ino.whichfork));
366062306a36Sopenharmony_ci	cur->bc_nlevels--;
366162306a36Sopenharmony_ciout0:
366262306a36Sopenharmony_ci	return 0;
366362306a36Sopenharmony_ci}
366462306a36Sopenharmony_ci
366562306a36Sopenharmony_ci/*
366662306a36Sopenharmony_ci * Kill the current root node, and replace it with it's only child node.
366762306a36Sopenharmony_ci */
366862306a36Sopenharmony_ciSTATIC int
366962306a36Sopenharmony_cixfs_btree_kill_root(
367062306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
367162306a36Sopenharmony_ci	struct xfs_buf		*bp,
367262306a36Sopenharmony_ci	int			level,
367362306a36Sopenharmony_ci	union xfs_btree_ptr	*newroot)
367462306a36Sopenharmony_ci{
367562306a36Sopenharmony_ci	int			error;
367662306a36Sopenharmony_ci
367762306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, killroot);
367862306a36Sopenharmony_ci
367962306a36Sopenharmony_ci	/*
368062306a36Sopenharmony_ci	 * Update the root pointer, decreasing the level by 1 and then
368162306a36Sopenharmony_ci	 * free the old root.
368262306a36Sopenharmony_ci	 */
368362306a36Sopenharmony_ci	cur->bc_ops->set_root(cur, newroot, -1);
368462306a36Sopenharmony_ci
368562306a36Sopenharmony_ci	error = xfs_btree_free_block(cur, bp);
368662306a36Sopenharmony_ci	if (error)
368762306a36Sopenharmony_ci		return error;
368862306a36Sopenharmony_ci
368962306a36Sopenharmony_ci	cur->bc_levels[level].bp = NULL;
369062306a36Sopenharmony_ci	cur->bc_levels[level].ra = 0;
369162306a36Sopenharmony_ci	cur->bc_nlevels--;
369262306a36Sopenharmony_ci
369362306a36Sopenharmony_ci	return 0;
369462306a36Sopenharmony_ci}
369562306a36Sopenharmony_ci
369662306a36Sopenharmony_ciSTATIC int
369762306a36Sopenharmony_cixfs_btree_dec_cursor(
369862306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
369962306a36Sopenharmony_ci	int			level,
370062306a36Sopenharmony_ci	int			*stat)
370162306a36Sopenharmony_ci{
370262306a36Sopenharmony_ci	int			error;
370362306a36Sopenharmony_ci	int			i;
370462306a36Sopenharmony_ci
370562306a36Sopenharmony_ci	if (level > 0) {
370662306a36Sopenharmony_ci		error = xfs_btree_decrement(cur, level, &i);
370762306a36Sopenharmony_ci		if (error)
370862306a36Sopenharmony_ci			return error;
370962306a36Sopenharmony_ci	}
371062306a36Sopenharmony_ci
371162306a36Sopenharmony_ci	*stat = 1;
371262306a36Sopenharmony_ci	return 0;
371362306a36Sopenharmony_ci}
371462306a36Sopenharmony_ci
371562306a36Sopenharmony_ci/*
371662306a36Sopenharmony_ci * Single level of the btree record deletion routine.
371762306a36Sopenharmony_ci * Delete record pointed to by cur/level.
371862306a36Sopenharmony_ci * Remove the record from its block then rebalance the tree.
371962306a36Sopenharmony_ci * Return 0 for error, 1 for done, 2 to go on to the next level.
372062306a36Sopenharmony_ci */
372162306a36Sopenharmony_ciSTATIC int					/* error */
372262306a36Sopenharmony_cixfs_btree_delrec(
372362306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,		/* btree cursor */
372462306a36Sopenharmony_ci	int			level,		/* level removing record from */
372562306a36Sopenharmony_ci	int			*stat)		/* fail/done/go-on */
372662306a36Sopenharmony_ci{
372762306a36Sopenharmony_ci	struct xfs_btree_block	*block;		/* btree block */
372862306a36Sopenharmony_ci	union xfs_btree_ptr	cptr;		/* current block ptr */
372962306a36Sopenharmony_ci	struct xfs_buf		*bp;		/* buffer for block */
373062306a36Sopenharmony_ci	int			error;		/* error return value */
373162306a36Sopenharmony_ci	int			i;		/* loop counter */
373262306a36Sopenharmony_ci	union xfs_btree_ptr	lptr;		/* left sibling block ptr */
373362306a36Sopenharmony_ci	struct xfs_buf		*lbp;		/* left buffer pointer */
373462306a36Sopenharmony_ci	struct xfs_btree_block	*left;		/* left btree block */
373562306a36Sopenharmony_ci	int			lrecs = 0;	/* left record count */
373662306a36Sopenharmony_ci	int			ptr;		/* key/record index */
373762306a36Sopenharmony_ci	union xfs_btree_ptr	rptr;		/* right sibling block ptr */
373862306a36Sopenharmony_ci	struct xfs_buf		*rbp;		/* right buffer pointer */
373962306a36Sopenharmony_ci	struct xfs_btree_block	*right;		/* right btree block */
374062306a36Sopenharmony_ci	struct xfs_btree_block	*rrblock;	/* right-right btree block */
374162306a36Sopenharmony_ci	struct xfs_buf		*rrbp;		/* right-right buffer pointer */
374262306a36Sopenharmony_ci	int			rrecs = 0;	/* right record count */
374362306a36Sopenharmony_ci	struct xfs_btree_cur	*tcur;		/* temporary btree cursor */
374462306a36Sopenharmony_ci	int			numrecs;	/* temporary numrec count */
374562306a36Sopenharmony_ci
374662306a36Sopenharmony_ci	tcur = NULL;
374762306a36Sopenharmony_ci
374862306a36Sopenharmony_ci	/* Get the index of the entry being deleted, check for nothing there. */
374962306a36Sopenharmony_ci	ptr = cur->bc_levels[level].ptr;
375062306a36Sopenharmony_ci	if (ptr == 0) {
375162306a36Sopenharmony_ci		*stat = 0;
375262306a36Sopenharmony_ci		return 0;
375362306a36Sopenharmony_ci	}
375462306a36Sopenharmony_ci
375562306a36Sopenharmony_ci	/* Get the buffer & block containing the record or key/ptr. */
375662306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
375762306a36Sopenharmony_ci	numrecs = xfs_btree_get_numrecs(block);
375862306a36Sopenharmony_ci
375962306a36Sopenharmony_ci#ifdef DEBUG
376062306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, block, level, bp);
376162306a36Sopenharmony_ci	if (error)
376262306a36Sopenharmony_ci		goto error0;
376362306a36Sopenharmony_ci#endif
376462306a36Sopenharmony_ci
376562306a36Sopenharmony_ci	/* Fail if we're off the end of the block. */
376662306a36Sopenharmony_ci	if (ptr > numrecs) {
376762306a36Sopenharmony_ci		*stat = 0;
376862306a36Sopenharmony_ci		return 0;
376962306a36Sopenharmony_ci	}
377062306a36Sopenharmony_ci
377162306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, delrec);
377262306a36Sopenharmony_ci	XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
377362306a36Sopenharmony_ci
377462306a36Sopenharmony_ci	/* Excise the entries being deleted. */
377562306a36Sopenharmony_ci	if (level > 0) {
377662306a36Sopenharmony_ci		/* It's a nonleaf. operate on keys and ptrs */
377762306a36Sopenharmony_ci		union xfs_btree_key	*lkp;
377862306a36Sopenharmony_ci		union xfs_btree_ptr	*lpp;
377962306a36Sopenharmony_ci
378062306a36Sopenharmony_ci		lkp = xfs_btree_key_addr(cur, ptr + 1, block);
378162306a36Sopenharmony_ci		lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
378262306a36Sopenharmony_ci
378362306a36Sopenharmony_ci		for (i = 0; i < numrecs - ptr; i++) {
378462306a36Sopenharmony_ci			error = xfs_btree_debug_check_ptr(cur, lpp, i, level);
378562306a36Sopenharmony_ci			if (error)
378662306a36Sopenharmony_ci				goto error0;
378762306a36Sopenharmony_ci		}
378862306a36Sopenharmony_ci
378962306a36Sopenharmony_ci		if (ptr < numrecs) {
379062306a36Sopenharmony_ci			xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
379162306a36Sopenharmony_ci			xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
379262306a36Sopenharmony_ci			xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
379362306a36Sopenharmony_ci			xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
379462306a36Sopenharmony_ci		}
379562306a36Sopenharmony_ci	} else {
379662306a36Sopenharmony_ci		/* It's a leaf. operate on records */
379762306a36Sopenharmony_ci		if (ptr < numrecs) {
379862306a36Sopenharmony_ci			xfs_btree_shift_recs(cur,
379962306a36Sopenharmony_ci				xfs_btree_rec_addr(cur, ptr + 1, block),
380062306a36Sopenharmony_ci				-1, numrecs - ptr);
380162306a36Sopenharmony_ci			xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
380262306a36Sopenharmony_ci		}
380362306a36Sopenharmony_ci	}
380462306a36Sopenharmony_ci
380562306a36Sopenharmony_ci	/*
380662306a36Sopenharmony_ci	 * Decrement and log the number of entries in the block.
380762306a36Sopenharmony_ci	 */
380862306a36Sopenharmony_ci	xfs_btree_set_numrecs(block, --numrecs);
380962306a36Sopenharmony_ci	xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
381062306a36Sopenharmony_ci
381162306a36Sopenharmony_ci	/*
381262306a36Sopenharmony_ci	 * If we are tracking the last record in the tree and
381362306a36Sopenharmony_ci	 * we are at the far right edge of the tree, update it.
381462306a36Sopenharmony_ci	 */
381562306a36Sopenharmony_ci	if (xfs_btree_is_lastrec(cur, block, level)) {
381662306a36Sopenharmony_ci		cur->bc_ops->update_lastrec(cur, block, NULL,
381762306a36Sopenharmony_ci					    ptr, LASTREC_DELREC);
381862306a36Sopenharmony_ci	}
381962306a36Sopenharmony_ci
382062306a36Sopenharmony_ci	/*
382162306a36Sopenharmony_ci	 * We're at the root level.  First, shrink the root block in-memory.
382262306a36Sopenharmony_ci	 * Try to get rid of the next level down.  If we can't then there's
382362306a36Sopenharmony_ci	 * nothing left to do.
382462306a36Sopenharmony_ci	 */
382562306a36Sopenharmony_ci	if (level == cur->bc_nlevels - 1) {
382662306a36Sopenharmony_ci		if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
382762306a36Sopenharmony_ci			xfs_iroot_realloc(cur->bc_ino.ip, -1,
382862306a36Sopenharmony_ci					  cur->bc_ino.whichfork);
382962306a36Sopenharmony_ci
383062306a36Sopenharmony_ci			error = xfs_btree_kill_iroot(cur);
383162306a36Sopenharmony_ci			if (error)
383262306a36Sopenharmony_ci				goto error0;
383362306a36Sopenharmony_ci
383462306a36Sopenharmony_ci			error = xfs_btree_dec_cursor(cur, level, stat);
383562306a36Sopenharmony_ci			if (error)
383662306a36Sopenharmony_ci				goto error0;
383762306a36Sopenharmony_ci			*stat = 1;
383862306a36Sopenharmony_ci			return 0;
383962306a36Sopenharmony_ci		}
384062306a36Sopenharmony_ci
384162306a36Sopenharmony_ci		/*
384262306a36Sopenharmony_ci		 * If this is the root level, and there's only one entry left,
384362306a36Sopenharmony_ci		 * and it's NOT the leaf level, then we can get rid of this
384462306a36Sopenharmony_ci		 * level.
384562306a36Sopenharmony_ci		 */
384662306a36Sopenharmony_ci		if (numrecs == 1 && level > 0) {
384762306a36Sopenharmony_ci			union xfs_btree_ptr	*pp;
384862306a36Sopenharmony_ci			/*
384962306a36Sopenharmony_ci			 * pp is still set to the first pointer in the block.
385062306a36Sopenharmony_ci			 * Make it the new root of the btree.
385162306a36Sopenharmony_ci			 */
385262306a36Sopenharmony_ci			pp = xfs_btree_ptr_addr(cur, 1, block);
385362306a36Sopenharmony_ci			error = xfs_btree_kill_root(cur, bp, level, pp);
385462306a36Sopenharmony_ci			if (error)
385562306a36Sopenharmony_ci				goto error0;
385662306a36Sopenharmony_ci		} else if (level > 0) {
385762306a36Sopenharmony_ci			error = xfs_btree_dec_cursor(cur, level, stat);
385862306a36Sopenharmony_ci			if (error)
385962306a36Sopenharmony_ci				goto error0;
386062306a36Sopenharmony_ci		}
386162306a36Sopenharmony_ci		*stat = 1;
386262306a36Sopenharmony_ci		return 0;
386362306a36Sopenharmony_ci	}
386462306a36Sopenharmony_ci
386562306a36Sopenharmony_ci	/*
386662306a36Sopenharmony_ci	 * If we deleted the leftmost entry in the block, update the
386762306a36Sopenharmony_ci	 * key values above us in the tree.
386862306a36Sopenharmony_ci	 */
386962306a36Sopenharmony_ci	if (xfs_btree_needs_key_update(cur, ptr)) {
387062306a36Sopenharmony_ci		error = xfs_btree_update_keys(cur, level);
387162306a36Sopenharmony_ci		if (error)
387262306a36Sopenharmony_ci			goto error0;
387362306a36Sopenharmony_ci	}
387462306a36Sopenharmony_ci
387562306a36Sopenharmony_ci	/*
387662306a36Sopenharmony_ci	 * If the number of records remaining in the block is at least
387762306a36Sopenharmony_ci	 * the minimum, we're done.
387862306a36Sopenharmony_ci	 */
387962306a36Sopenharmony_ci	if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
388062306a36Sopenharmony_ci		error = xfs_btree_dec_cursor(cur, level, stat);
388162306a36Sopenharmony_ci		if (error)
388262306a36Sopenharmony_ci			goto error0;
388362306a36Sopenharmony_ci		return 0;
388462306a36Sopenharmony_ci	}
388562306a36Sopenharmony_ci
388662306a36Sopenharmony_ci	/*
388762306a36Sopenharmony_ci	 * Otherwise, we have to move some records around to keep the
388862306a36Sopenharmony_ci	 * tree balanced.  Look at the left and right sibling blocks to
388962306a36Sopenharmony_ci	 * see if we can re-balance by moving only one record.
389062306a36Sopenharmony_ci	 */
389162306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
389262306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
389362306a36Sopenharmony_ci
389462306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
389562306a36Sopenharmony_ci		/*
389662306a36Sopenharmony_ci		 * One child of root, need to get a chance to copy its contents
389762306a36Sopenharmony_ci		 * into the root and delete it. Can't go up to next level,
389862306a36Sopenharmony_ci		 * there's nothing to delete there.
389962306a36Sopenharmony_ci		 */
390062306a36Sopenharmony_ci		if (xfs_btree_ptr_is_null(cur, &rptr) &&
390162306a36Sopenharmony_ci		    xfs_btree_ptr_is_null(cur, &lptr) &&
390262306a36Sopenharmony_ci		    level == cur->bc_nlevels - 2) {
390362306a36Sopenharmony_ci			error = xfs_btree_kill_iroot(cur);
390462306a36Sopenharmony_ci			if (!error)
390562306a36Sopenharmony_ci				error = xfs_btree_dec_cursor(cur, level, stat);
390662306a36Sopenharmony_ci			if (error)
390762306a36Sopenharmony_ci				goto error0;
390862306a36Sopenharmony_ci			return 0;
390962306a36Sopenharmony_ci		}
391062306a36Sopenharmony_ci	}
391162306a36Sopenharmony_ci
391262306a36Sopenharmony_ci	ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
391362306a36Sopenharmony_ci	       !xfs_btree_ptr_is_null(cur, &lptr));
391462306a36Sopenharmony_ci
391562306a36Sopenharmony_ci	/*
391662306a36Sopenharmony_ci	 * Duplicate the cursor so our btree manipulations here won't
391762306a36Sopenharmony_ci	 * disrupt the next level up.
391862306a36Sopenharmony_ci	 */
391962306a36Sopenharmony_ci	error = xfs_btree_dup_cursor(cur, &tcur);
392062306a36Sopenharmony_ci	if (error)
392162306a36Sopenharmony_ci		goto error0;
392262306a36Sopenharmony_ci
392362306a36Sopenharmony_ci	/*
392462306a36Sopenharmony_ci	 * If there's a right sibling, see if it's ok to shift an entry
392562306a36Sopenharmony_ci	 * out of it.
392662306a36Sopenharmony_ci	 */
392762306a36Sopenharmony_ci	if (!xfs_btree_ptr_is_null(cur, &rptr)) {
392862306a36Sopenharmony_ci		/*
392962306a36Sopenharmony_ci		 * Move the temp cursor to the last entry in the next block.
393062306a36Sopenharmony_ci		 * Actually any entry but the first would suffice.
393162306a36Sopenharmony_ci		 */
393262306a36Sopenharmony_ci		i = xfs_btree_lastrec(tcur, level);
393362306a36Sopenharmony_ci		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
393462306a36Sopenharmony_ci			error = -EFSCORRUPTED;
393562306a36Sopenharmony_ci			goto error0;
393662306a36Sopenharmony_ci		}
393762306a36Sopenharmony_ci
393862306a36Sopenharmony_ci		error = xfs_btree_increment(tcur, level, &i);
393962306a36Sopenharmony_ci		if (error)
394062306a36Sopenharmony_ci			goto error0;
394162306a36Sopenharmony_ci		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
394262306a36Sopenharmony_ci			error = -EFSCORRUPTED;
394362306a36Sopenharmony_ci			goto error0;
394462306a36Sopenharmony_ci		}
394562306a36Sopenharmony_ci
394662306a36Sopenharmony_ci		i = xfs_btree_lastrec(tcur, level);
394762306a36Sopenharmony_ci		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
394862306a36Sopenharmony_ci			error = -EFSCORRUPTED;
394962306a36Sopenharmony_ci			goto error0;
395062306a36Sopenharmony_ci		}
395162306a36Sopenharmony_ci
395262306a36Sopenharmony_ci		/* Grab a pointer to the block. */
395362306a36Sopenharmony_ci		right = xfs_btree_get_block(tcur, level, &rbp);
395462306a36Sopenharmony_ci#ifdef DEBUG
395562306a36Sopenharmony_ci		error = xfs_btree_check_block(tcur, right, level, rbp);
395662306a36Sopenharmony_ci		if (error)
395762306a36Sopenharmony_ci			goto error0;
395862306a36Sopenharmony_ci#endif
395962306a36Sopenharmony_ci		/* Grab the current block number, for future use. */
396062306a36Sopenharmony_ci		xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
396162306a36Sopenharmony_ci
396262306a36Sopenharmony_ci		/*
396362306a36Sopenharmony_ci		 * If right block is full enough so that removing one entry
396462306a36Sopenharmony_ci		 * won't make it too empty, and left-shifting an entry out
396562306a36Sopenharmony_ci		 * of right to us works, we're done.
396662306a36Sopenharmony_ci		 */
396762306a36Sopenharmony_ci		if (xfs_btree_get_numrecs(right) - 1 >=
396862306a36Sopenharmony_ci		    cur->bc_ops->get_minrecs(tcur, level)) {
396962306a36Sopenharmony_ci			error = xfs_btree_lshift(tcur, level, &i);
397062306a36Sopenharmony_ci			if (error)
397162306a36Sopenharmony_ci				goto error0;
397262306a36Sopenharmony_ci			if (i) {
397362306a36Sopenharmony_ci				ASSERT(xfs_btree_get_numrecs(block) >=
397462306a36Sopenharmony_ci				       cur->bc_ops->get_minrecs(tcur, level));
397562306a36Sopenharmony_ci
397662306a36Sopenharmony_ci				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
397762306a36Sopenharmony_ci				tcur = NULL;
397862306a36Sopenharmony_ci
397962306a36Sopenharmony_ci				error = xfs_btree_dec_cursor(cur, level, stat);
398062306a36Sopenharmony_ci				if (error)
398162306a36Sopenharmony_ci					goto error0;
398262306a36Sopenharmony_ci				return 0;
398362306a36Sopenharmony_ci			}
398462306a36Sopenharmony_ci		}
398562306a36Sopenharmony_ci
398662306a36Sopenharmony_ci		/*
398762306a36Sopenharmony_ci		 * Otherwise, grab the number of records in right for
398862306a36Sopenharmony_ci		 * future reference, and fix up the temp cursor to point
398962306a36Sopenharmony_ci		 * to our block again (last record).
399062306a36Sopenharmony_ci		 */
399162306a36Sopenharmony_ci		rrecs = xfs_btree_get_numrecs(right);
399262306a36Sopenharmony_ci		if (!xfs_btree_ptr_is_null(cur, &lptr)) {
399362306a36Sopenharmony_ci			i = xfs_btree_firstrec(tcur, level);
399462306a36Sopenharmony_ci			if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
399562306a36Sopenharmony_ci				error = -EFSCORRUPTED;
399662306a36Sopenharmony_ci				goto error0;
399762306a36Sopenharmony_ci			}
399862306a36Sopenharmony_ci
399962306a36Sopenharmony_ci			error = xfs_btree_decrement(tcur, level, &i);
400062306a36Sopenharmony_ci			if (error)
400162306a36Sopenharmony_ci				goto error0;
400262306a36Sopenharmony_ci			if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
400362306a36Sopenharmony_ci				error = -EFSCORRUPTED;
400462306a36Sopenharmony_ci				goto error0;
400562306a36Sopenharmony_ci			}
400662306a36Sopenharmony_ci		}
400762306a36Sopenharmony_ci	}
400862306a36Sopenharmony_ci
400962306a36Sopenharmony_ci	/*
401062306a36Sopenharmony_ci	 * If there's a left sibling, see if it's ok to shift an entry
401162306a36Sopenharmony_ci	 * out of it.
401262306a36Sopenharmony_ci	 */
401362306a36Sopenharmony_ci	if (!xfs_btree_ptr_is_null(cur, &lptr)) {
401462306a36Sopenharmony_ci		/*
401562306a36Sopenharmony_ci		 * Move the temp cursor to the first entry in the
401662306a36Sopenharmony_ci		 * previous block.
401762306a36Sopenharmony_ci		 */
401862306a36Sopenharmony_ci		i = xfs_btree_firstrec(tcur, level);
401962306a36Sopenharmony_ci		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
402062306a36Sopenharmony_ci			error = -EFSCORRUPTED;
402162306a36Sopenharmony_ci			goto error0;
402262306a36Sopenharmony_ci		}
402362306a36Sopenharmony_ci
402462306a36Sopenharmony_ci		error = xfs_btree_decrement(tcur, level, &i);
402562306a36Sopenharmony_ci		if (error)
402662306a36Sopenharmony_ci			goto error0;
402762306a36Sopenharmony_ci		i = xfs_btree_firstrec(tcur, level);
402862306a36Sopenharmony_ci		if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
402962306a36Sopenharmony_ci			error = -EFSCORRUPTED;
403062306a36Sopenharmony_ci			goto error0;
403162306a36Sopenharmony_ci		}
403262306a36Sopenharmony_ci
403362306a36Sopenharmony_ci		/* Grab a pointer to the block. */
403462306a36Sopenharmony_ci		left = xfs_btree_get_block(tcur, level, &lbp);
403562306a36Sopenharmony_ci#ifdef DEBUG
403662306a36Sopenharmony_ci		error = xfs_btree_check_block(cur, left, level, lbp);
403762306a36Sopenharmony_ci		if (error)
403862306a36Sopenharmony_ci			goto error0;
403962306a36Sopenharmony_ci#endif
404062306a36Sopenharmony_ci		/* Grab the current block number, for future use. */
404162306a36Sopenharmony_ci		xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
404262306a36Sopenharmony_ci
404362306a36Sopenharmony_ci		/*
404462306a36Sopenharmony_ci		 * If left block is full enough so that removing one entry
404562306a36Sopenharmony_ci		 * won't make it too empty, and right-shifting an entry out
404662306a36Sopenharmony_ci		 * of left to us works, we're done.
404762306a36Sopenharmony_ci		 */
404862306a36Sopenharmony_ci		if (xfs_btree_get_numrecs(left) - 1 >=
404962306a36Sopenharmony_ci		    cur->bc_ops->get_minrecs(tcur, level)) {
405062306a36Sopenharmony_ci			error = xfs_btree_rshift(tcur, level, &i);
405162306a36Sopenharmony_ci			if (error)
405262306a36Sopenharmony_ci				goto error0;
405362306a36Sopenharmony_ci			if (i) {
405462306a36Sopenharmony_ci				ASSERT(xfs_btree_get_numrecs(block) >=
405562306a36Sopenharmony_ci				       cur->bc_ops->get_minrecs(tcur, level));
405662306a36Sopenharmony_ci				xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
405762306a36Sopenharmony_ci				tcur = NULL;
405862306a36Sopenharmony_ci				if (level == 0)
405962306a36Sopenharmony_ci					cur->bc_levels[0].ptr++;
406062306a36Sopenharmony_ci
406162306a36Sopenharmony_ci				*stat = 1;
406262306a36Sopenharmony_ci				return 0;
406362306a36Sopenharmony_ci			}
406462306a36Sopenharmony_ci		}
406562306a36Sopenharmony_ci
406662306a36Sopenharmony_ci		/*
406762306a36Sopenharmony_ci		 * Otherwise, grab the number of records in right for
406862306a36Sopenharmony_ci		 * future reference.
406962306a36Sopenharmony_ci		 */
407062306a36Sopenharmony_ci		lrecs = xfs_btree_get_numrecs(left);
407162306a36Sopenharmony_ci	}
407262306a36Sopenharmony_ci
407362306a36Sopenharmony_ci	/* Delete the temp cursor, we're done with it. */
407462306a36Sopenharmony_ci	xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
407562306a36Sopenharmony_ci	tcur = NULL;
407662306a36Sopenharmony_ci
407762306a36Sopenharmony_ci	/* If here, we need to do a join to keep the tree balanced. */
407862306a36Sopenharmony_ci	ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
407962306a36Sopenharmony_ci
408062306a36Sopenharmony_ci	if (!xfs_btree_ptr_is_null(cur, &lptr) &&
408162306a36Sopenharmony_ci	    lrecs + xfs_btree_get_numrecs(block) <=
408262306a36Sopenharmony_ci			cur->bc_ops->get_maxrecs(cur, level)) {
408362306a36Sopenharmony_ci		/*
408462306a36Sopenharmony_ci		 * Set "right" to be the starting block,
408562306a36Sopenharmony_ci		 * "left" to be the left neighbor.
408662306a36Sopenharmony_ci		 */
408762306a36Sopenharmony_ci		rptr = cptr;
408862306a36Sopenharmony_ci		right = block;
408962306a36Sopenharmony_ci		rbp = bp;
409062306a36Sopenharmony_ci		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
409162306a36Sopenharmony_ci		if (error)
409262306a36Sopenharmony_ci			goto error0;
409362306a36Sopenharmony_ci
409462306a36Sopenharmony_ci	/*
409562306a36Sopenharmony_ci	 * If that won't work, see if we can join with the right neighbor block.
409662306a36Sopenharmony_ci	 */
409762306a36Sopenharmony_ci	} else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
409862306a36Sopenharmony_ci		   rrecs + xfs_btree_get_numrecs(block) <=
409962306a36Sopenharmony_ci			cur->bc_ops->get_maxrecs(cur, level)) {
410062306a36Sopenharmony_ci		/*
410162306a36Sopenharmony_ci		 * Set "left" to be the starting block,
410262306a36Sopenharmony_ci		 * "right" to be the right neighbor.
410362306a36Sopenharmony_ci		 */
410462306a36Sopenharmony_ci		lptr = cptr;
410562306a36Sopenharmony_ci		left = block;
410662306a36Sopenharmony_ci		lbp = bp;
410762306a36Sopenharmony_ci		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
410862306a36Sopenharmony_ci		if (error)
410962306a36Sopenharmony_ci			goto error0;
411062306a36Sopenharmony_ci
411162306a36Sopenharmony_ci	/*
411262306a36Sopenharmony_ci	 * Otherwise, we can't fix the imbalance.
411362306a36Sopenharmony_ci	 * Just return.  This is probably a logic error, but it's not fatal.
411462306a36Sopenharmony_ci	 */
411562306a36Sopenharmony_ci	} else {
411662306a36Sopenharmony_ci		error = xfs_btree_dec_cursor(cur, level, stat);
411762306a36Sopenharmony_ci		if (error)
411862306a36Sopenharmony_ci			goto error0;
411962306a36Sopenharmony_ci		return 0;
412062306a36Sopenharmony_ci	}
412162306a36Sopenharmony_ci
412262306a36Sopenharmony_ci	rrecs = xfs_btree_get_numrecs(right);
412362306a36Sopenharmony_ci	lrecs = xfs_btree_get_numrecs(left);
412462306a36Sopenharmony_ci
412562306a36Sopenharmony_ci	/*
412662306a36Sopenharmony_ci	 * We're now going to join "left" and "right" by moving all the stuff
412762306a36Sopenharmony_ci	 * in "right" to "left" and deleting "right".
412862306a36Sopenharmony_ci	 */
412962306a36Sopenharmony_ci	XFS_BTREE_STATS_ADD(cur, moves, rrecs);
413062306a36Sopenharmony_ci	if (level > 0) {
413162306a36Sopenharmony_ci		/* It's a non-leaf.  Move keys and pointers. */
413262306a36Sopenharmony_ci		union xfs_btree_key	*lkp;	/* left btree key */
413362306a36Sopenharmony_ci		union xfs_btree_ptr	*lpp;	/* left address pointer */
413462306a36Sopenharmony_ci		union xfs_btree_key	*rkp;	/* right btree key */
413562306a36Sopenharmony_ci		union xfs_btree_ptr	*rpp;	/* right address pointer */
413662306a36Sopenharmony_ci
413762306a36Sopenharmony_ci		lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
413862306a36Sopenharmony_ci		lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
413962306a36Sopenharmony_ci		rkp = xfs_btree_key_addr(cur, 1, right);
414062306a36Sopenharmony_ci		rpp = xfs_btree_ptr_addr(cur, 1, right);
414162306a36Sopenharmony_ci
414262306a36Sopenharmony_ci		for (i = 1; i < rrecs; i++) {
414362306a36Sopenharmony_ci			error = xfs_btree_debug_check_ptr(cur, rpp, i, level);
414462306a36Sopenharmony_ci			if (error)
414562306a36Sopenharmony_ci				goto error0;
414662306a36Sopenharmony_ci		}
414762306a36Sopenharmony_ci
414862306a36Sopenharmony_ci		xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
414962306a36Sopenharmony_ci		xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
415062306a36Sopenharmony_ci
415162306a36Sopenharmony_ci		xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
415262306a36Sopenharmony_ci		xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
415362306a36Sopenharmony_ci	} else {
415462306a36Sopenharmony_ci		/* It's a leaf.  Move records.  */
415562306a36Sopenharmony_ci		union xfs_btree_rec	*lrp;	/* left record pointer */
415662306a36Sopenharmony_ci		union xfs_btree_rec	*rrp;	/* right record pointer */
415762306a36Sopenharmony_ci
415862306a36Sopenharmony_ci		lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
415962306a36Sopenharmony_ci		rrp = xfs_btree_rec_addr(cur, 1, right);
416062306a36Sopenharmony_ci
416162306a36Sopenharmony_ci		xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
416262306a36Sopenharmony_ci		xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
416362306a36Sopenharmony_ci	}
416462306a36Sopenharmony_ci
416562306a36Sopenharmony_ci	XFS_BTREE_STATS_INC(cur, join);
416662306a36Sopenharmony_ci
416762306a36Sopenharmony_ci	/*
416862306a36Sopenharmony_ci	 * Fix up the number of records and right block pointer in the
416962306a36Sopenharmony_ci	 * surviving block, and log it.
417062306a36Sopenharmony_ci	 */
417162306a36Sopenharmony_ci	xfs_btree_set_numrecs(left, lrecs + rrecs);
417262306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
417362306a36Sopenharmony_ci	xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
417462306a36Sopenharmony_ci	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
417562306a36Sopenharmony_ci
417662306a36Sopenharmony_ci	/* If there is a right sibling, point it to the remaining block. */
417762306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
417862306a36Sopenharmony_ci	if (!xfs_btree_ptr_is_null(cur, &cptr)) {
417962306a36Sopenharmony_ci		error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
418062306a36Sopenharmony_ci		if (error)
418162306a36Sopenharmony_ci			goto error0;
418262306a36Sopenharmony_ci		xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
418362306a36Sopenharmony_ci		xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
418462306a36Sopenharmony_ci	}
418562306a36Sopenharmony_ci
418662306a36Sopenharmony_ci	/* Free the deleted block. */
418762306a36Sopenharmony_ci	error = xfs_btree_free_block(cur, rbp);
418862306a36Sopenharmony_ci	if (error)
418962306a36Sopenharmony_ci		goto error0;
419062306a36Sopenharmony_ci
419162306a36Sopenharmony_ci	/*
419262306a36Sopenharmony_ci	 * If we joined with the left neighbor, set the buffer in the
419362306a36Sopenharmony_ci	 * cursor to the left block, and fix up the index.
419462306a36Sopenharmony_ci	 */
419562306a36Sopenharmony_ci	if (bp != lbp) {
419662306a36Sopenharmony_ci		cur->bc_levels[level].bp = lbp;
419762306a36Sopenharmony_ci		cur->bc_levels[level].ptr += lrecs;
419862306a36Sopenharmony_ci		cur->bc_levels[level].ra = 0;
419962306a36Sopenharmony_ci	}
420062306a36Sopenharmony_ci	/*
420162306a36Sopenharmony_ci	 * If we joined with the right neighbor and there's a level above
420262306a36Sopenharmony_ci	 * us, increment the cursor at that level.
420362306a36Sopenharmony_ci	 */
420462306a36Sopenharmony_ci	else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
420562306a36Sopenharmony_ci		   (level + 1 < cur->bc_nlevels)) {
420662306a36Sopenharmony_ci		error = xfs_btree_increment(cur, level + 1, &i);
420762306a36Sopenharmony_ci		if (error)
420862306a36Sopenharmony_ci			goto error0;
420962306a36Sopenharmony_ci	}
421062306a36Sopenharmony_ci
421162306a36Sopenharmony_ci	/*
421262306a36Sopenharmony_ci	 * Readjust the ptr at this level if it's not a leaf, since it's
421362306a36Sopenharmony_ci	 * still pointing at the deletion point, which makes the cursor
421462306a36Sopenharmony_ci	 * inconsistent.  If this makes the ptr 0, the caller fixes it up.
421562306a36Sopenharmony_ci	 * We can't use decrement because it would change the next level up.
421662306a36Sopenharmony_ci	 */
421762306a36Sopenharmony_ci	if (level > 0)
421862306a36Sopenharmony_ci		cur->bc_levels[level].ptr--;
421962306a36Sopenharmony_ci
422062306a36Sopenharmony_ci	/*
422162306a36Sopenharmony_ci	 * We combined blocks, so we have to update the parent keys if the
422262306a36Sopenharmony_ci	 * btree supports overlapped intervals.  However,
422362306a36Sopenharmony_ci	 * bc_levels[level + 1].ptr points to the old block so that the caller
422462306a36Sopenharmony_ci	 * knows which record to delete.  Therefore, the caller must be savvy
422562306a36Sopenharmony_ci	 * enough to call updkeys for us if we return stat == 2.  The other
422662306a36Sopenharmony_ci	 * exit points from this function don't require deletions further up
422762306a36Sopenharmony_ci	 * the tree, so they can call updkeys directly.
422862306a36Sopenharmony_ci	 */
422962306a36Sopenharmony_ci
423062306a36Sopenharmony_ci	/* Return value means the next level up has something to do. */
423162306a36Sopenharmony_ci	*stat = 2;
423262306a36Sopenharmony_ci	return 0;
423362306a36Sopenharmony_ci
423462306a36Sopenharmony_cierror0:
423562306a36Sopenharmony_ci	if (tcur)
423662306a36Sopenharmony_ci		xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
423762306a36Sopenharmony_ci	return error;
423862306a36Sopenharmony_ci}
423962306a36Sopenharmony_ci
424062306a36Sopenharmony_ci/*
424162306a36Sopenharmony_ci * Delete the record pointed to by cur.
424262306a36Sopenharmony_ci * The cursor refers to the place where the record was (could be inserted)
424362306a36Sopenharmony_ci * when the operation returns.
424462306a36Sopenharmony_ci */
424562306a36Sopenharmony_ciint					/* error */
424662306a36Sopenharmony_cixfs_btree_delete(
424762306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
424862306a36Sopenharmony_ci	int			*stat)	/* success/failure */
424962306a36Sopenharmony_ci{
425062306a36Sopenharmony_ci	int			error;	/* error return value */
425162306a36Sopenharmony_ci	int			level;
425262306a36Sopenharmony_ci	int			i;
425362306a36Sopenharmony_ci	bool			joined = false;
425462306a36Sopenharmony_ci
425562306a36Sopenharmony_ci	/*
425662306a36Sopenharmony_ci	 * Go up the tree, starting at leaf level.
425762306a36Sopenharmony_ci	 *
425862306a36Sopenharmony_ci	 * If 2 is returned then a join was done; go to the next level.
425962306a36Sopenharmony_ci	 * Otherwise we are done.
426062306a36Sopenharmony_ci	 */
426162306a36Sopenharmony_ci	for (level = 0, i = 2; i == 2; level++) {
426262306a36Sopenharmony_ci		error = xfs_btree_delrec(cur, level, &i);
426362306a36Sopenharmony_ci		if (error)
426462306a36Sopenharmony_ci			goto error0;
426562306a36Sopenharmony_ci		if (i == 2)
426662306a36Sopenharmony_ci			joined = true;
426762306a36Sopenharmony_ci	}
426862306a36Sopenharmony_ci
426962306a36Sopenharmony_ci	/*
427062306a36Sopenharmony_ci	 * If we combined blocks as part of deleting the record, delrec won't
427162306a36Sopenharmony_ci	 * have updated the parent high keys so we have to do that here.
427262306a36Sopenharmony_ci	 */
427362306a36Sopenharmony_ci	if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) {
427462306a36Sopenharmony_ci		error = xfs_btree_updkeys_force(cur, 0);
427562306a36Sopenharmony_ci		if (error)
427662306a36Sopenharmony_ci			goto error0;
427762306a36Sopenharmony_ci	}
427862306a36Sopenharmony_ci
427962306a36Sopenharmony_ci	if (i == 0) {
428062306a36Sopenharmony_ci		for (level = 1; level < cur->bc_nlevels; level++) {
428162306a36Sopenharmony_ci			if (cur->bc_levels[level].ptr == 0) {
428262306a36Sopenharmony_ci				error = xfs_btree_decrement(cur, level, &i);
428362306a36Sopenharmony_ci				if (error)
428462306a36Sopenharmony_ci					goto error0;
428562306a36Sopenharmony_ci				break;
428662306a36Sopenharmony_ci			}
428762306a36Sopenharmony_ci		}
428862306a36Sopenharmony_ci	}
428962306a36Sopenharmony_ci
429062306a36Sopenharmony_ci	*stat = i;
429162306a36Sopenharmony_ci	return 0;
429262306a36Sopenharmony_cierror0:
429362306a36Sopenharmony_ci	return error;
429462306a36Sopenharmony_ci}
429562306a36Sopenharmony_ci
429662306a36Sopenharmony_ci/*
429762306a36Sopenharmony_ci * Get the data from the pointed-to record.
429862306a36Sopenharmony_ci */
429962306a36Sopenharmony_ciint					/* error */
430062306a36Sopenharmony_cixfs_btree_get_rec(
430162306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,	/* btree cursor */
430262306a36Sopenharmony_ci	union xfs_btree_rec	**recp,	/* output: btree record */
430362306a36Sopenharmony_ci	int			*stat)	/* output: success/failure */
430462306a36Sopenharmony_ci{
430562306a36Sopenharmony_ci	struct xfs_btree_block	*block;	/* btree block */
430662306a36Sopenharmony_ci	struct xfs_buf		*bp;	/* buffer pointer */
430762306a36Sopenharmony_ci	int			ptr;	/* record number */
430862306a36Sopenharmony_ci#ifdef DEBUG
430962306a36Sopenharmony_ci	int			error;	/* error return value */
431062306a36Sopenharmony_ci#endif
431162306a36Sopenharmony_ci
431262306a36Sopenharmony_ci	ptr = cur->bc_levels[0].ptr;
431362306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, 0, &bp);
431462306a36Sopenharmony_ci
431562306a36Sopenharmony_ci#ifdef DEBUG
431662306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, block, 0, bp);
431762306a36Sopenharmony_ci	if (error)
431862306a36Sopenharmony_ci		return error;
431962306a36Sopenharmony_ci#endif
432062306a36Sopenharmony_ci
432162306a36Sopenharmony_ci	/*
432262306a36Sopenharmony_ci	 * Off the right end or left end, return failure.
432362306a36Sopenharmony_ci	 */
432462306a36Sopenharmony_ci	if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
432562306a36Sopenharmony_ci		*stat = 0;
432662306a36Sopenharmony_ci		return 0;
432762306a36Sopenharmony_ci	}
432862306a36Sopenharmony_ci
432962306a36Sopenharmony_ci	/*
433062306a36Sopenharmony_ci	 * Point to the record and extract its data.
433162306a36Sopenharmony_ci	 */
433262306a36Sopenharmony_ci	*recp = xfs_btree_rec_addr(cur, ptr, block);
433362306a36Sopenharmony_ci	*stat = 1;
433462306a36Sopenharmony_ci	return 0;
433562306a36Sopenharmony_ci}
433662306a36Sopenharmony_ci
433762306a36Sopenharmony_ci/* Visit a block in a btree. */
433862306a36Sopenharmony_ciSTATIC int
433962306a36Sopenharmony_cixfs_btree_visit_block(
434062306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
434162306a36Sopenharmony_ci	int				level,
434262306a36Sopenharmony_ci	xfs_btree_visit_blocks_fn	fn,
434362306a36Sopenharmony_ci	void				*data)
434462306a36Sopenharmony_ci{
434562306a36Sopenharmony_ci	struct xfs_btree_block		*block;
434662306a36Sopenharmony_ci	struct xfs_buf			*bp;
434762306a36Sopenharmony_ci	union xfs_btree_ptr		rptr;
434862306a36Sopenharmony_ci	int				error;
434962306a36Sopenharmony_ci
435062306a36Sopenharmony_ci	/* do right sibling readahead */
435162306a36Sopenharmony_ci	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
435262306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
435362306a36Sopenharmony_ci
435462306a36Sopenharmony_ci	/* process the block */
435562306a36Sopenharmony_ci	error = fn(cur, level, data);
435662306a36Sopenharmony_ci	if (error)
435762306a36Sopenharmony_ci		return error;
435862306a36Sopenharmony_ci
435962306a36Sopenharmony_ci	/* now read rh sibling block for next iteration */
436062306a36Sopenharmony_ci	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
436162306a36Sopenharmony_ci	if (xfs_btree_ptr_is_null(cur, &rptr))
436262306a36Sopenharmony_ci		return -ENOENT;
436362306a36Sopenharmony_ci
436462306a36Sopenharmony_ci	/*
436562306a36Sopenharmony_ci	 * We only visit blocks once in this walk, so we have to avoid the
436662306a36Sopenharmony_ci	 * internal xfs_btree_lookup_get_block() optimisation where it will
436762306a36Sopenharmony_ci	 * return the same block without checking if the right sibling points
436862306a36Sopenharmony_ci	 * back to us and creates a cyclic reference in the btree.
436962306a36Sopenharmony_ci	 */
437062306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
437162306a36Sopenharmony_ci		if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp,
437262306a36Sopenharmony_ci							xfs_buf_daddr(bp)))
437362306a36Sopenharmony_ci			return -EFSCORRUPTED;
437462306a36Sopenharmony_ci	} else {
437562306a36Sopenharmony_ci		if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp,
437662306a36Sopenharmony_ci							xfs_buf_daddr(bp)))
437762306a36Sopenharmony_ci			return -EFSCORRUPTED;
437862306a36Sopenharmony_ci	}
437962306a36Sopenharmony_ci	return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
438062306a36Sopenharmony_ci}
438162306a36Sopenharmony_ci
438262306a36Sopenharmony_ci
438362306a36Sopenharmony_ci/* Visit every block in a btree. */
438462306a36Sopenharmony_ciint
438562306a36Sopenharmony_cixfs_btree_visit_blocks(
438662306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
438762306a36Sopenharmony_ci	xfs_btree_visit_blocks_fn	fn,
438862306a36Sopenharmony_ci	unsigned int			flags,
438962306a36Sopenharmony_ci	void				*data)
439062306a36Sopenharmony_ci{
439162306a36Sopenharmony_ci	union xfs_btree_ptr		lptr;
439262306a36Sopenharmony_ci	int				level;
439362306a36Sopenharmony_ci	struct xfs_btree_block		*block = NULL;
439462306a36Sopenharmony_ci	int				error = 0;
439562306a36Sopenharmony_ci
439662306a36Sopenharmony_ci	cur->bc_ops->init_ptr_from_cur(cur, &lptr);
439762306a36Sopenharmony_ci
439862306a36Sopenharmony_ci	/* for each level */
439962306a36Sopenharmony_ci	for (level = cur->bc_nlevels - 1; level >= 0; level--) {
440062306a36Sopenharmony_ci		/* grab the left hand block */
440162306a36Sopenharmony_ci		error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
440262306a36Sopenharmony_ci		if (error)
440362306a36Sopenharmony_ci			return error;
440462306a36Sopenharmony_ci
440562306a36Sopenharmony_ci		/* readahead the left most block for the next level down */
440662306a36Sopenharmony_ci		if (level > 0) {
440762306a36Sopenharmony_ci			union xfs_btree_ptr     *ptr;
440862306a36Sopenharmony_ci
440962306a36Sopenharmony_ci			ptr = xfs_btree_ptr_addr(cur, 1, block);
441062306a36Sopenharmony_ci			xfs_btree_readahead_ptr(cur, ptr, 1);
441162306a36Sopenharmony_ci
441262306a36Sopenharmony_ci			/* save for the next iteration of the loop */
441362306a36Sopenharmony_ci			xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
441462306a36Sopenharmony_ci
441562306a36Sopenharmony_ci			if (!(flags & XFS_BTREE_VISIT_LEAVES))
441662306a36Sopenharmony_ci				continue;
441762306a36Sopenharmony_ci		} else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
441862306a36Sopenharmony_ci			continue;
441962306a36Sopenharmony_ci		}
442062306a36Sopenharmony_ci
442162306a36Sopenharmony_ci		/* for each buffer in the level */
442262306a36Sopenharmony_ci		do {
442362306a36Sopenharmony_ci			error = xfs_btree_visit_block(cur, level, fn, data);
442462306a36Sopenharmony_ci		} while (!error);
442562306a36Sopenharmony_ci
442662306a36Sopenharmony_ci		if (error != -ENOENT)
442762306a36Sopenharmony_ci			return error;
442862306a36Sopenharmony_ci	}
442962306a36Sopenharmony_ci
443062306a36Sopenharmony_ci	return 0;
443162306a36Sopenharmony_ci}
443262306a36Sopenharmony_ci
/*
 * Change the owner of a btree.
 *
 * The mechanism we use here is ordered buffer logging.  Because we don't
 * know how many buffers we are going to need to modify, we don't really want
 * to have to make transaction reservations for the worst case of every
 * buffer in a full size btree, as that may be more space than we can fit in
 * the log.
 *
 * We do the btree walk in the most optimal manner possible: we have sibling
 * pointers, so we can just walk all the blocks on each level from left to
 * right in a single pass, and then move to the next level and do the same.
 * We can also do readahead on the sibling pointers to get IO moving more
 * quickly, though for slow disks this is unlikely to make much difference to
 * performance as the amount of CPU work we have to do before moving to the
 * next block is relatively small.
 *
 * For each btree block that we load, modify the owner appropriately, set
 * the buffer as an ordered buffer and log it appropriately.  We need to
 * ensure that we mark the region we change dirty so that if the buffer is
 * relogged in a subsequent transaction, the changes we make here as an
 * ordered buffer are correctly relogged in that transaction.  If we are in
 * recovery context, then just queue the modified buffer as a delayed write
 * buffer so the transaction recovery completion writes the changes to disk.
 */
struct xfs_btree_block_change_owner_info {
	/* Owner value to stamp into every block of the btree. */
	uint64_t		new_owner;
	/* Delwri list that modified buffers join when there is no transaction. */
	struct list_head	*buffer_list;
};
446162306a36Sopenharmony_ci
446262306a36Sopenharmony_cistatic int
446362306a36Sopenharmony_cixfs_btree_block_change_owner(
446462306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
446562306a36Sopenharmony_ci	int			level,
446662306a36Sopenharmony_ci	void			*data)
446762306a36Sopenharmony_ci{
446862306a36Sopenharmony_ci	struct xfs_btree_block_change_owner_info	*bbcoi = data;
446962306a36Sopenharmony_ci	struct xfs_btree_block	*block;
447062306a36Sopenharmony_ci	struct xfs_buf		*bp;
447162306a36Sopenharmony_ci
447262306a36Sopenharmony_ci	/* modify the owner */
447362306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, level, &bp);
447462306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
447562306a36Sopenharmony_ci		if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner))
447662306a36Sopenharmony_ci			return 0;
447762306a36Sopenharmony_ci		block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner);
447862306a36Sopenharmony_ci	} else {
447962306a36Sopenharmony_ci		if (block->bb_u.s.bb_owner == cpu_to_be32(bbcoi->new_owner))
448062306a36Sopenharmony_ci			return 0;
448162306a36Sopenharmony_ci		block->bb_u.s.bb_owner = cpu_to_be32(bbcoi->new_owner);
448262306a36Sopenharmony_ci	}
448362306a36Sopenharmony_ci
448462306a36Sopenharmony_ci	/*
448562306a36Sopenharmony_ci	 * If the block is a root block hosted in an inode, we might not have a
448662306a36Sopenharmony_ci	 * buffer pointer here and we shouldn't attempt to log the change as the
448762306a36Sopenharmony_ci	 * information is already held in the inode and discarded when the root
448862306a36Sopenharmony_ci	 * block is formatted into the on-disk inode fork. We still change it,
448962306a36Sopenharmony_ci	 * though, so everything is consistent in memory.
449062306a36Sopenharmony_ci	 */
449162306a36Sopenharmony_ci	if (!bp) {
449262306a36Sopenharmony_ci		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
449362306a36Sopenharmony_ci		ASSERT(level == cur->bc_nlevels - 1);
449462306a36Sopenharmony_ci		return 0;
449562306a36Sopenharmony_ci	}
449662306a36Sopenharmony_ci
449762306a36Sopenharmony_ci	if (cur->bc_tp) {
449862306a36Sopenharmony_ci		if (!xfs_trans_ordered_buf(cur->bc_tp, bp)) {
449962306a36Sopenharmony_ci			xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
450062306a36Sopenharmony_ci			return -EAGAIN;
450162306a36Sopenharmony_ci		}
450262306a36Sopenharmony_ci	} else {
450362306a36Sopenharmony_ci		xfs_buf_delwri_queue(bp, bbcoi->buffer_list);
450462306a36Sopenharmony_ci	}
450562306a36Sopenharmony_ci
450662306a36Sopenharmony_ci	return 0;
450762306a36Sopenharmony_ci}
450862306a36Sopenharmony_ci
450962306a36Sopenharmony_ciint
451062306a36Sopenharmony_cixfs_btree_change_owner(
451162306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
451262306a36Sopenharmony_ci	uint64_t		new_owner,
451362306a36Sopenharmony_ci	struct list_head	*buffer_list)
451462306a36Sopenharmony_ci{
451562306a36Sopenharmony_ci	struct xfs_btree_block_change_owner_info	bbcoi;
451662306a36Sopenharmony_ci
451762306a36Sopenharmony_ci	bbcoi.new_owner = new_owner;
451862306a36Sopenharmony_ci	bbcoi.buffer_list = buffer_list;
451962306a36Sopenharmony_ci
452062306a36Sopenharmony_ci	return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
452162306a36Sopenharmony_ci			XFS_BTREE_VISIT_ALL, &bbcoi);
452262306a36Sopenharmony_ci}
452362306a36Sopenharmony_ci
452462306a36Sopenharmony_ci/* Verify the v5 fields of a long-format btree block. */
452562306a36Sopenharmony_cixfs_failaddr_t
452662306a36Sopenharmony_cixfs_btree_lblock_v5hdr_verify(
452762306a36Sopenharmony_ci	struct xfs_buf		*bp,
452862306a36Sopenharmony_ci	uint64_t		owner)
452962306a36Sopenharmony_ci{
453062306a36Sopenharmony_ci	struct xfs_mount	*mp = bp->b_mount;
453162306a36Sopenharmony_ci	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
453262306a36Sopenharmony_ci
453362306a36Sopenharmony_ci	if (!xfs_has_crc(mp))
453462306a36Sopenharmony_ci		return __this_address;
453562306a36Sopenharmony_ci	if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid))
453662306a36Sopenharmony_ci		return __this_address;
453762306a36Sopenharmony_ci	if (block->bb_u.l.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
453862306a36Sopenharmony_ci		return __this_address;
453962306a36Sopenharmony_ci	if (owner != XFS_RMAP_OWN_UNKNOWN &&
454062306a36Sopenharmony_ci	    be64_to_cpu(block->bb_u.l.bb_owner) != owner)
454162306a36Sopenharmony_ci		return __this_address;
454262306a36Sopenharmony_ci	return NULL;
454362306a36Sopenharmony_ci}
454462306a36Sopenharmony_ci
454562306a36Sopenharmony_ci/* Verify a long-format btree block. */
454662306a36Sopenharmony_cixfs_failaddr_t
454762306a36Sopenharmony_cixfs_btree_lblock_verify(
454862306a36Sopenharmony_ci	struct xfs_buf		*bp,
454962306a36Sopenharmony_ci	unsigned int		max_recs)
455062306a36Sopenharmony_ci{
455162306a36Sopenharmony_ci	struct xfs_mount	*mp = bp->b_mount;
455262306a36Sopenharmony_ci	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
455362306a36Sopenharmony_ci	xfs_fsblock_t		fsb;
455462306a36Sopenharmony_ci	xfs_failaddr_t		fa;
455562306a36Sopenharmony_ci
455662306a36Sopenharmony_ci	/* numrecs verification */
455762306a36Sopenharmony_ci	if (be16_to_cpu(block->bb_numrecs) > max_recs)
455862306a36Sopenharmony_ci		return __this_address;
455962306a36Sopenharmony_ci
456062306a36Sopenharmony_ci	/* sibling pointer verification */
456162306a36Sopenharmony_ci	fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
456262306a36Sopenharmony_ci	fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
456362306a36Sopenharmony_ci			block->bb_u.l.bb_leftsib);
456462306a36Sopenharmony_ci	if (!fa)
456562306a36Sopenharmony_ci		fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb,
456662306a36Sopenharmony_ci				block->bb_u.l.bb_rightsib);
456762306a36Sopenharmony_ci	return fa;
456862306a36Sopenharmony_ci}
456962306a36Sopenharmony_ci
457062306a36Sopenharmony_ci/**
457162306a36Sopenharmony_ci * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
457262306a36Sopenharmony_ci *				      btree block
457362306a36Sopenharmony_ci *
457462306a36Sopenharmony_ci * @bp: buffer containing the btree block
457562306a36Sopenharmony_ci */
457662306a36Sopenharmony_cixfs_failaddr_t
457762306a36Sopenharmony_cixfs_btree_sblock_v5hdr_verify(
457862306a36Sopenharmony_ci	struct xfs_buf		*bp)
457962306a36Sopenharmony_ci{
458062306a36Sopenharmony_ci	struct xfs_mount	*mp = bp->b_mount;
458162306a36Sopenharmony_ci	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
458262306a36Sopenharmony_ci	struct xfs_perag	*pag = bp->b_pag;
458362306a36Sopenharmony_ci
458462306a36Sopenharmony_ci	if (!xfs_has_crc(mp))
458562306a36Sopenharmony_ci		return __this_address;
458662306a36Sopenharmony_ci	if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
458762306a36Sopenharmony_ci		return __this_address;
458862306a36Sopenharmony_ci	if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
458962306a36Sopenharmony_ci		return __this_address;
459062306a36Sopenharmony_ci	if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
459162306a36Sopenharmony_ci		return __this_address;
459262306a36Sopenharmony_ci	return NULL;
459362306a36Sopenharmony_ci}
459462306a36Sopenharmony_ci
459562306a36Sopenharmony_ci/**
459662306a36Sopenharmony_ci * xfs_btree_sblock_verify() -- verify a short-format btree block
459762306a36Sopenharmony_ci *
459862306a36Sopenharmony_ci * @bp: buffer containing the btree block
459962306a36Sopenharmony_ci * @max_recs: maximum records allowed in this btree node
460062306a36Sopenharmony_ci */
460162306a36Sopenharmony_cixfs_failaddr_t
460262306a36Sopenharmony_cixfs_btree_sblock_verify(
460362306a36Sopenharmony_ci	struct xfs_buf		*bp,
460462306a36Sopenharmony_ci	unsigned int		max_recs)
460562306a36Sopenharmony_ci{
460662306a36Sopenharmony_ci	struct xfs_mount	*mp = bp->b_mount;
460762306a36Sopenharmony_ci	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
460862306a36Sopenharmony_ci	xfs_agblock_t		agbno;
460962306a36Sopenharmony_ci	xfs_failaddr_t		fa;
461062306a36Sopenharmony_ci
461162306a36Sopenharmony_ci	/* numrecs verification */
461262306a36Sopenharmony_ci	if (be16_to_cpu(block->bb_numrecs) > max_recs)
461362306a36Sopenharmony_ci		return __this_address;
461462306a36Sopenharmony_ci
461562306a36Sopenharmony_ci	/* sibling pointer verification */
461662306a36Sopenharmony_ci	agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp));
461762306a36Sopenharmony_ci	fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
461862306a36Sopenharmony_ci			block->bb_u.s.bb_leftsib);
461962306a36Sopenharmony_ci	if (!fa)
462062306a36Sopenharmony_ci		fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno,
462162306a36Sopenharmony_ci				block->bb_u.s.bb_rightsib);
462262306a36Sopenharmony_ci	return fa;
462362306a36Sopenharmony_ci}
462462306a36Sopenharmony_ci
/*
 * Calculate the height of the btree needed to index @records leaf records.
 *
 * limits[0] is the per-block record limit used for the leaf level and
 * limits[1] the per-block keyptr limit used for every level above it.  The
 * height starts at 1 and grows by one for each extra level needed until a
 * level fits in a single block.
 */
unsigned int
xfs_btree_compute_maxlevels(
	const unsigned int	*limits,
	unsigned long long	records)
{
	unsigned long long	nblocks;
	unsigned int		height;

	/* Blocks in the leaf level. */
	nblocks = howmany_64(records, limits[0]);

	/* Stack node levels until one of them is a single block. */
	for (height = 1; nblocks > 1; height++)
		nblocks = howmany_64(nblocks, limits[1]);

	return height;
}
464462306a36Sopenharmony_ci
/*
 * Calculate how many blocks a btree needs in order to index @records leaf
 * records.
 *
 * limits[0] is the per-block record limit used for the leaf level and
 * limits[1] the per-block keyptr limit used for every level above it.  The
 * result is the sum of the block counts of all levels, leaf level included.
 */
unsigned long long
xfs_btree_calc_size(
	const unsigned int	*limits,
	unsigned long long	records)
{
	unsigned long long	nblocks = howmany_64(records, limits[0]);
	unsigned long long	total;

	/* Start with the leaf level, then add each node level above it. */
	for (total = nblocks; nblocks > 1; total += nblocks)
		nblocks = howmany_64(nblocks, limits[1]);

	return total;
}
466462306a36Sopenharmony_ci
/*
 * Given the number of blocks available for a btree to fill with records and
 * pointers, work out the tallest tree that space can produce, based on how
 * many pointers each interior node holds (limits[1]).
 *
 * A single-level tree is assumed to use one block.  From there we track how
 * many blocks each further node level would use, and keep adding levels
 * until the blocks that remain can no longer hold the next one.  At that
 * point all the leaf blocks in the space are indexed and there is no free
 * space left to split the tree further, so that is the maximum height.
 *
 * Returns 0 when @leaf_blocks is 0.
 */
unsigned int
xfs_btree_space_to_height(
	const unsigned int	*limits,
	unsigned long long	leaf_blocks)
{
	unsigned long long	level_blocks;
	unsigned long long	remaining;
	unsigned int		height;

	if (leaf_blocks == 0)
		return 0;

	/* One block is consumed by the single-level tree itself. */
	remaining = leaf_blocks - 1;
	height = 1;

	/*
	 * The root block may hold fewer than minrecs pointers because the
	 * tree might not be big enough to need that much fanout, so the
	 * first node level is sized at 2 pointers rather than limits[1].
	 * Each level after that is limits[1] times wider than the last.
	 */
	for (level_blocks = 2; level_blocks < remaining;
	     level_blocks *= limits[1]) {
		remaining -= level_blocks;
		height++;
	}

	return height;
}
470262306a36Sopenharmony_ci
470362306a36Sopenharmony_ci/*
470462306a36Sopenharmony_ci * Query a regular btree for all records overlapping a given interval.
470562306a36Sopenharmony_ci * Start with a LE lookup of the key of low_rec and return all records
470662306a36Sopenharmony_ci * until we find a record with a key greater than the key of high_rec.
470762306a36Sopenharmony_ci */
470862306a36Sopenharmony_ciSTATIC int
470962306a36Sopenharmony_cixfs_btree_simple_query_range(
471062306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
471162306a36Sopenharmony_ci	const union xfs_btree_key	*low_key,
471262306a36Sopenharmony_ci	const union xfs_btree_key	*high_key,
471362306a36Sopenharmony_ci	xfs_btree_query_range_fn	fn,
471462306a36Sopenharmony_ci	void				*priv)
471562306a36Sopenharmony_ci{
471662306a36Sopenharmony_ci	union xfs_btree_rec		*recp;
471762306a36Sopenharmony_ci	union xfs_btree_key		rec_key;
471862306a36Sopenharmony_ci	int				stat;
471962306a36Sopenharmony_ci	bool				firstrec = true;
472062306a36Sopenharmony_ci	int				error;
472162306a36Sopenharmony_ci
472262306a36Sopenharmony_ci	ASSERT(cur->bc_ops->init_high_key_from_rec);
472362306a36Sopenharmony_ci	ASSERT(cur->bc_ops->diff_two_keys);
472462306a36Sopenharmony_ci
472562306a36Sopenharmony_ci	/*
472662306a36Sopenharmony_ci	 * Find the leftmost record.  The btree cursor must be set
472762306a36Sopenharmony_ci	 * to the low record used to generate low_key.
472862306a36Sopenharmony_ci	 */
472962306a36Sopenharmony_ci	stat = 0;
473062306a36Sopenharmony_ci	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat);
473162306a36Sopenharmony_ci	if (error)
473262306a36Sopenharmony_ci		goto out;
473362306a36Sopenharmony_ci
473462306a36Sopenharmony_ci	/* Nothing?  See if there's anything to the right. */
473562306a36Sopenharmony_ci	if (!stat) {
473662306a36Sopenharmony_ci		error = xfs_btree_increment(cur, 0, &stat);
473762306a36Sopenharmony_ci		if (error)
473862306a36Sopenharmony_ci			goto out;
473962306a36Sopenharmony_ci	}
474062306a36Sopenharmony_ci
474162306a36Sopenharmony_ci	while (stat) {
474262306a36Sopenharmony_ci		/* Find the record. */
474362306a36Sopenharmony_ci		error = xfs_btree_get_rec(cur, &recp, &stat);
474462306a36Sopenharmony_ci		if (error || !stat)
474562306a36Sopenharmony_ci			break;
474662306a36Sopenharmony_ci
474762306a36Sopenharmony_ci		/* Skip if low_key > high_key(rec). */
474862306a36Sopenharmony_ci		if (firstrec) {
474962306a36Sopenharmony_ci			cur->bc_ops->init_high_key_from_rec(&rec_key, recp);
475062306a36Sopenharmony_ci			firstrec = false;
475162306a36Sopenharmony_ci			if (xfs_btree_keycmp_gt(cur, low_key, &rec_key))
475262306a36Sopenharmony_ci				goto advloop;
475362306a36Sopenharmony_ci		}
475462306a36Sopenharmony_ci
475562306a36Sopenharmony_ci		/* Stop if low_key(rec) > high_key. */
475662306a36Sopenharmony_ci		cur->bc_ops->init_key_from_rec(&rec_key, recp);
475762306a36Sopenharmony_ci		if (xfs_btree_keycmp_gt(cur, &rec_key, high_key))
475862306a36Sopenharmony_ci			break;
475962306a36Sopenharmony_ci
476062306a36Sopenharmony_ci		/* Callback */
476162306a36Sopenharmony_ci		error = fn(cur, recp, priv);
476262306a36Sopenharmony_ci		if (error)
476362306a36Sopenharmony_ci			break;
476462306a36Sopenharmony_ci
476562306a36Sopenharmony_ciadvloop:
476662306a36Sopenharmony_ci		/* Move on to the next record. */
476762306a36Sopenharmony_ci		error = xfs_btree_increment(cur, 0, &stat);
476862306a36Sopenharmony_ci		if (error)
476962306a36Sopenharmony_ci			break;
477062306a36Sopenharmony_ci	}
477162306a36Sopenharmony_ci
477262306a36Sopenharmony_ciout:
477362306a36Sopenharmony_ci	return error;
477462306a36Sopenharmony_ci}
477562306a36Sopenharmony_ci
477662306a36Sopenharmony_ci/*
477762306a36Sopenharmony_ci * Query an overlapped interval btree for all records overlapping a given
477862306a36Sopenharmony_ci * interval.  This function roughly follows the algorithm given in
477962306a36Sopenharmony_ci * "Interval Trees" of _Introduction to Algorithms_, which is section
478062306a36Sopenharmony_ci * 14.3 in the 2nd and 3rd editions.
478162306a36Sopenharmony_ci *
478262306a36Sopenharmony_ci * First, generate keys for the low and high records passed in.
478362306a36Sopenharmony_ci *
478462306a36Sopenharmony_ci * For any leaf node, generate the high and low keys for the record.
478562306a36Sopenharmony_ci * If the record keys overlap with the query low/high keys, pass the
478662306a36Sopenharmony_ci * record to the function iterator.
478762306a36Sopenharmony_ci *
478862306a36Sopenharmony_ci * For any internal node, compare the low and high keys of each
478962306a36Sopenharmony_ci * pointer against the query low/high keys.  If there's an overlap,
479062306a36Sopenharmony_ci * follow the pointer.
479162306a36Sopenharmony_ci *
479262306a36Sopenharmony_ci * As an optimization, we stop scanning a block when we find a low key
479362306a36Sopenharmony_ci * that is greater than the query's high key.
479462306a36Sopenharmony_ci */
479562306a36Sopenharmony_ciSTATIC int
479662306a36Sopenharmony_cixfs_btree_overlapped_query_range(
479762306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
479862306a36Sopenharmony_ci	const union xfs_btree_key	*low_key,
479962306a36Sopenharmony_ci	const union xfs_btree_key	*high_key,
480062306a36Sopenharmony_ci	xfs_btree_query_range_fn	fn,
480162306a36Sopenharmony_ci	void				*priv)
480262306a36Sopenharmony_ci{
480362306a36Sopenharmony_ci	union xfs_btree_ptr		ptr;
480462306a36Sopenharmony_ci	union xfs_btree_ptr		*pp;
480562306a36Sopenharmony_ci	union xfs_btree_key		rec_key;
480662306a36Sopenharmony_ci	union xfs_btree_key		rec_hkey;
480762306a36Sopenharmony_ci	union xfs_btree_key		*lkp;
480862306a36Sopenharmony_ci	union xfs_btree_key		*hkp;
480962306a36Sopenharmony_ci	union xfs_btree_rec		*recp;
481062306a36Sopenharmony_ci	struct xfs_btree_block		*block;
481162306a36Sopenharmony_ci	int				level;
481262306a36Sopenharmony_ci	struct xfs_buf			*bp;
481362306a36Sopenharmony_ci	int				i;
481462306a36Sopenharmony_ci	int				error;
481562306a36Sopenharmony_ci
481662306a36Sopenharmony_ci	/* Load the root of the btree. */
481762306a36Sopenharmony_ci	level = cur->bc_nlevels - 1;
481862306a36Sopenharmony_ci	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
481962306a36Sopenharmony_ci	error = xfs_btree_lookup_get_block(cur, level, &ptr, &block);
482062306a36Sopenharmony_ci	if (error)
482162306a36Sopenharmony_ci		return error;
482262306a36Sopenharmony_ci	xfs_btree_get_block(cur, level, &bp);
482362306a36Sopenharmony_ci	trace_xfs_btree_overlapped_query_range(cur, level, bp);
482462306a36Sopenharmony_ci#ifdef DEBUG
482562306a36Sopenharmony_ci	error = xfs_btree_check_block(cur, block, level, bp);
482662306a36Sopenharmony_ci	if (error)
482762306a36Sopenharmony_ci		goto out;
482862306a36Sopenharmony_ci#endif
482962306a36Sopenharmony_ci	cur->bc_levels[level].ptr = 1;
483062306a36Sopenharmony_ci
483162306a36Sopenharmony_ci	while (level < cur->bc_nlevels) {
483262306a36Sopenharmony_ci		block = xfs_btree_get_block(cur, level, &bp);
483362306a36Sopenharmony_ci
483462306a36Sopenharmony_ci		/* End of node, pop back towards the root. */
483562306a36Sopenharmony_ci		if (cur->bc_levels[level].ptr >
483662306a36Sopenharmony_ci					be16_to_cpu(block->bb_numrecs)) {
483762306a36Sopenharmony_cipop_up:
483862306a36Sopenharmony_ci			if (level < cur->bc_nlevels - 1)
483962306a36Sopenharmony_ci				cur->bc_levels[level + 1].ptr++;
484062306a36Sopenharmony_ci			level++;
484162306a36Sopenharmony_ci			continue;
484262306a36Sopenharmony_ci		}
484362306a36Sopenharmony_ci
484462306a36Sopenharmony_ci		if (level == 0) {
484562306a36Sopenharmony_ci			/* Handle a leaf node. */
484662306a36Sopenharmony_ci			recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr,
484762306a36Sopenharmony_ci					block);
484862306a36Sopenharmony_ci
484962306a36Sopenharmony_ci			cur->bc_ops->init_high_key_from_rec(&rec_hkey, recp);
485062306a36Sopenharmony_ci			cur->bc_ops->init_key_from_rec(&rec_key, recp);
485162306a36Sopenharmony_ci
485262306a36Sopenharmony_ci			/*
485362306a36Sopenharmony_ci			 * If (query's high key < record's low key), then there
485462306a36Sopenharmony_ci			 * are no more interesting records in this block.  Pop
485562306a36Sopenharmony_ci			 * up to the leaf level to find more record blocks.
485662306a36Sopenharmony_ci			 *
485762306a36Sopenharmony_ci			 * If (record's high key >= query's low key) and
485862306a36Sopenharmony_ci			 *    (query's high key >= record's low key), then
485962306a36Sopenharmony_ci			 * this record overlaps the query range; callback.
486062306a36Sopenharmony_ci			 */
486162306a36Sopenharmony_ci			if (xfs_btree_keycmp_lt(cur, high_key, &rec_key))
486262306a36Sopenharmony_ci				goto pop_up;
486362306a36Sopenharmony_ci			if (xfs_btree_keycmp_ge(cur, &rec_hkey, low_key)) {
486462306a36Sopenharmony_ci				error = fn(cur, recp, priv);
486562306a36Sopenharmony_ci				if (error)
486662306a36Sopenharmony_ci					break;
486762306a36Sopenharmony_ci			}
486862306a36Sopenharmony_ci			cur->bc_levels[level].ptr++;
486962306a36Sopenharmony_ci			continue;
487062306a36Sopenharmony_ci		}
487162306a36Sopenharmony_ci
487262306a36Sopenharmony_ci		/* Handle an internal node. */
487362306a36Sopenharmony_ci		lkp = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block);
487462306a36Sopenharmony_ci		hkp = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr,
487562306a36Sopenharmony_ci				block);
487662306a36Sopenharmony_ci		pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block);
487762306a36Sopenharmony_ci
487862306a36Sopenharmony_ci		/*
487962306a36Sopenharmony_ci		 * If (query's high key < pointer's low key), then there are no
488062306a36Sopenharmony_ci		 * more interesting keys in this block.  Pop up one leaf level
488162306a36Sopenharmony_ci		 * to continue looking for records.
488262306a36Sopenharmony_ci		 *
488362306a36Sopenharmony_ci		 * If (pointer's high key >= query's low key) and
488462306a36Sopenharmony_ci		 *    (query's high key >= pointer's low key), then
488562306a36Sopenharmony_ci		 * this record overlaps the query range; follow pointer.
488662306a36Sopenharmony_ci		 */
488762306a36Sopenharmony_ci		if (xfs_btree_keycmp_lt(cur, high_key, lkp))
488862306a36Sopenharmony_ci			goto pop_up;
488962306a36Sopenharmony_ci		if (xfs_btree_keycmp_ge(cur, hkp, low_key)) {
489062306a36Sopenharmony_ci			level--;
489162306a36Sopenharmony_ci			error = xfs_btree_lookup_get_block(cur, level, pp,
489262306a36Sopenharmony_ci					&block);
489362306a36Sopenharmony_ci			if (error)
489462306a36Sopenharmony_ci				goto out;
489562306a36Sopenharmony_ci			xfs_btree_get_block(cur, level, &bp);
489662306a36Sopenharmony_ci			trace_xfs_btree_overlapped_query_range(cur, level, bp);
489762306a36Sopenharmony_ci#ifdef DEBUG
489862306a36Sopenharmony_ci			error = xfs_btree_check_block(cur, block, level, bp);
489962306a36Sopenharmony_ci			if (error)
490062306a36Sopenharmony_ci				goto out;
490162306a36Sopenharmony_ci#endif
490262306a36Sopenharmony_ci			cur->bc_levels[level].ptr = 1;
490362306a36Sopenharmony_ci			continue;
490462306a36Sopenharmony_ci		}
490562306a36Sopenharmony_ci		cur->bc_levels[level].ptr++;
490662306a36Sopenharmony_ci	}
490762306a36Sopenharmony_ci
490862306a36Sopenharmony_ciout:
490962306a36Sopenharmony_ci	/*
491062306a36Sopenharmony_ci	 * If we don't end this function with the cursor pointing at a record
491162306a36Sopenharmony_ci	 * block, a subsequent non-error cursor deletion will not release
491262306a36Sopenharmony_ci	 * node-level buffers, causing a buffer leak.  This is quite possible
491362306a36Sopenharmony_ci	 * with a zero-results range query, so release the buffers if we
491462306a36Sopenharmony_ci	 * failed to return any results.
491562306a36Sopenharmony_ci	 */
491662306a36Sopenharmony_ci	if (cur->bc_levels[0].bp == NULL) {
491762306a36Sopenharmony_ci		for (i = 0; i < cur->bc_nlevels; i++) {
491862306a36Sopenharmony_ci			if (cur->bc_levels[i].bp) {
491962306a36Sopenharmony_ci				xfs_trans_brelse(cur->bc_tp,
492062306a36Sopenharmony_ci						cur->bc_levels[i].bp);
492162306a36Sopenharmony_ci				cur->bc_levels[i].bp = NULL;
492262306a36Sopenharmony_ci				cur->bc_levels[i].ptr = 0;
492362306a36Sopenharmony_ci				cur->bc_levels[i].ra = 0;
492462306a36Sopenharmony_ci			}
492562306a36Sopenharmony_ci		}
492662306a36Sopenharmony_ci	}
492762306a36Sopenharmony_ci
492862306a36Sopenharmony_ci	return error;
492962306a36Sopenharmony_ci}
493062306a36Sopenharmony_ci
493162306a36Sopenharmony_cistatic inline void
493262306a36Sopenharmony_cixfs_btree_key_from_irec(
493362306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
493462306a36Sopenharmony_ci	union xfs_btree_key		*key,
493562306a36Sopenharmony_ci	const union xfs_btree_irec	*irec)
493662306a36Sopenharmony_ci{
493762306a36Sopenharmony_ci	union xfs_btree_rec		rec;
493862306a36Sopenharmony_ci
493962306a36Sopenharmony_ci	cur->bc_rec = *irec;
494062306a36Sopenharmony_ci	cur->bc_ops->init_rec_from_cur(cur, &rec);
494162306a36Sopenharmony_ci	cur->bc_ops->init_key_from_rec(key, &rec);
494262306a36Sopenharmony_ci}
494362306a36Sopenharmony_ci
494462306a36Sopenharmony_ci/*
494562306a36Sopenharmony_ci * Query a btree for all records overlapping a given interval of keys.  The
494662306a36Sopenharmony_ci * supplied function will be called with each record found; return one of the
494762306a36Sopenharmony_ci * XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
494862306a36Sopenharmony_ci * code.  This function returns -ECANCELED, zero, or a negative error code.
494962306a36Sopenharmony_ci */
495062306a36Sopenharmony_ciint
495162306a36Sopenharmony_cixfs_btree_query_range(
495262306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
495362306a36Sopenharmony_ci	const union xfs_btree_irec	*low_rec,
495462306a36Sopenharmony_ci	const union xfs_btree_irec	*high_rec,
495562306a36Sopenharmony_ci	xfs_btree_query_range_fn	fn,
495662306a36Sopenharmony_ci	void				*priv)
495762306a36Sopenharmony_ci{
495862306a36Sopenharmony_ci	union xfs_btree_key		low_key;
495962306a36Sopenharmony_ci	union xfs_btree_key		high_key;
496062306a36Sopenharmony_ci
496162306a36Sopenharmony_ci	/* Find the keys of both ends of the interval. */
496262306a36Sopenharmony_ci	xfs_btree_key_from_irec(cur, &high_key, high_rec);
496362306a36Sopenharmony_ci	xfs_btree_key_from_irec(cur, &low_key, low_rec);
496462306a36Sopenharmony_ci
496562306a36Sopenharmony_ci	/* Enforce low key <= high key. */
496662306a36Sopenharmony_ci	if (!xfs_btree_keycmp_le(cur, &low_key, &high_key))
496762306a36Sopenharmony_ci		return -EINVAL;
496862306a36Sopenharmony_ci
496962306a36Sopenharmony_ci	if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
497062306a36Sopenharmony_ci		return xfs_btree_simple_query_range(cur, &low_key,
497162306a36Sopenharmony_ci				&high_key, fn, priv);
497262306a36Sopenharmony_ci	return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
497362306a36Sopenharmony_ci			fn, priv);
497462306a36Sopenharmony_ci}
497562306a36Sopenharmony_ci
497662306a36Sopenharmony_ci/* Query a btree for all records. */
497762306a36Sopenharmony_ciint
497862306a36Sopenharmony_cixfs_btree_query_all(
497962306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
498062306a36Sopenharmony_ci	xfs_btree_query_range_fn	fn,
498162306a36Sopenharmony_ci	void				*priv)
498262306a36Sopenharmony_ci{
498362306a36Sopenharmony_ci	union xfs_btree_key		low_key;
498462306a36Sopenharmony_ci	union xfs_btree_key		high_key;
498562306a36Sopenharmony_ci
498662306a36Sopenharmony_ci	memset(&cur->bc_rec, 0, sizeof(cur->bc_rec));
498762306a36Sopenharmony_ci	memset(&low_key, 0, sizeof(low_key));
498862306a36Sopenharmony_ci	memset(&high_key, 0xFF, sizeof(high_key));
498962306a36Sopenharmony_ci
499062306a36Sopenharmony_ci	return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv);
499162306a36Sopenharmony_ci}
499262306a36Sopenharmony_ci
499362306a36Sopenharmony_cistatic int
499462306a36Sopenharmony_cixfs_btree_count_blocks_helper(
499562306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
499662306a36Sopenharmony_ci	int			level,
499762306a36Sopenharmony_ci	void			*data)
499862306a36Sopenharmony_ci{
499962306a36Sopenharmony_ci	xfs_extlen_t		*blocks = data;
500062306a36Sopenharmony_ci	(*blocks)++;
500162306a36Sopenharmony_ci
500262306a36Sopenharmony_ci	return 0;
500362306a36Sopenharmony_ci}
500462306a36Sopenharmony_ci
500562306a36Sopenharmony_ci/* Count the blocks in a btree and return the result in *blocks. */
500662306a36Sopenharmony_ciint
500762306a36Sopenharmony_cixfs_btree_count_blocks(
500862306a36Sopenharmony_ci	struct xfs_btree_cur	*cur,
500962306a36Sopenharmony_ci	xfs_extlen_t		*blocks)
501062306a36Sopenharmony_ci{
501162306a36Sopenharmony_ci	*blocks = 0;
501262306a36Sopenharmony_ci	return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
501362306a36Sopenharmony_ci			XFS_BTREE_VISIT_ALL, blocks);
501462306a36Sopenharmony_ci}
501562306a36Sopenharmony_ci
501662306a36Sopenharmony_ci/* Compare two btree pointers. */
501762306a36Sopenharmony_ciint64_t
501862306a36Sopenharmony_cixfs_btree_diff_two_ptrs(
501962306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
502062306a36Sopenharmony_ci	const union xfs_btree_ptr	*a,
502162306a36Sopenharmony_ci	const union xfs_btree_ptr	*b)
502262306a36Sopenharmony_ci{
502362306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
502462306a36Sopenharmony_ci		return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l);
502562306a36Sopenharmony_ci	return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s);
502662306a36Sopenharmony_ci}
502762306a36Sopenharmony_ci
502862306a36Sopenharmony_cistruct xfs_btree_has_records {
502962306a36Sopenharmony_ci	/* Keys for the start and end of the range we want to know about. */
503062306a36Sopenharmony_ci	union xfs_btree_key		start_key;
503162306a36Sopenharmony_ci	union xfs_btree_key		end_key;
503262306a36Sopenharmony_ci
503362306a36Sopenharmony_ci	/* Mask for key comparisons, if desired. */
503462306a36Sopenharmony_ci	const union xfs_btree_key	*key_mask;
503562306a36Sopenharmony_ci
503662306a36Sopenharmony_ci	/* Highest record key we've seen so far. */
503762306a36Sopenharmony_ci	union xfs_btree_key		high_key;
503862306a36Sopenharmony_ci
503962306a36Sopenharmony_ci	enum xbtree_recpacking		outcome;
504062306a36Sopenharmony_ci};
504162306a36Sopenharmony_ci
504262306a36Sopenharmony_ciSTATIC int
504362306a36Sopenharmony_cixfs_btree_has_records_helper(
504462306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
504562306a36Sopenharmony_ci	const union xfs_btree_rec	*rec,
504662306a36Sopenharmony_ci	void				*priv)
504762306a36Sopenharmony_ci{
504862306a36Sopenharmony_ci	union xfs_btree_key		rec_key;
504962306a36Sopenharmony_ci	union xfs_btree_key		rec_high_key;
505062306a36Sopenharmony_ci	struct xfs_btree_has_records	*info = priv;
505162306a36Sopenharmony_ci	enum xbtree_key_contig		key_contig;
505262306a36Sopenharmony_ci
505362306a36Sopenharmony_ci	cur->bc_ops->init_key_from_rec(&rec_key, rec);
505462306a36Sopenharmony_ci
505562306a36Sopenharmony_ci	if (info->outcome == XBTREE_RECPACKING_EMPTY) {
505662306a36Sopenharmony_ci		info->outcome = XBTREE_RECPACKING_SPARSE;
505762306a36Sopenharmony_ci
505862306a36Sopenharmony_ci		/*
505962306a36Sopenharmony_ci		 * If the first record we find does not overlap the start key,
506062306a36Sopenharmony_ci		 * then there is a hole at the start of the search range.
506162306a36Sopenharmony_ci		 * Classify this as sparse and stop immediately.
506262306a36Sopenharmony_ci		 */
506362306a36Sopenharmony_ci		if (xfs_btree_masked_keycmp_lt(cur, &info->start_key, &rec_key,
506462306a36Sopenharmony_ci					info->key_mask))
506562306a36Sopenharmony_ci			return -ECANCELED;
506662306a36Sopenharmony_ci	} else {
506762306a36Sopenharmony_ci		/*
506862306a36Sopenharmony_ci		 * If a subsequent record does not overlap with the any record
506962306a36Sopenharmony_ci		 * we've seen so far, there is a hole in the middle of the
507062306a36Sopenharmony_ci		 * search range.  Classify this as sparse and stop.
507162306a36Sopenharmony_ci		 * If the keys overlap and this btree does not allow overlap,
507262306a36Sopenharmony_ci		 * signal corruption.
507362306a36Sopenharmony_ci		 */
507462306a36Sopenharmony_ci		key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key,
507562306a36Sopenharmony_ci					&rec_key, info->key_mask);
507662306a36Sopenharmony_ci		if (key_contig == XBTREE_KEY_OVERLAP &&
507762306a36Sopenharmony_ci				!(cur->bc_flags & XFS_BTREE_OVERLAPPING))
507862306a36Sopenharmony_ci			return -EFSCORRUPTED;
507962306a36Sopenharmony_ci		if (key_contig == XBTREE_KEY_GAP)
508062306a36Sopenharmony_ci			return -ECANCELED;
508162306a36Sopenharmony_ci	}
508262306a36Sopenharmony_ci
508362306a36Sopenharmony_ci	/*
508462306a36Sopenharmony_ci	 * If high_key(rec) is larger than any other high key we've seen,
508562306a36Sopenharmony_ci	 * remember it for later.
508662306a36Sopenharmony_ci	 */
508762306a36Sopenharmony_ci	cur->bc_ops->init_high_key_from_rec(&rec_high_key, rec);
508862306a36Sopenharmony_ci	if (xfs_btree_masked_keycmp_gt(cur, &rec_high_key, &info->high_key,
508962306a36Sopenharmony_ci				info->key_mask))
509062306a36Sopenharmony_ci		info->high_key = rec_high_key; /* struct copy */
509162306a36Sopenharmony_ci
509262306a36Sopenharmony_ci	return 0;
509362306a36Sopenharmony_ci}
509462306a36Sopenharmony_ci
509562306a36Sopenharmony_ci/*
509662306a36Sopenharmony_ci * Scan part of the keyspace of a btree and tell us if that keyspace does not
509762306a36Sopenharmony_ci * map to any records; is fully mapped to records; or is partially mapped to
509862306a36Sopenharmony_ci * records.  This is the btree record equivalent to determining if a file is
509962306a36Sopenharmony_ci * sparse.
510062306a36Sopenharmony_ci *
510162306a36Sopenharmony_ci * For most btree types, the record scan should use all available btree key
510262306a36Sopenharmony_ci * fields to compare the keys encountered.  These callers should pass NULL for
510362306a36Sopenharmony_ci * @mask.  However, some callers (e.g.  scanning physical space in the rmapbt)
510462306a36Sopenharmony_ci * want to ignore some part of the btree record keyspace when performing the
510562306a36Sopenharmony_ci * comparison.  These callers should pass in a union xfs_btree_key object with
510662306a36Sopenharmony_ci * the fields that *should* be a part of the comparison set to any nonzero
510762306a36Sopenharmony_ci * value, and the rest zeroed.
510862306a36Sopenharmony_ci */
510962306a36Sopenharmony_ciint
511062306a36Sopenharmony_cixfs_btree_has_records(
511162306a36Sopenharmony_ci	struct xfs_btree_cur		*cur,
511262306a36Sopenharmony_ci	const union xfs_btree_irec	*low,
511362306a36Sopenharmony_ci	const union xfs_btree_irec	*high,
511462306a36Sopenharmony_ci	const union xfs_btree_key	*mask,
511562306a36Sopenharmony_ci	enum xbtree_recpacking		*outcome)
511662306a36Sopenharmony_ci{
511762306a36Sopenharmony_ci	struct xfs_btree_has_records	info = {
511862306a36Sopenharmony_ci		.outcome		= XBTREE_RECPACKING_EMPTY,
511962306a36Sopenharmony_ci		.key_mask		= mask,
512062306a36Sopenharmony_ci	};
512162306a36Sopenharmony_ci	int				error;
512262306a36Sopenharmony_ci
512362306a36Sopenharmony_ci	/* Not all btrees support this operation. */
512462306a36Sopenharmony_ci	if (!cur->bc_ops->keys_contiguous) {
512562306a36Sopenharmony_ci		ASSERT(0);
512662306a36Sopenharmony_ci		return -EOPNOTSUPP;
512762306a36Sopenharmony_ci	}
512862306a36Sopenharmony_ci
512962306a36Sopenharmony_ci	xfs_btree_key_from_irec(cur, &info.start_key, low);
513062306a36Sopenharmony_ci	xfs_btree_key_from_irec(cur, &info.end_key, high);
513162306a36Sopenharmony_ci
513262306a36Sopenharmony_ci	error = xfs_btree_query_range(cur, low, high,
513362306a36Sopenharmony_ci			xfs_btree_has_records_helper, &info);
513462306a36Sopenharmony_ci	if (error == -ECANCELED)
513562306a36Sopenharmony_ci		goto out;
513662306a36Sopenharmony_ci	if (error)
513762306a36Sopenharmony_ci		return error;
513862306a36Sopenharmony_ci
513962306a36Sopenharmony_ci	if (info.outcome == XBTREE_RECPACKING_EMPTY)
514062306a36Sopenharmony_ci		goto out;
514162306a36Sopenharmony_ci
514262306a36Sopenharmony_ci	/*
514362306a36Sopenharmony_ci	 * If the largest high_key(rec) we saw during the walk is greater than
514462306a36Sopenharmony_ci	 * the end of the search range, classify this as full.  Otherwise,
514562306a36Sopenharmony_ci	 * there is a hole at the end of the search range.
514662306a36Sopenharmony_ci	 */
514762306a36Sopenharmony_ci	if (xfs_btree_masked_keycmp_ge(cur, &info.high_key, &info.end_key,
514862306a36Sopenharmony_ci				mask))
514962306a36Sopenharmony_ci		info.outcome = XBTREE_RECPACKING_FULL;
515062306a36Sopenharmony_ci
515162306a36Sopenharmony_ciout:
515262306a36Sopenharmony_ci	*outcome = info.outcome;
515362306a36Sopenharmony_ci	return 0;
515462306a36Sopenharmony_ci}
515562306a36Sopenharmony_ci
515662306a36Sopenharmony_ci/* Are there more records in this btree? */
515762306a36Sopenharmony_cibool
515862306a36Sopenharmony_cixfs_btree_has_more_records(
515962306a36Sopenharmony_ci	struct xfs_btree_cur	*cur)
516062306a36Sopenharmony_ci{
516162306a36Sopenharmony_ci	struct xfs_btree_block	*block;
516262306a36Sopenharmony_ci	struct xfs_buf		*bp;
516362306a36Sopenharmony_ci
516462306a36Sopenharmony_ci	block = xfs_btree_get_block(cur, 0, &bp);
516562306a36Sopenharmony_ci
516662306a36Sopenharmony_ci	/* There are still records in this block. */
516762306a36Sopenharmony_ci	if (cur->bc_levels[0].ptr < xfs_btree_get_numrecs(block))
516862306a36Sopenharmony_ci		return true;
516962306a36Sopenharmony_ci
517062306a36Sopenharmony_ci	/* There are more record blocks. */
517162306a36Sopenharmony_ci	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
517262306a36Sopenharmony_ci		return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK);
517362306a36Sopenharmony_ci	else
517462306a36Sopenharmony_ci		return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK);
517562306a36Sopenharmony_ci}
517662306a36Sopenharmony_ci
517762306a36Sopenharmony_ci/* Set up all the btree cursor caches. */
517862306a36Sopenharmony_ciint __init
517962306a36Sopenharmony_cixfs_btree_init_cur_caches(void)
518062306a36Sopenharmony_ci{
518162306a36Sopenharmony_ci	int		error;
518262306a36Sopenharmony_ci
518362306a36Sopenharmony_ci	error = xfs_allocbt_init_cur_cache();
518462306a36Sopenharmony_ci	if (error)
518562306a36Sopenharmony_ci		return error;
518662306a36Sopenharmony_ci	error = xfs_inobt_init_cur_cache();
518762306a36Sopenharmony_ci	if (error)
518862306a36Sopenharmony_ci		goto err;
518962306a36Sopenharmony_ci	error = xfs_bmbt_init_cur_cache();
519062306a36Sopenharmony_ci	if (error)
519162306a36Sopenharmony_ci		goto err;
519262306a36Sopenharmony_ci	error = xfs_rmapbt_init_cur_cache();
519362306a36Sopenharmony_ci	if (error)
519462306a36Sopenharmony_ci		goto err;
519562306a36Sopenharmony_ci	error = xfs_refcountbt_init_cur_cache();
519662306a36Sopenharmony_ci	if (error)
519762306a36Sopenharmony_ci		goto err;
519862306a36Sopenharmony_ci
519962306a36Sopenharmony_ci	return 0;
520062306a36Sopenharmony_cierr:
520162306a36Sopenharmony_ci	xfs_btree_destroy_cur_caches();
520262306a36Sopenharmony_ci	return error;
520362306a36Sopenharmony_ci}
520462306a36Sopenharmony_ci
/*
 * Tear down the cursor cache of every btree type.  This is also the cleanup
 * path for a partially failed xfs_btree_init_cur_caches(), so it may run when
 * only some of the caches have been allocated.
 */
void
xfs_btree_destroy_cur_caches(void)
{
	xfs_allocbt_destroy_cur_cache();
	xfs_inobt_destroy_cur_cache();
	xfs_bmbt_destroy_cur_cache();
	xfs_rmapbt_destroy_cur_cache();
	xfs_refcountbt_destroy_cur_cache();
}
5215