162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright (C) 2007,2008 Oracle.  All rights reserved.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/sched.h>
762306a36Sopenharmony_ci#include <linux/slab.h>
862306a36Sopenharmony_ci#include <linux/rbtree.h>
962306a36Sopenharmony_ci#include <linux/mm.h>
1062306a36Sopenharmony_ci#include <linux/error-injection.h>
1162306a36Sopenharmony_ci#include "messages.h"
1262306a36Sopenharmony_ci#include "ctree.h"
1362306a36Sopenharmony_ci#include "disk-io.h"
1462306a36Sopenharmony_ci#include "transaction.h"
1562306a36Sopenharmony_ci#include "print-tree.h"
1662306a36Sopenharmony_ci#include "locking.h"
1762306a36Sopenharmony_ci#include "volumes.h"
1862306a36Sopenharmony_ci#include "qgroup.h"
1962306a36Sopenharmony_ci#include "tree-mod-log.h"
2062306a36Sopenharmony_ci#include "tree-checker.h"
2162306a36Sopenharmony_ci#include "fs.h"
2262306a36Sopenharmony_ci#include "accessors.h"
2362306a36Sopenharmony_ci#include "extent-tree.h"
2462306a36Sopenharmony_ci#include "relocation.h"
2562306a36Sopenharmony_ci#include "file-item.h"
2662306a36Sopenharmony_ci
/* Slab cache used by btrfs_alloc_path() and btrfs_free_path(). */
static struct kmem_cache *btrfs_path_cachep;

/*
 * Forward declarations of the file-local split/balance helpers so they can be
 * referenced before their definitions.
 */
static int split_node(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path,
		      int level);
static int split_leaf(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      const struct btrfs_key *ins_key,
		      struct btrfs_path *path,
		      int data_size,
		      int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct extent_buffer *dst,
			  struct extent_buffer *src,
			  int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_cistatic const struct btrfs_csums {
4262306a36Sopenharmony_ci	u16		size;
4362306a36Sopenharmony_ci	const char	name[10];
4462306a36Sopenharmony_ci	const char	driver[12];
4562306a36Sopenharmony_ci} btrfs_csums[] = {
4662306a36Sopenharmony_ci	[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
4762306a36Sopenharmony_ci	[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
4862306a36Sopenharmony_ci	[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
4962306a36Sopenharmony_ci	[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
5062306a36Sopenharmony_ci				     .driver = "blake2b-256" },
5162306a36Sopenharmony_ci};
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci/*
5462306a36Sopenharmony_ci * The leaf data grows from end-to-front in the node.  this returns the address
5562306a36Sopenharmony_ci * of the start of the last item, which is the stop of the leaf data stack.
5662306a36Sopenharmony_ci */
5762306a36Sopenharmony_cistatic unsigned int leaf_data_end(const struct extent_buffer *leaf)
5862306a36Sopenharmony_ci{
5962306a36Sopenharmony_ci	u32 nr = btrfs_header_nritems(leaf);
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci	if (nr == 0)
6262306a36Sopenharmony_ci		return BTRFS_LEAF_DATA_SIZE(leaf->fs_info);
6362306a36Sopenharmony_ci	return btrfs_item_offset(leaf, nr - 1);
6462306a36Sopenharmony_ci}
6562306a36Sopenharmony_ci
/*
 * Move item data within @leaf (memmove semantics, overlapping ranges are
 * fine).
 *
 * @leaf:	leaf that we're doing the memmove on
 * @dst_offset:	item data offset we're moving to
 * @src_offset:	item data offset we're moving from
 * @len:	length of the data we're moving
 *
 * Item data offsets are relative to the position right after the leaf header,
 * while memmove_extent_buffer() wants offsets relative to the start of the
 * extent buffer.  This wrapper adds the position of item 0 to both offsets so
 * callers don't have to.
 */
static inline void memmove_leaf_data(const struct extent_buffer *leaf,
				     unsigned long dst_offset,
				     unsigned long src_offset,
				     unsigned long len)
{
	const unsigned long data_base = btrfs_item_nr_offset(leaf, 0);

	memmove_extent_buffer(leaf, data_base + dst_offset,
			      data_base + src_offset, len);
}
8762306a36Sopenharmony_ci
/*
 * Copy item data from leaf @src into leaf @dst.
 *
 * @dst:	destination leaf that we're copying into
 * @src:	source leaf that we're copying from
 * @dst_offset:	item data offset we're copying to
 * @src_offset:	item data offset we're copying from
 * @len:	length of the data we're copying
 *
 * Item data offsets are relative to the position right after the leaf header,
 * while copy_extent_buffer() wants offsets relative to the start of the
 * extent buffer.  This wrapper adds the position of item 0 of the respective
 * leaf to each offset so callers don't have to.
 */
static inline void copy_leaf_data(const struct extent_buffer *dst,
				  const struct extent_buffer *src,
				  unsigned long dst_offset,
				  unsigned long src_offset, unsigned long len)
{
	const unsigned long dst_pos = btrfs_item_nr_offset(dst, 0) + dst_offset;
	const unsigned long src_pos = btrfs_item_nr_offset(src, 0) + src_offset;

	copy_extent_buffer(dst, src, dst_pos, src_pos, len);
}
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci/*
11262306a36Sopenharmony_ci * Move items in a @leaf (using memmove).
11362306a36Sopenharmony_ci *
11462306a36Sopenharmony_ci * @dst:	destination leaf for the items
11562306a36Sopenharmony_ci * @dst_item:	the item nr we're copying into
11662306a36Sopenharmony_ci * @src_item:	the item nr we're copying from
11762306a36Sopenharmony_ci * @nr_items:	the number of items to copy
11862306a36Sopenharmony_ci *
11962306a36Sopenharmony_ci * Wrapper around memmove_extent_buffer() that does the math to get the
12062306a36Sopenharmony_ci * appropriate offsets into the leaf from the item numbers.
12162306a36Sopenharmony_ci */
12262306a36Sopenharmony_cistatic inline void memmove_leaf_items(const struct extent_buffer *leaf,
12362306a36Sopenharmony_ci				      int dst_item, int src_item, int nr_items)
12462306a36Sopenharmony_ci{
12562306a36Sopenharmony_ci	memmove_extent_buffer(leaf, btrfs_item_nr_offset(leaf, dst_item),
12662306a36Sopenharmony_ci			      btrfs_item_nr_offset(leaf, src_item),
12762306a36Sopenharmony_ci			      nr_items * sizeof(struct btrfs_item));
12862306a36Sopenharmony_ci}
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci/*
13162306a36Sopenharmony_ci * Copy items from @src into @dst at the given @offset.
13262306a36Sopenharmony_ci *
13362306a36Sopenharmony_ci * @dst:	destination leaf for the items
13462306a36Sopenharmony_ci * @src:	source leaf for the items
13562306a36Sopenharmony_ci * @dst_item:	the item nr we're copying into
13662306a36Sopenharmony_ci * @src_item:	the item nr we're copying from
13762306a36Sopenharmony_ci * @nr_items:	the number of items to copy
13862306a36Sopenharmony_ci *
13962306a36Sopenharmony_ci * Wrapper around copy_extent_buffer() that does the math to get the
14062306a36Sopenharmony_ci * appropriate offsets into the leaf from the item numbers.
14162306a36Sopenharmony_ci */
14262306a36Sopenharmony_cistatic inline void copy_leaf_items(const struct extent_buffer *dst,
14362306a36Sopenharmony_ci				   const struct extent_buffer *src,
14462306a36Sopenharmony_ci				   int dst_item, int src_item, int nr_items)
14562306a36Sopenharmony_ci{
14662306a36Sopenharmony_ci	copy_extent_buffer(dst, src, btrfs_item_nr_offset(dst, dst_item),
14762306a36Sopenharmony_ci			      btrfs_item_nr_offset(src, src_item),
14862306a36Sopenharmony_ci			      nr_items * sizeof(struct btrfs_item));
14962306a36Sopenharmony_ci}
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci/* This exists for btrfs-progs usages. */
15262306a36Sopenharmony_ciu16 btrfs_csum_type_size(u16 type)
15362306a36Sopenharmony_ci{
15462306a36Sopenharmony_ci	return btrfs_csums[type].size;
15562306a36Sopenharmony_ci}
15662306a36Sopenharmony_ci
/*
 * Return the checksum size in bytes for the checksum type recorded in
 * super block @s.
 *
 * No validation is done here: the csum type is validated at mount time.
 */
int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
	return btrfs_csum_type_size(btrfs_super_csum_type(s));
}
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ciconst char *btrfs_super_csum_name(u16 csum_type)
16762306a36Sopenharmony_ci{
16862306a36Sopenharmony_ci	/* csum type is validated at mount time */
16962306a36Sopenharmony_ci	return btrfs_csums[csum_type].name;
17062306a36Sopenharmony_ci}
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci/*
17362306a36Sopenharmony_ci * Return driver name if defined, otherwise the name that's also a valid driver
17462306a36Sopenharmony_ci * name
17562306a36Sopenharmony_ci */
17662306a36Sopenharmony_ciconst char *btrfs_super_csum_driver(u16 csum_type)
17762306a36Sopenharmony_ci{
17862306a36Sopenharmony_ci	/* csum type is validated at mount time */
17962306a36Sopenharmony_ci	return btrfs_csums[csum_type].driver[0] ?
18062306a36Sopenharmony_ci		btrfs_csums[csum_type].driver :
18162306a36Sopenharmony_ci		btrfs_csums[csum_type].name;
18262306a36Sopenharmony_ci}
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cisize_t __attribute_const__ btrfs_get_num_csums(void)
18562306a36Sopenharmony_ci{
18662306a36Sopenharmony_ci	return ARRAY_SIZE(btrfs_csums);
18762306a36Sopenharmony_ci}
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_cistruct btrfs_path *btrfs_alloc_path(void)
19062306a36Sopenharmony_ci{
19162306a36Sopenharmony_ci	might_sleep();
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci	return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
19462306a36Sopenharmony_ci}
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci/* this also releases the path */
19762306a36Sopenharmony_civoid btrfs_free_path(struct btrfs_path *p)
19862306a36Sopenharmony_ci{
19962306a36Sopenharmony_ci	if (!p)
20062306a36Sopenharmony_ci		return;
20162306a36Sopenharmony_ci	btrfs_release_path(p);
20262306a36Sopenharmony_ci	kmem_cache_free(btrfs_path_cachep, p);
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci/*
20662306a36Sopenharmony_ci * path release drops references on the extent buffers in the path
20762306a36Sopenharmony_ci * and it drops any locks held by this path
20862306a36Sopenharmony_ci *
20962306a36Sopenharmony_ci * It is safe to call this on paths that no locks or extent buffers held.
21062306a36Sopenharmony_ci */
21162306a36Sopenharmony_cinoinline void btrfs_release_path(struct btrfs_path *p)
21262306a36Sopenharmony_ci{
21362306a36Sopenharmony_ci	int i;
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_ci	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
21662306a36Sopenharmony_ci		p->slots[i] = 0;
21762306a36Sopenharmony_ci		if (!p->nodes[i])
21862306a36Sopenharmony_ci			continue;
21962306a36Sopenharmony_ci		if (p->locks[i]) {
22062306a36Sopenharmony_ci			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
22162306a36Sopenharmony_ci			p->locks[i] = 0;
22262306a36Sopenharmony_ci		}
22362306a36Sopenharmony_ci		free_extent_buffer(p->nodes[i]);
22462306a36Sopenharmony_ci		p->nodes[i] = NULL;
22562306a36Sopenharmony_ci	}
22662306a36Sopenharmony_ci}
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci/*
22962306a36Sopenharmony_ci * We want the transaction abort to print stack trace only for errors where the
23062306a36Sopenharmony_ci * cause could be a bug, eg. due to ENOSPC, and not for common errors that are
23162306a36Sopenharmony_ci * caused by external factors.
23262306a36Sopenharmony_ci */
23362306a36Sopenharmony_cibool __cold abort_should_print_stack(int errno)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	switch (errno) {
23662306a36Sopenharmony_ci	case -EIO:
23762306a36Sopenharmony_ci	case -EROFS:
23862306a36Sopenharmony_ci	case -ENOMEM:
23962306a36Sopenharmony_ci		return false;
24062306a36Sopenharmony_ci	}
24162306a36Sopenharmony_ci	return true;
24262306a36Sopenharmony_ci}
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci/*
24562306a36Sopenharmony_ci * safely gets a reference on the root node of a tree.  A lock
24662306a36Sopenharmony_ci * is not taken, so a concurrent writer may put a different node
24762306a36Sopenharmony_ci * at the root of the tree.  See btrfs_lock_root_node for the
24862306a36Sopenharmony_ci * looping required.
24962306a36Sopenharmony_ci *
25062306a36Sopenharmony_ci * The extent buffer returned by this has a reference taken, so
25162306a36Sopenharmony_ci * it won't disappear.  It may stop being the root of the tree
25262306a36Sopenharmony_ci * at any time because there are no locks held.
25362306a36Sopenharmony_ci */
25462306a36Sopenharmony_cistruct extent_buffer *btrfs_root_node(struct btrfs_root *root)
25562306a36Sopenharmony_ci{
25662306a36Sopenharmony_ci	struct extent_buffer *eb;
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	while (1) {
25962306a36Sopenharmony_ci		rcu_read_lock();
26062306a36Sopenharmony_ci		eb = rcu_dereference(root->node);
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci		/*
26362306a36Sopenharmony_ci		 * RCU really hurts here, we could free up the root node because
26462306a36Sopenharmony_ci		 * it was COWed but we may not get the new root node yet so do
26562306a36Sopenharmony_ci		 * the inc_not_zero dance and if it doesn't work then
26662306a36Sopenharmony_ci		 * synchronize_rcu and try again.
26762306a36Sopenharmony_ci		 */
26862306a36Sopenharmony_ci		if (atomic_inc_not_zero(&eb->refs)) {
26962306a36Sopenharmony_ci			rcu_read_unlock();
27062306a36Sopenharmony_ci			break;
27162306a36Sopenharmony_ci		}
27262306a36Sopenharmony_ci		rcu_read_unlock();
27362306a36Sopenharmony_ci		synchronize_rcu();
27462306a36Sopenharmony_ci	}
27562306a36Sopenharmony_ci	return eb;
27662306a36Sopenharmony_ci}
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci/*
27962306a36Sopenharmony_ci * Cowonly root (not-shareable trees, everything not subvolume or reloc roots),
28062306a36Sopenharmony_ci * just get put onto a simple dirty list.  Transaction walks this list to make
28162306a36Sopenharmony_ci * sure they get properly updated on disk.
28262306a36Sopenharmony_ci */
28362306a36Sopenharmony_cistatic void add_root_to_dirty_list(struct btrfs_root *root)
28462306a36Sopenharmony_ci{
28562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci	if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
28862306a36Sopenharmony_ci	    !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
28962306a36Sopenharmony_ci		return;
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	spin_lock(&fs_info->trans_lock);
29262306a36Sopenharmony_ci	if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
29362306a36Sopenharmony_ci		/* Want the extent tree to be the last on the list */
29462306a36Sopenharmony_ci		if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
29562306a36Sopenharmony_ci			list_move_tail(&root->dirty_list,
29662306a36Sopenharmony_ci				       &fs_info->dirty_cowonly_roots);
29762306a36Sopenharmony_ci		else
29862306a36Sopenharmony_ci			list_move(&root->dirty_list,
29962306a36Sopenharmony_ci				  &fs_info->dirty_cowonly_roots);
30062306a36Sopenharmony_ci	}
30162306a36Sopenharmony_ci	spin_unlock(&fs_info->trans_lock);
30262306a36Sopenharmony_ci}
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci/*
30562306a36Sopenharmony_ci * used by snapshot creation to make a copy of a root for a tree with
30662306a36Sopenharmony_ci * a given objectid.  The buffer with the new root node is returned in
30762306a36Sopenharmony_ci * cow_ret, and this func returns zero on success or a negative error code.
30862306a36Sopenharmony_ci */
30962306a36Sopenharmony_ciint btrfs_copy_root(struct btrfs_trans_handle *trans,
31062306a36Sopenharmony_ci		      struct btrfs_root *root,
31162306a36Sopenharmony_ci		      struct extent_buffer *buf,
31262306a36Sopenharmony_ci		      struct extent_buffer **cow_ret, u64 new_root_objectid)
31362306a36Sopenharmony_ci{
31462306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
31562306a36Sopenharmony_ci	struct extent_buffer *cow;
31662306a36Sopenharmony_ci	int ret = 0;
31762306a36Sopenharmony_ci	int level;
31862306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
32162306a36Sopenharmony_ci		trans->transid != fs_info->running_transaction->transid);
32262306a36Sopenharmony_ci	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
32362306a36Sopenharmony_ci		trans->transid != root->last_trans);
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	level = btrfs_header_level(buf);
32662306a36Sopenharmony_ci	if (level == 0)
32762306a36Sopenharmony_ci		btrfs_item_key(buf, &disk_key, 0);
32862306a36Sopenharmony_ci	else
32962306a36Sopenharmony_ci		btrfs_node_key(buf, &disk_key, 0);
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_ci	cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
33262306a36Sopenharmony_ci				     &disk_key, level, buf->start, 0,
33362306a36Sopenharmony_ci				     BTRFS_NESTING_NEW_ROOT);
33462306a36Sopenharmony_ci	if (IS_ERR(cow))
33562306a36Sopenharmony_ci		return PTR_ERR(cow);
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	copy_extent_buffer_full(cow, buf);
33862306a36Sopenharmony_ci	btrfs_set_header_bytenr(cow, cow->start);
33962306a36Sopenharmony_ci	btrfs_set_header_generation(cow, trans->transid);
34062306a36Sopenharmony_ci	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
34162306a36Sopenharmony_ci	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
34262306a36Sopenharmony_ci				     BTRFS_HEADER_FLAG_RELOC);
34362306a36Sopenharmony_ci	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
34462306a36Sopenharmony_ci		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
34562306a36Sopenharmony_ci	else
34662306a36Sopenharmony_ci		btrfs_set_header_owner(cow, new_root_objectid);
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ci	write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci	WARN_ON(btrfs_header_generation(buf) > trans->transid);
35162306a36Sopenharmony_ci	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
35262306a36Sopenharmony_ci		ret = btrfs_inc_ref(trans, root, cow, 1);
35362306a36Sopenharmony_ci	else
35462306a36Sopenharmony_ci		ret = btrfs_inc_ref(trans, root, cow, 0);
35562306a36Sopenharmony_ci	if (ret) {
35662306a36Sopenharmony_ci		btrfs_tree_unlock(cow);
35762306a36Sopenharmony_ci		free_extent_buffer(cow);
35862306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
35962306a36Sopenharmony_ci		return ret;
36062306a36Sopenharmony_ci	}
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, cow);
36362306a36Sopenharmony_ci	*cow_ret = cow;
36462306a36Sopenharmony_ci	return 0;
36562306a36Sopenharmony_ci}
36662306a36Sopenharmony_ci
36762306a36Sopenharmony_ci/*
36862306a36Sopenharmony_ci * check if the tree block can be shared by multiple trees
36962306a36Sopenharmony_ci */
37062306a36Sopenharmony_ciint btrfs_block_can_be_shared(struct btrfs_trans_handle *trans,
37162306a36Sopenharmony_ci			      struct btrfs_root *root,
37262306a36Sopenharmony_ci			      struct extent_buffer *buf)
37362306a36Sopenharmony_ci{
37462306a36Sopenharmony_ci	/*
37562306a36Sopenharmony_ci	 * Tree blocks not in shareable trees and tree roots are never shared.
37662306a36Sopenharmony_ci	 * If a block was allocated after the last snapshot and the block was
37762306a36Sopenharmony_ci	 * not allocated by tree relocation, we know the block is not shared.
37862306a36Sopenharmony_ci	 */
37962306a36Sopenharmony_ci	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
38062306a36Sopenharmony_ci	    buf != root->node &&
38162306a36Sopenharmony_ci	    (btrfs_header_generation(buf) <=
38262306a36Sopenharmony_ci	     btrfs_root_last_snapshot(&root->root_item) ||
38362306a36Sopenharmony_ci	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
38462306a36Sopenharmony_ci		if (buf != root->commit_root)
38562306a36Sopenharmony_ci			return 1;
38662306a36Sopenharmony_ci		/*
38762306a36Sopenharmony_ci		 * An extent buffer that used to be the commit root may still be
38862306a36Sopenharmony_ci		 * shared because the tree height may have increased and it
38962306a36Sopenharmony_ci		 * became a child of a higher level root. This can happen when
39062306a36Sopenharmony_ci		 * snapshotting a subvolume created in the current transaction.
39162306a36Sopenharmony_ci		 */
39262306a36Sopenharmony_ci		if (btrfs_header_generation(buf) == trans->transid)
39362306a36Sopenharmony_ci			return 1;
39462306a36Sopenharmony_ci	}
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci	return 0;
39762306a36Sopenharmony_ci}
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_cistatic noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
40062306a36Sopenharmony_ci				       struct btrfs_root *root,
40162306a36Sopenharmony_ci				       struct extent_buffer *buf,
40262306a36Sopenharmony_ci				       struct extent_buffer *cow,
40362306a36Sopenharmony_ci				       int *last_ref)
40462306a36Sopenharmony_ci{
40562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
40662306a36Sopenharmony_ci	u64 refs;
40762306a36Sopenharmony_ci	u64 owner;
40862306a36Sopenharmony_ci	u64 flags;
40962306a36Sopenharmony_ci	u64 new_flags = 0;
41062306a36Sopenharmony_ci	int ret;
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci	/*
41362306a36Sopenharmony_ci	 * Backrefs update rules:
41462306a36Sopenharmony_ci	 *
41562306a36Sopenharmony_ci	 * Always use full backrefs for extent pointers in tree block
41662306a36Sopenharmony_ci	 * allocated by tree relocation.
41762306a36Sopenharmony_ci	 *
41862306a36Sopenharmony_ci	 * If a shared tree block is no longer referenced by its owner
41962306a36Sopenharmony_ci	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
42062306a36Sopenharmony_ci	 * use full backrefs for extent pointers in tree block.
42162306a36Sopenharmony_ci	 *
42262306a36Sopenharmony_ci	 * If a tree block is been relocating
42362306a36Sopenharmony_ci	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
42462306a36Sopenharmony_ci	 * use full backrefs for extent pointers in tree block.
42562306a36Sopenharmony_ci	 * The reason for this is some operations (such as drop tree)
42662306a36Sopenharmony_ci	 * are only allowed for blocks use full backrefs.
42762306a36Sopenharmony_ci	 */
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	if (btrfs_block_can_be_shared(trans, root, buf)) {
43062306a36Sopenharmony_ci		ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
43162306a36Sopenharmony_ci					       btrfs_header_level(buf), 1,
43262306a36Sopenharmony_ci					       &refs, &flags);
43362306a36Sopenharmony_ci		if (ret)
43462306a36Sopenharmony_ci			return ret;
43562306a36Sopenharmony_ci		if (unlikely(refs == 0)) {
43662306a36Sopenharmony_ci			btrfs_crit(fs_info,
43762306a36Sopenharmony_ci		"found 0 references for tree block at bytenr %llu level %d root %llu",
43862306a36Sopenharmony_ci				   buf->start, btrfs_header_level(buf),
43962306a36Sopenharmony_ci				   btrfs_root_id(root));
44062306a36Sopenharmony_ci			ret = -EUCLEAN;
44162306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
44262306a36Sopenharmony_ci			return ret;
44362306a36Sopenharmony_ci		}
44462306a36Sopenharmony_ci	} else {
44562306a36Sopenharmony_ci		refs = 1;
44662306a36Sopenharmony_ci		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
44762306a36Sopenharmony_ci		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
44862306a36Sopenharmony_ci			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
44962306a36Sopenharmony_ci		else
45062306a36Sopenharmony_ci			flags = 0;
45162306a36Sopenharmony_ci	}
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci	owner = btrfs_header_owner(buf);
45462306a36Sopenharmony_ci	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
45562306a36Sopenharmony_ci	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci	if (refs > 1) {
45862306a36Sopenharmony_ci		if ((owner == root->root_key.objectid ||
45962306a36Sopenharmony_ci		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
46062306a36Sopenharmony_ci		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
46162306a36Sopenharmony_ci			ret = btrfs_inc_ref(trans, root, buf, 1);
46262306a36Sopenharmony_ci			if (ret)
46362306a36Sopenharmony_ci				return ret;
46462306a36Sopenharmony_ci
46562306a36Sopenharmony_ci			if (root->root_key.objectid ==
46662306a36Sopenharmony_ci			    BTRFS_TREE_RELOC_OBJECTID) {
46762306a36Sopenharmony_ci				ret = btrfs_dec_ref(trans, root, buf, 0);
46862306a36Sopenharmony_ci				if (ret)
46962306a36Sopenharmony_ci					return ret;
47062306a36Sopenharmony_ci				ret = btrfs_inc_ref(trans, root, cow, 1);
47162306a36Sopenharmony_ci				if (ret)
47262306a36Sopenharmony_ci					return ret;
47362306a36Sopenharmony_ci			}
47462306a36Sopenharmony_ci			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
47562306a36Sopenharmony_ci		} else {
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci			if (root->root_key.objectid ==
47862306a36Sopenharmony_ci			    BTRFS_TREE_RELOC_OBJECTID)
47962306a36Sopenharmony_ci				ret = btrfs_inc_ref(trans, root, cow, 1);
48062306a36Sopenharmony_ci			else
48162306a36Sopenharmony_ci				ret = btrfs_inc_ref(trans, root, cow, 0);
48262306a36Sopenharmony_ci			if (ret)
48362306a36Sopenharmony_ci				return ret;
48462306a36Sopenharmony_ci		}
48562306a36Sopenharmony_ci		if (new_flags != 0) {
48662306a36Sopenharmony_ci			ret = btrfs_set_disk_extent_flags(trans, buf, new_flags);
48762306a36Sopenharmony_ci			if (ret)
48862306a36Sopenharmony_ci				return ret;
48962306a36Sopenharmony_ci		}
49062306a36Sopenharmony_ci	} else {
49162306a36Sopenharmony_ci		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
49262306a36Sopenharmony_ci			if (root->root_key.objectid ==
49362306a36Sopenharmony_ci			    BTRFS_TREE_RELOC_OBJECTID)
49462306a36Sopenharmony_ci				ret = btrfs_inc_ref(trans, root, cow, 1);
49562306a36Sopenharmony_ci			else
49662306a36Sopenharmony_ci				ret = btrfs_inc_ref(trans, root, cow, 0);
49762306a36Sopenharmony_ci			if (ret)
49862306a36Sopenharmony_ci				return ret;
49962306a36Sopenharmony_ci			ret = btrfs_dec_ref(trans, root, buf, 1);
50062306a36Sopenharmony_ci			if (ret)
50162306a36Sopenharmony_ci				return ret;
50262306a36Sopenharmony_ci		}
50362306a36Sopenharmony_ci		btrfs_clear_buffer_dirty(trans, buf);
50462306a36Sopenharmony_ci		*last_ref = 1;
50562306a36Sopenharmony_ci	}
50662306a36Sopenharmony_ci	return 0;
50762306a36Sopenharmony_ci}
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci/*
51062306a36Sopenharmony_ci * does the dirty work in cow of a single block.  The parent block (if
51162306a36Sopenharmony_ci * supplied) is updated to point to the new cow copy.  The new buffer is marked
51262306a36Sopenharmony_ci * dirty and returned locked.  If you modify the block it needs to be marked
51362306a36Sopenharmony_ci * dirty again.
51462306a36Sopenharmony_ci *
51562306a36Sopenharmony_ci * search_start -- an allocation hint for the new block
51662306a36Sopenharmony_ci *
51762306a36Sopenharmony_ci * empty_size -- a hint that you plan on doing more cow.  This is the size in
51862306a36Sopenharmony_ci * bytes the allocator should try to find free next to the block it returns.
51962306a36Sopenharmony_ci * This is just a hint and may be ignored by the allocator.
52062306a36Sopenharmony_ci */
52162306a36Sopenharmony_cistatic noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
52262306a36Sopenharmony_ci			     struct btrfs_root *root,
52362306a36Sopenharmony_ci			     struct extent_buffer *buf,
52462306a36Sopenharmony_ci			     struct extent_buffer *parent, int parent_slot,
52562306a36Sopenharmony_ci			     struct extent_buffer **cow_ret,
52662306a36Sopenharmony_ci			     u64 search_start, u64 empty_size,
52762306a36Sopenharmony_ci			     enum btrfs_lock_nesting nest)
52862306a36Sopenharmony_ci{
52962306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
53062306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
53162306a36Sopenharmony_ci	struct extent_buffer *cow;
53262306a36Sopenharmony_ci	int level, ret;
53362306a36Sopenharmony_ci	int last_ref = 0;
53462306a36Sopenharmony_ci	int unlock_orig = 0;
53562306a36Sopenharmony_ci	u64 parent_start = 0;
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci	if (*cow_ret == buf)
53862306a36Sopenharmony_ci		unlock_orig = 1;
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	btrfs_assert_tree_write_locked(buf);
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
54362306a36Sopenharmony_ci		trans->transid != fs_info->running_transaction->transid);
54462306a36Sopenharmony_ci	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
54562306a36Sopenharmony_ci		trans->transid != root->last_trans);
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci	level = btrfs_header_level(buf);
54862306a36Sopenharmony_ci
54962306a36Sopenharmony_ci	if (level == 0)
55062306a36Sopenharmony_ci		btrfs_item_key(buf, &disk_key, 0);
55162306a36Sopenharmony_ci	else
55262306a36Sopenharmony_ci		btrfs_node_key(buf, &disk_key, 0);
55362306a36Sopenharmony_ci
55462306a36Sopenharmony_ci	if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
55562306a36Sopenharmony_ci		parent_start = parent->start;
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	cow = btrfs_alloc_tree_block(trans, root, parent_start,
55862306a36Sopenharmony_ci				     root->root_key.objectid, &disk_key, level,
55962306a36Sopenharmony_ci				     search_start, empty_size, nest);
56062306a36Sopenharmony_ci	if (IS_ERR(cow))
56162306a36Sopenharmony_ci		return PTR_ERR(cow);
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	/* cow is set to blocking by btrfs_init_new_buffer */
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	copy_extent_buffer_full(cow, buf);
56662306a36Sopenharmony_ci	btrfs_set_header_bytenr(cow, cow->start);
56762306a36Sopenharmony_ci	btrfs_set_header_generation(cow, trans->transid);
56862306a36Sopenharmony_ci	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
56962306a36Sopenharmony_ci	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
57062306a36Sopenharmony_ci				     BTRFS_HEADER_FLAG_RELOC);
57162306a36Sopenharmony_ci	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
57262306a36Sopenharmony_ci		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
57362306a36Sopenharmony_ci	else
57462306a36Sopenharmony_ci		btrfs_set_header_owner(cow, root->root_key.objectid);
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
57962306a36Sopenharmony_ci	if (ret) {
58062306a36Sopenharmony_ci		btrfs_tree_unlock(cow);
58162306a36Sopenharmony_ci		free_extent_buffer(cow);
58262306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
58362306a36Sopenharmony_ci		return ret;
58462306a36Sopenharmony_ci	}
58562306a36Sopenharmony_ci
58662306a36Sopenharmony_ci	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
58762306a36Sopenharmony_ci		ret = btrfs_reloc_cow_block(trans, root, buf, cow);
58862306a36Sopenharmony_ci		if (ret) {
58962306a36Sopenharmony_ci			btrfs_tree_unlock(cow);
59062306a36Sopenharmony_ci			free_extent_buffer(cow);
59162306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
59262306a36Sopenharmony_ci			return ret;
59362306a36Sopenharmony_ci		}
59462306a36Sopenharmony_ci	}
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	if (buf == root->node) {
59762306a36Sopenharmony_ci		WARN_ON(parent && parent != buf);
59862306a36Sopenharmony_ci		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
59962306a36Sopenharmony_ci		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
60062306a36Sopenharmony_ci			parent_start = buf->start;
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci		ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
60362306a36Sopenharmony_ci		if (ret < 0) {
60462306a36Sopenharmony_ci			btrfs_tree_unlock(cow);
60562306a36Sopenharmony_ci			free_extent_buffer(cow);
60662306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
60762306a36Sopenharmony_ci			return ret;
60862306a36Sopenharmony_ci		}
60962306a36Sopenharmony_ci		atomic_inc(&cow->refs);
61062306a36Sopenharmony_ci		rcu_assign_pointer(root->node, cow);
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci		btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
61362306a36Sopenharmony_ci				      parent_start, last_ref);
61462306a36Sopenharmony_ci		free_extent_buffer(buf);
61562306a36Sopenharmony_ci		add_root_to_dirty_list(root);
61662306a36Sopenharmony_ci	} else {
61762306a36Sopenharmony_ci		WARN_ON(trans->transid != btrfs_header_generation(parent));
61862306a36Sopenharmony_ci		ret = btrfs_tree_mod_log_insert_key(parent, parent_slot,
61962306a36Sopenharmony_ci						    BTRFS_MOD_LOG_KEY_REPLACE);
62062306a36Sopenharmony_ci		if (ret) {
62162306a36Sopenharmony_ci			btrfs_tree_unlock(cow);
62262306a36Sopenharmony_ci			free_extent_buffer(cow);
62362306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
62462306a36Sopenharmony_ci			return ret;
62562306a36Sopenharmony_ci		}
62662306a36Sopenharmony_ci		btrfs_set_node_blockptr(parent, parent_slot,
62762306a36Sopenharmony_ci					cow->start);
62862306a36Sopenharmony_ci		btrfs_set_node_ptr_generation(parent, parent_slot,
62962306a36Sopenharmony_ci					      trans->transid);
63062306a36Sopenharmony_ci		btrfs_mark_buffer_dirty(trans, parent);
63162306a36Sopenharmony_ci		if (last_ref) {
63262306a36Sopenharmony_ci			ret = btrfs_tree_mod_log_free_eb(buf);
63362306a36Sopenharmony_ci			if (ret) {
63462306a36Sopenharmony_ci				btrfs_tree_unlock(cow);
63562306a36Sopenharmony_ci				free_extent_buffer(cow);
63662306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
63762306a36Sopenharmony_ci				return ret;
63862306a36Sopenharmony_ci			}
63962306a36Sopenharmony_ci		}
64062306a36Sopenharmony_ci		btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
64162306a36Sopenharmony_ci				      parent_start, last_ref);
64262306a36Sopenharmony_ci	}
64362306a36Sopenharmony_ci	if (unlock_orig)
64462306a36Sopenharmony_ci		btrfs_tree_unlock(buf);
64562306a36Sopenharmony_ci	free_extent_buffer_stale(buf);
64662306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, cow);
64762306a36Sopenharmony_ci	*cow_ret = cow;
64862306a36Sopenharmony_ci	return 0;
64962306a36Sopenharmony_ci}
65062306a36Sopenharmony_ci
65162306a36Sopenharmony_cistatic inline int should_cow_block(struct btrfs_trans_handle *trans,
65262306a36Sopenharmony_ci				   struct btrfs_root *root,
65362306a36Sopenharmony_ci				   struct extent_buffer *buf)
65462306a36Sopenharmony_ci{
65562306a36Sopenharmony_ci	if (btrfs_is_testing(root->fs_info))
65662306a36Sopenharmony_ci		return 0;
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci	/* Ensure we can see the FORCE_COW bit */
65962306a36Sopenharmony_ci	smp_mb__before_atomic();
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_ci	/*
66262306a36Sopenharmony_ci	 * We do not need to cow a block if
66362306a36Sopenharmony_ci	 * 1) this block is not created or changed in this transaction;
66462306a36Sopenharmony_ci	 * 2) this block does not belong to TREE_RELOC tree;
66562306a36Sopenharmony_ci	 * 3) the root is not forced COW.
66662306a36Sopenharmony_ci	 *
66762306a36Sopenharmony_ci	 * What is forced COW:
66862306a36Sopenharmony_ci	 *    when we create snapshot during committing the transaction,
66962306a36Sopenharmony_ci	 *    after we've finished copying src root, we must COW the shared
67062306a36Sopenharmony_ci	 *    block to ensure the metadata consistency.
67162306a36Sopenharmony_ci	 */
67262306a36Sopenharmony_ci	if (btrfs_header_generation(buf) == trans->transid &&
67362306a36Sopenharmony_ci	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
67462306a36Sopenharmony_ci	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
67562306a36Sopenharmony_ci	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
67662306a36Sopenharmony_ci	    !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
67762306a36Sopenharmony_ci		return 0;
67862306a36Sopenharmony_ci	return 1;
67962306a36Sopenharmony_ci}
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci/*
68262306a36Sopenharmony_ci * cows a single block, see __btrfs_cow_block for the real work.
68362306a36Sopenharmony_ci * This version of it has extra checks so that a block isn't COWed more than
68462306a36Sopenharmony_ci * once per transaction, as long as it hasn't been written yet
68562306a36Sopenharmony_ci */
68662306a36Sopenharmony_cinoinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
68762306a36Sopenharmony_ci		    struct btrfs_root *root, struct extent_buffer *buf,
68862306a36Sopenharmony_ci		    struct extent_buffer *parent, int parent_slot,
68962306a36Sopenharmony_ci		    struct extent_buffer **cow_ret,
69062306a36Sopenharmony_ci		    enum btrfs_lock_nesting nest)
69162306a36Sopenharmony_ci{
69262306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
69362306a36Sopenharmony_ci	u64 search_start;
69462306a36Sopenharmony_ci	int ret;
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_ci	if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) {
69762306a36Sopenharmony_ci		btrfs_abort_transaction(trans, -EUCLEAN);
69862306a36Sopenharmony_ci		btrfs_crit(fs_info,
69962306a36Sopenharmony_ci		   "attempt to COW block %llu on root %llu that is being deleted",
70062306a36Sopenharmony_ci			   buf->start, btrfs_root_id(root));
70162306a36Sopenharmony_ci		return -EUCLEAN;
70262306a36Sopenharmony_ci	}
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci	/*
70562306a36Sopenharmony_ci	 * COWing must happen through a running transaction, which always
70662306a36Sopenharmony_ci	 * matches the current fs generation (it's a transaction with a state
70762306a36Sopenharmony_ci	 * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs
70862306a36Sopenharmony_ci	 * into error state to prevent the commit of any transaction.
70962306a36Sopenharmony_ci	 */
71062306a36Sopenharmony_ci	if (unlikely(trans->transaction != fs_info->running_transaction ||
71162306a36Sopenharmony_ci		     trans->transid != fs_info->generation)) {
71262306a36Sopenharmony_ci		btrfs_abort_transaction(trans, -EUCLEAN);
71362306a36Sopenharmony_ci		btrfs_crit(fs_info,
71462306a36Sopenharmony_ci"unexpected transaction when attempting to COW block %llu on root %llu, transaction %llu running transaction %llu fs generation %llu",
71562306a36Sopenharmony_ci			   buf->start, btrfs_root_id(root), trans->transid,
71662306a36Sopenharmony_ci			   fs_info->running_transaction->transid,
71762306a36Sopenharmony_ci			   fs_info->generation);
71862306a36Sopenharmony_ci		return -EUCLEAN;
71962306a36Sopenharmony_ci	}
72062306a36Sopenharmony_ci
72162306a36Sopenharmony_ci	if (!should_cow_block(trans, root, buf)) {
72262306a36Sopenharmony_ci		*cow_ret = buf;
72362306a36Sopenharmony_ci		return 0;
72462306a36Sopenharmony_ci	}
72562306a36Sopenharmony_ci
72662306a36Sopenharmony_ci	search_start = buf->start & ~((u64)SZ_1G - 1);
72762306a36Sopenharmony_ci
72862306a36Sopenharmony_ci	/*
72962306a36Sopenharmony_ci	 * Before CoWing this block for later modification, check if it's
73062306a36Sopenharmony_ci	 * the subtree root and do the delayed subtree trace if needed.
73162306a36Sopenharmony_ci	 *
73262306a36Sopenharmony_ci	 * Also We don't care about the error, as it's handled internally.
73362306a36Sopenharmony_ci	 */
73462306a36Sopenharmony_ci	btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
73562306a36Sopenharmony_ci	ret = __btrfs_cow_block(trans, root, buf, parent,
73662306a36Sopenharmony_ci				 parent_slot, cow_ret, search_start, 0, nest);
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	trace_btrfs_cow_block(root, buf, *cow_ret);
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ci	return ret;
74162306a36Sopenharmony_ci}
74262306a36Sopenharmony_ciALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);
74362306a36Sopenharmony_ci
74462306a36Sopenharmony_ci/*
74562306a36Sopenharmony_ci * helper function for defrag to decide if two blocks pointed to by a
74662306a36Sopenharmony_ci * node are actually close by
74762306a36Sopenharmony_ci */
74862306a36Sopenharmony_cistatic int close_blocks(u64 blocknr, u64 other, u32 blocksize)
74962306a36Sopenharmony_ci{
75062306a36Sopenharmony_ci	if (blocknr < other && other - (blocknr + blocksize) < 32768)
75162306a36Sopenharmony_ci		return 1;
75262306a36Sopenharmony_ci	if (blocknr > other && blocknr - (other + blocksize) < 32768)
75362306a36Sopenharmony_ci		return 1;
75462306a36Sopenharmony_ci	return 0;
75562306a36Sopenharmony_ci}
75662306a36Sopenharmony_ci
75762306a36Sopenharmony_ci#ifdef __LITTLE_ENDIAN
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci/*
76062306a36Sopenharmony_ci * Compare two keys, on little-endian the disk order is same as CPU order and
76162306a36Sopenharmony_ci * we can avoid the conversion.
76262306a36Sopenharmony_ci */
76362306a36Sopenharmony_cistatic int comp_keys(const struct btrfs_disk_key *disk_key,
76462306a36Sopenharmony_ci		     const struct btrfs_key *k2)
76562306a36Sopenharmony_ci{
76662306a36Sopenharmony_ci	const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_ci	return btrfs_comp_cpu_keys(k1, k2);
76962306a36Sopenharmony_ci}
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci#else
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci/*
77462306a36Sopenharmony_ci * compare two keys in a memcmp fashion
77562306a36Sopenharmony_ci */
77662306a36Sopenharmony_cistatic int comp_keys(const struct btrfs_disk_key *disk,
77762306a36Sopenharmony_ci		     const struct btrfs_key *k2)
77862306a36Sopenharmony_ci{
77962306a36Sopenharmony_ci	struct btrfs_key k1;
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci	btrfs_disk_key_to_cpu(&k1, disk);
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci	return btrfs_comp_cpu_keys(&k1, k2);
78462306a36Sopenharmony_ci}
78562306a36Sopenharmony_ci#endif
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci/*
78862306a36Sopenharmony_ci * same as comp_keys only with two btrfs_key's
78962306a36Sopenharmony_ci */
79062306a36Sopenharmony_ciint __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
79162306a36Sopenharmony_ci{
79262306a36Sopenharmony_ci	if (k1->objectid > k2->objectid)
79362306a36Sopenharmony_ci		return 1;
79462306a36Sopenharmony_ci	if (k1->objectid < k2->objectid)
79562306a36Sopenharmony_ci		return -1;
79662306a36Sopenharmony_ci	if (k1->type > k2->type)
79762306a36Sopenharmony_ci		return 1;
79862306a36Sopenharmony_ci	if (k1->type < k2->type)
79962306a36Sopenharmony_ci		return -1;
80062306a36Sopenharmony_ci	if (k1->offset > k2->offset)
80162306a36Sopenharmony_ci		return 1;
80262306a36Sopenharmony_ci	if (k1->offset < k2->offset)
80362306a36Sopenharmony_ci		return -1;
80462306a36Sopenharmony_ci	return 0;
80562306a36Sopenharmony_ci}
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci/*
80862306a36Sopenharmony_ci * this is used by the defrag code to go through all the
80962306a36Sopenharmony_ci * leaves pointed to by a node and reallocate them so that
81062306a36Sopenharmony_ci * disk order is close to key order
81162306a36Sopenharmony_ci */
81262306a36Sopenharmony_ciint btrfs_realloc_node(struct btrfs_trans_handle *trans,
81362306a36Sopenharmony_ci		       struct btrfs_root *root, struct extent_buffer *parent,
81462306a36Sopenharmony_ci		       int start_slot, u64 *last_ret,
81562306a36Sopenharmony_ci		       struct btrfs_key *progress)
81662306a36Sopenharmony_ci{
81762306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
81862306a36Sopenharmony_ci	struct extent_buffer *cur;
81962306a36Sopenharmony_ci	u64 blocknr;
82062306a36Sopenharmony_ci	u64 search_start = *last_ret;
82162306a36Sopenharmony_ci	u64 last_block = 0;
82262306a36Sopenharmony_ci	u64 other;
82362306a36Sopenharmony_ci	u32 parent_nritems;
82462306a36Sopenharmony_ci	int end_slot;
82562306a36Sopenharmony_ci	int i;
82662306a36Sopenharmony_ci	int err = 0;
82762306a36Sopenharmony_ci	u32 blocksize;
82862306a36Sopenharmony_ci	int progress_passed = 0;
82962306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci	/*
83262306a36Sopenharmony_ci	 * COWing must happen through a running transaction, which always
83362306a36Sopenharmony_ci	 * matches the current fs generation (it's a transaction with a state
83462306a36Sopenharmony_ci	 * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs
83562306a36Sopenharmony_ci	 * into error state to prevent the commit of any transaction.
83662306a36Sopenharmony_ci	 */
83762306a36Sopenharmony_ci	if (unlikely(trans->transaction != fs_info->running_transaction ||
83862306a36Sopenharmony_ci		     trans->transid != fs_info->generation)) {
83962306a36Sopenharmony_ci		btrfs_abort_transaction(trans, -EUCLEAN);
84062306a36Sopenharmony_ci		btrfs_crit(fs_info,
84162306a36Sopenharmony_ci"unexpected transaction when attempting to reallocate parent %llu for root %llu, transaction %llu running transaction %llu fs generation %llu",
84262306a36Sopenharmony_ci			   parent->start, btrfs_root_id(root), trans->transid,
84362306a36Sopenharmony_ci			   fs_info->running_transaction->transid,
84462306a36Sopenharmony_ci			   fs_info->generation);
84562306a36Sopenharmony_ci		return -EUCLEAN;
84662306a36Sopenharmony_ci	}
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci	parent_nritems = btrfs_header_nritems(parent);
84962306a36Sopenharmony_ci	blocksize = fs_info->nodesize;
85062306a36Sopenharmony_ci	end_slot = parent_nritems - 1;
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci	if (parent_nritems <= 1)
85362306a36Sopenharmony_ci		return 0;
85462306a36Sopenharmony_ci
85562306a36Sopenharmony_ci	for (i = start_slot; i <= end_slot; i++) {
85662306a36Sopenharmony_ci		int close = 1;
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci		btrfs_node_key(parent, &disk_key, i);
85962306a36Sopenharmony_ci		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
86062306a36Sopenharmony_ci			continue;
86162306a36Sopenharmony_ci
86262306a36Sopenharmony_ci		progress_passed = 1;
86362306a36Sopenharmony_ci		blocknr = btrfs_node_blockptr(parent, i);
86462306a36Sopenharmony_ci		if (last_block == 0)
86562306a36Sopenharmony_ci			last_block = blocknr;
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci		if (i > 0) {
86862306a36Sopenharmony_ci			other = btrfs_node_blockptr(parent, i - 1);
86962306a36Sopenharmony_ci			close = close_blocks(blocknr, other, blocksize);
87062306a36Sopenharmony_ci		}
87162306a36Sopenharmony_ci		if (!close && i < end_slot) {
87262306a36Sopenharmony_ci			other = btrfs_node_blockptr(parent, i + 1);
87362306a36Sopenharmony_ci			close = close_blocks(blocknr, other, blocksize);
87462306a36Sopenharmony_ci		}
87562306a36Sopenharmony_ci		if (close) {
87662306a36Sopenharmony_ci			last_block = blocknr;
87762306a36Sopenharmony_ci			continue;
87862306a36Sopenharmony_ci		}
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci		cur = btrfs_read_node_slot(parent, i);
88162306a36Sopenharmony_ci		if (IS_ERR(cur))
88262306a36Sopenharmony_ci			return PTR_ERR(cur);
88362306a36Sopenharmony_ci		if (search_start == 0)
88462306a36Sopenharmony_ci			search_start = last_block;
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci		btrfs_tree_lock(cur);
88762306a36Sopenharmony_ci		err = __btrfs_cow_block(trans, root, cur, parent, i,
88862306a36Sopenharmony_ci					&cur, search_start,
88962306a36Sopenharmony_ci					min(16 * blocksize,
89062306a36Sopenharmony_ci					    (end_slot - i) * blocksize),
89162306a36Sopenharmony_ci					BTRFS_NESTING_COW);
89262306a36Sopenharmony_ci		if (err) {
89362306a36Sopenharmony_ci			btrfs_tree_unlock(cur);
89462306a36Sopenharmony_ci			free_extent_buffer(cur);
89562306a36Sopenharmony_ci			break;
89662306a36Sopenharmony_ci		}
89762306a36Sopenharmony_ci		search_start = cur->start;
89862306a36Sopenharmony_ci		last_block = cur->start;
89962306a36Sopenharmony_ci		*last_ret = search_start;
90062306a36Sopenharmony_ci		btrfs_tree_unlock(cur);
90162306a36Sopenharmony_ci		free_extent_buffer(cur);
90262306a36Sopenharmony_ci	}
90362306a36Sopenharmony_ci	return err;
90462306a36Sopenharmony_ci}
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci/*
90762306a36Sopenharmony_ci * Search for a key in the given extent_buffer.
90862306a36Sopenharmony_ci *
90962306a36Sopenharmony_ci * The lower boundary for the search is specified by the slot number @first_slot.
91062306a36Sopenharmony_ci * Use a value of 0 to search over the whole extent buffer. Works for both
91162306a36Sopenharmony_ci * leaves and nodes.
91262306a36Sopenharmony_ci *
91362306a36Sopenharmony_ci * The slot in the extent buffer is returned via @slot. If the key exists in the
91462306a36Sopenharmony_ci * extent buffer, then @slot will point to the slot where the key is, otherwise
91562306a36Sopenharmony_ci * it points to the slot where you would insert the key.
91662306a36Sopenharmony_ci *
91762306a36Sopenharmony_ci * Slot may point to the total number of items (i.e. one position beyond the last
91862306a36Sopenharmony_ci * key) if the key is bigger than the last key in the extent buffer.
91962306a36Sopenharmony_ci */
92062306a36Sopenharmony_ciint btrfs_bin_search(struct extent_buffer *eb, int first_slot,
92162306a36Sopenharmony_ci		     const struct btrfs_key *key, int *slot)
92262306a36Sopenharmony_ci{
92362306a36Sopenharmony_ci	unsigned long p;
92462306a36Sopenharmony_ci	int item_size;
92562306a36Sopenharmony_ci	/*
92662306a36Sopenharmony_ci	 * Use unsigned types for the low and high slots, so that we get a more
92762306a36Sopenharmony_ci	 * efficient division in the search loop below.
92862306a36Sopenharmony_ci	 */
92962306a36Sopenharmony_ci	u32 low = first_slot;
93062306a36Sopenharmony_ci	u32 high = btrfs_header_nritems(eb);
93162306a36Sopenharmony_ci	int ret;
93262306a36Sopenharmony_ci	const int key_size = sizeof(struct btrfs_disk_key);
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci	if (unlikely(low > high)) {
93562306a36Sopenharmony_ci		btrfs_err(eb->fs_info,
93662306a36Sopenharmony_ci		 "%s: low (%u) > high (%u) eb %llu owner %llu level %d",
93762306a36Sopenharmony_ci			  __func__, low, high, eb->start,
93862306a36Sopenharmony_ci			  btrfs_header_owner(eb), btrfs_header_level(eb));
93962306a36Sopenharmony_ci		return -EINVAL;
94062306a36Sopenharmony_ci	}
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci	if (btrfs_header_level(eb) == 0) {
94362306a36Sopenharmony_ci		p = offsetof(struct btrfs_leaf, items);
94462306a36Sopenharmony_ci		item_size = sizeof(struct btrfs_item);
94562306a36Sopenharmony_ci	} else {
94662306a36Sopenharmony_ci		p = offsetof(struct btrfs_node, ptrs);
94762306a36Sopenharmony_ci		item_size = sizeof(struct btrfs_key_ptr);
94862306a36Sopenharmony_ci	}
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci	while (low < high) {
95162306a36Sopenharmony_ci		unsigned long oip;
95262306a36Sopenharmony_ci		unsigned long offset;
95362306a36Sopenharmony_ci		struct btrfs_disk_key *tmp;
95462306a36Sopenharmony_ci		struct btrfs_disk_key unaligned;
95562306a36Sopenharmony_ci		int mid;
95662306a36Sopenharmony_ci
95762306a36Sopenharmony_ci		mid = (low + high) / 2;
95862306a36Sopenharmony_ci		offset = p + mid * item_size;
95962306a36Sopenharmony_ci		oip = offset_in_page(offset);
96062306a36Sopenharmony_ci
96162306a36Sopenharmony_ci		if (oip + key_size <= PAGE_SIZE) {
96262306a36Sopenharmony_ci			const unsigned long idx = get_eb_page_index(offset);
96362306a36Sopenharmony_ci			char *kaddr = page_address(eb->pages[idx]);
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci			oip = get_eb_offset_in_page(eb, offset);
96662306a36Sopenharmony_ci			tmp = (struct btrfs_disk_key *)(kaddr + oip);
96762306a36Sopenharmony_ci		} else {
96862306a36Sopenharmony_ci			read_extent_buffer(eb, &unaligned, offset, key_size);
96962306a36Sopenharmony_ci			tmp = &unaligned;
97062306a36Sopenharmony_ci		}
97162306a36Sopenharmony_ci
97262306a36Sopenharmony_ci		ret = comp_keys(tmp, key);
97362306a36Sopenharmony_ci
97462306a36Sopenharmony_ci		if (ret < 0)
97562306a36Sopenharmony_ci			low = mid + 1;
97662306a36Sopenharmony_ci		else if (ret > 0)
97762306a36Sopenharmony_ci			high = mid;
97862306a36Sopenharmony_ci		else {
97962306a36Sopenharmony_ci			*slot = mid;
98062306a36Sopenharmony_ci			return 0;
98162306a36Sopenharmony_ci		}
98262306a36Sopenharmony_ci	}
98362306a36Sopenharmony_ci	*slot = low;
98462306a36Sopenharmony_ci	return 1;
98562306a36Sopenharmony_ci}
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_cistatic void root_add_used(struct btrfs_root *root, u32 size)
98862306a36Sopenharmony_ci{
98962306a36Sopenharmony_ci	spin_lock(&root->accounting_lock);
99062306a36Sopenharmony_ci	btrfs_set_root_used(&root->root_item,
99162306a36Sopenharmony_ci			    btrfs_root_used(&root->root_item) + size);
99262306a36Sopenharmony_ci	spin_unlock(&root->accounting_lock);
99362306a36Sopenharmony_ci}
99462306a36Sopenharmony_ci
99562306a36Sopenharmony_cistatic void root_sub_used(struct btrfs_root *root, u32 size)
99662306a36Sopenharmony_ci{
99762306a36Sopenharmony_ci	spin_lock(&root->accounting_lock);
99862306a36Sopenharmony_ci	btrfs_set_root_used(&root->root_item,
99962306a36Sopenharmony_ci			    btrfs_root_used(&root->root_item) - size);
100062306a36Sopenharmony_ci	spin_unlock(&root->accounting_lock);
100162306a36Sopenharmony_ci}
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci/* given a node and slot number, this reads the blocks it points to.  The
100462306a36Sopenharmony_ci * extent buffer is returned with a reference taken (but unlocked).
100562306a36Sopenharmony_ci */
100662306a36Sopenharmony_cistruct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
100762306a36Sopenharmony_ci					   int slot)
100862306a36Sopenharmony_ci{
100962306a36Sopenharmony_ci	int level = btrfs_header_level(parent);
101062306a36Sopenharmony_ci	struct btrfs_tree_parent_check check = { 0 };
101162306a36Sopenharmony_ci	struct extent_buffer *eb;
101262306a36Sopenharmony_ci
101362306a36Sopenharmony_ci	if (slot < 0 || slot >= btrfs_header_nritems(parent))
101462306a36Sopenharmony_ci		return ERR_PTR(-ENOENT);
101562306a36Sopenharmony_ci
101662306a36Sopenharmony_ci	ASSERT(level);
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci	check.level = level - 1;
101962306a36Sopenharmony_ci	check.transid = btrfs_node_ptr_generation(parent, slot);
102062306a36Sopenharmony_ci	check.owner_root = btrfs_header_owner(parent);
102162306a36Sopenharmony_ci	check.has_first_key = true;
102262306a36Sopenharmony_ci	btrfs_node_key_to_cpu(parent, &check.first_key, slot);
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci	eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
102562306a36Sopenharmony_ci			     &check);
102662306a36Sopenharmony_ci	if (IS_ERR(eb))
102762306a36Sopenharmony_ci		return eb;
102862306a36Sopenharmony_ci	if (!extent_buffer_uptodate(eb)) {
102962306a36Sopenharmony_ci		free_extent_buffer(eb);
103062306a36Sopenharmony_ci		return ERR_PTR(-EIO);
103162306a36Sopenharmony_ci	}
103262306a36Sopenharmony_ci
103362306a36Sopenharmony_ci	return eb;
103462306a36Sopenharmony_ci}
103562306a36Sopenharmony_ci
103662306a36Sopenharmony_ci/*
103762306a36Sopenharmony_ci * node level balancing, used to make sure nodes are in proper order for
103862306a36Sopenharmony_ci * item deletion.  We balance from the top down, so we have to make sure
103962306a36Sopenharmony_ci * that a deletion won't leave an node completely empty later on.
104062306a36Sopenharmony_ci */
104162306a36Sopenharmony_cistatic noinline int balance_level(struct btrfs_trans_handle *trans,
104262306a36Sopenharmony_ci			 struct btrfs_root *root,
104362306a36Sopenharmony_ci			 struct btrfs_path *path, int level)
104462306a36Sopenharmony_ci{
104562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
104662306a36Sopenharmony_ci	struct extent_buffer *right = NULL;
104762306a36Sopenharmony_ci	struct extent_buffer *mid;
104862306a36Sopenharmony_ci	struct extent_buffer *left = NULL;
104962306a36Sopenharmony_ci	struct extent_buffer *parent = NULL;
105062306a36Sopenharmony_ci	int ret = 0;
105162306a36Sopenharmony_ci	int wret;
105262306a36Sopenharmony_ci	int pslot;
105362306a36Sopenharmony_ci	int orig_slot = path->slots[level];
105462306a36Sopenharmony_ci	u64 orig_ptr;
105562306a36Sopenharmony_ci
105662306a36Sopenharmony_ci	ASSERT(level > 0);
105762306a36Sopenharmony_ci
105862306a36Sopenharmony_ci	mid = path->nodes[level];
105962306a36Sopenharmony_ci
106062306a36Sopenharmony_ci	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK);
106162306a36Sopenharmony_ci	WARN_ON(btrfs_header_generation(mid) != trans->transid);
106262306a36Sopenharmony_ci
106362306a36Sopenharmony_ci	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
106462306a36Sopenharmony_ci
106562306a36Sopenharmony_ci	if (level < BTRFS_MAX_LEVEL - 1) {
106662306a36Sopenharmony_ci		parent = path->nodes[level + 1];
106762306a36Sopenharmony_ci		pslot = path->slots[level + 1];
106862306a36Sopenharmony_ci	}
106962306a36Sopenharmony_ci
107062306a36Sopenharmony_ci	/*
107162306a36Sopenharmony_ci	 * deal with the case where there is only one pointer in the root
107262306a36Sopenharmony_ci	 * by promoting the node below to a root
107362306a36Sopenharmony_ci	 */
107462306a36Sopenharmony_ci	if (!parent) {
107562306a36Sopenharmony_ci		struct extent_buffer *child;
107662306a36Sopenharmony_ci
107762306a36Sopenharmony_ci		if (btrfs_header_nritems(mid) != 1)
107862306a36Sopenharmony_ci			return 0;
107962306a36Sopenharmony_ci
108062306a36Sopenharmony_ci		/* promote the child to a root */
108162306a36Sopenharmony_ci		child = btrfs_read_node_slot(mid, 0);
108262306a36Sopenharmony_ci		if (IS_ERR(child)) {
108362306a36Sopenharmony_ci			ret = PTR_ERR(child);
108462306a36Sopenharmony_ci			goto out;
108562306a36Sopenharmony_ci		}
108662306a36Sopenharmony_ci
108762306a36Sopenharmony_ci		btrfs_tree_lock(child);
108862306a36Sopenharmony_ci		ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
108962306a36Sopenharmony_ci				      BTRFS_NESTING_COW);
109062306a36Sopenharmony_ci		if (ret) {
109162306a36Sopenharmony_ci			btrfs_tree_unlock(child);
109262306a36Sopenharmony_ci			free_extent_buffer(child);
109362306a36Sopenharmony_ci			goto out;
109462306a36Sopenharmony_ci		}
109562306a36Sopenharmony_ci
109662306a36Sopenharmony_ci		ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
109762306a36Sopenharmony_ci		if (ret < 0) {
109862306a36Sopenharmony_ci			btrfs_tree_unlock(child);
109962306a36Sopenharmony_ci			free_extent_buffer(child);
110062306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
110162306a36Sopenharmony_ci			goto out;
110262306a36Sopenharmony_ci		}
110362306a36Sopenharmony_ci		rcu_assign_pointer(root->node, child);
110462306a36Sopenharmony_ci
110562306a36Sopenharmony_ci		add_root_to_dirty_list(root);
110662306a36Sopenharmony_ci		btrfs_tree_unlock(child);
110762306a36Sopenharmony_ci
110862306a36Sopenharmony_ci		path->locks[level] = 0;
110962306a36Sopenharmony_ci		path->nodes[level] = NULL;
111062306a36Sopenharmony_ci		btrfs_clear_buffer_dirty(trans, mid);
111162306a36Sopenharmony_ci		btrfs_tree_unlock(mid);
111262306a36Sopenharmony_ci		/* once for the path */
111362306a36Sopenharmony_ci		free_extent_buffer(mid);
111462306a36Sopenharmony_ci
111562306a36Sopenharmony_ci		root_sub_used(root, mid->len);
111662306a36Sopenharmony_ci		btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
111762306a36Sopenharmony_ci		/* once for the root ptr */
111862306a36Sopenharmony_ci		free_extent_buffer_stale(mid);
111962306a36Sopenharmony_ci		return 0;
112062306a36Sopenharmony_ci	}
112162306a36Sopenharmony_ci	if (btrfs_header_nritems(mid) >
112262306a36Sopenharmony_ci	    BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
112362306a36Sopenharmony_ci		return 0;
112462306a36Sopenharmony_ci
112562306a36Sopenharmony_ci	if (pslot) {
112662306a36Sopenharmony_ci		left = btrfs_read_node_slot(parent, pslot - 1);
112762306a36Sopenharmony_ci		if (IS_ERR(left)) {
112862306a36Sopenharmony_ci			ret = PTR_ERR(left);
112962306a36Sopenharmony_ci			left = NULL;
113062306a36Sopenharmony_ci			goto out;
113162306a36Sopenharmony_ci		}
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_ci		__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
113462306a36Sopenharmony_ci		wret = btrfs_cow_block(trans, root, left,
113562306a36Sopenharmony_ci				       parent, pslot - 1, &left,
113662306a36Sopenharmony_ci				       BTRFS_NESTING_LEFT_COW);
113762306a36Sopenharmony_ci		if (wret) {
113862306a36Sopenharmony_ci			ret = wret;
113962306a36Sopenharmony_ci			goto out;
114062306a36Sopenharmony_ci		}
114162306a36Sopenharmony_ci	}
114262306a36Sopenharmony_ci
114362306a36Sopenharmony_ci	if (pslot + 1 < btrfs_header_nritems(parent)) {
114462306a36Sopenharmony_ci		right = btrfs_read_node_slot(parent, pslot + 1);
114562306a36Sopenharmony_ci		if (IS_ERR(right)) {
114662306a36Sopenharmony_ci			ret = PTR_ERR(right);
114762306a36Sopenharmony_ci			right = NULL;
114862306a36Sopenharmony_ci			goto out;
114962306a36Sopenharmony_ci		}
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_ci		__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
115262306a36Sopenharmony_ci		wret = btrfs_cow_block(trans, root, right,
115362306a36Sopenharmony_ci				       parent, pslot + 1, &right,
115462306a36Sopenharmony_ci				       BTRFS_NESTING_RIGHT_COW);
115562306a36Sopenharmony_ci		if (wret) {
115662306a36Sopenharmony_ci			ret = wret;
115762306a36Sopenharmony_ci			goto out;
115862306a36Sopenharmony_ci		}
115962306a36Sopenharmony_ci	}
116062306a36Sopenharmony_ci
116162306a36Sopenharmony_ci	/* first, try to make some room in the middle buffer */
116262306a36Sopenharmony_ci	if (left) {
116362306a36Sopenharmony_ci		orig_slot += btrfs_header_nritems(left);
116462306a36Sopenharmony_ci		wret = push_node_left(trans, left, mid, 1);
116562306a36Sopenharmony_ci		if (wret < 0)
116662306a36Sopenharmony_ci			ret = wret;
116762306a36Sopenharmony_ci	}
116862306a36Sopenharmony_ci
116962306a36Sopenharmony_ci	/*
117062306a36Sopenharmony_ci	 * then try to empty the right most buffer into the middle
117162306a36Sopenharmony_ci	 */
117262306a36Sopenharmony_ci	if (right) {
117362306a36Sopenharmony_ci		wret = push_node_left(trans, mid, right, 1);
117462306a36Sopenharmony_ci		if (wret < 0 && wret != -ENOSPC)
117562306a36Sopenharmony_ci			ret = wret;
117662306a36Sopenharmony_ci		if (btrfs_header_nritems(right) == 0) {
117762306a36Sopenharmony_ci			btrfs_clear_buffer_dirty(trans, right);
117862306a36Sopenharmony_ci			btrfs_tree_unlock(right);
117962306a36Sopenharmony_ci			ret = btrfs_del_ptr(trans, root, path, level + 1, pslot + 1);
118062306a36Sopenharmony_ci			if (ret < 0) {
118162306a36Sopenharmony_ci				free_extent_buffer_stale(right);
118262306a36Sopenharmony_ci				right = NULL;
118362306a36Sopenharmony_ci				goto out;
118462306a36Sopenharmony_ci			}
118562306a36Sopenharmony_ci			root_sub_used(root, right->len);
118662306a36Sopenharmony_ci			btrfs_free_tree_block(trans, btrfs_root_id(root), right,
118762306a36Sopenharmony_ci					      0, 1);
118862306a36Sopenharmony_ci			free_extent_buffer_stale(right);
118962306a36Sopenharmony_ci			right = NULL;
119062306a36Sopenharmony_ci		} else {
119162306a36Sopenharmony_ci			struct btrfs_disk_key right_key;
119262306a36Sopenharmony_ci			btrfs_node_key(right, &right_key, 0);
119362306a36Sopenharmony_ci			ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
119462306a36Sopenharmony_ci					BTRFS_MOD_LOG_KEY_REPLACE);
119562306a36Sopenharmony_ci			if (ret < 0) {
119662306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
119762306a36Sopenharmony_ci				goto out;
119862306a36Sopenharmony_ci			}
119962306a36Sopenharmony_ci			btrfs_set_node_key(parent, &right_key, pslot + 1);
120062306a36Sopenharmony_ci			btrfs_mark_buffer_dirty(trans, parent);
120162306a36Sopenharmony_ci		}
120262306a36Sopenharmony_ci	}
120362306a36Sopenharmony_ci	if (btrfs_header_nritems(mid) == 1) {
120462306a36Sopenharmony_ci		/*
120562306a36Sopenharmony_ci		 * we're not allowed to leave a node with one item in the
120662306a36Sopenharmony_ci		 * tree during a delete.  A deletion from lower in the tree
120762306a36Sopenharmony_ci		 * could try to delete the only pointer in this node.
120862306a36Sopenharmony_ci		 * So, pull some keys from the left.
120962306a36Sopenharmony_ci		 * There has to be a left pointer at this point because
121062306a36Sopenharmony_ci		 * otherwise we would have pulled some pointers from the
121162306a36Sopenharmony_ci		 * right
121262306a36Sopenharmony_ci		 */
121362306a36Sopenharmony_ci		if (unlikely(!left)) {
121462306a36Sopenharmony_ci			btrfs_crit(fs_info,
121562306a36Sopenharmony_ci"missing left child when middle child only has 1 item, parent bytenr %llu level %d mid bytenr %llu root %llu",
121662306a36Sopenharmony_ci				   parent->start, btrfs_header_level(parent),
121762306a36Sopenharmony_ci				   mid->start, btrfs_root_id(root));
121862306a36Sopenharmony_ci			ret = -EUCLEAN;
121962306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
122062306a36Sopenharmony_ci			goto out;
122162306a36Sopenharmony_ci		}
122262306a36Sopenharmony_ci		wret = balance_node_right(trans, mid, left);
122362306a36Sopenharmony_ci		if (wret < 0) {
122462306a36Sopenharmony_ci			ret = wret;
122562306a36Sopenharmony_ci			goto out;
122662306a36Sopenharmony_ci		}
122762306a36Sopenharmony_ci		if (wret == 1) {
122862306a36Sopenharmony_ci			wret = push_node_left(trans, left, mid, 1);
122962306a36Sopenharmony_ci			if (wret < 0)
123062306a36Sopenharmony_ci				ret = wret;
123162306a36Sopenharmony_ci		}
123262306a36Sopenharmony_ci		BUG_ON(wret == 1);
123362306a36Sopenharmony_ci	}
123462306a36Sopenharmony_ci	if (btrfs_header_nritems(mid) == 0) {
123562306a36Sopenharmony_ci		btrfs_clear_buffer_dirty(trans, mid);
123662306a36Sopenharmony_ci		btrfs_tree_unlock(mid);
123762306a36Sopenharmony_ci		ret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
123862306a36Sopenharmony_ci		if (ret < 0) {
123962306a36Sopenharmony_ci			free_extent_buffer_stale(mid);
124062306a36Sopenharmony_ci			mid = NULL;
124162306a36Sopenharmony_ci			goto out;
124262306a36Sopenharmony_ci		}
124362306a36Sopenharmony_ci		root_sub_used(root, mid->len);
124462306a36Sopenharmony_ci		btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
124562306a36Sopenharmony_ci		free_extent_buffer_stale(mid);
124662306a36Sopenharmony_ci		mid = NULL;
124762306a36Sopenharmony_ci	} else {
124862306a36Sopenharmony_ci		/* update the parent key to reflect our changes */
124962306a36Sopenharmony_ci		struct btrfs_disk_key mid_key;
125062306a36Sopenharmony_ci		btrfs_node_key(mid, &mid_key, 0);
125162306a36Sopenharmony_ci		ret = btrfs_tree_mod_log_insert_key(parent, pslot,
125262306a36Sopenharmony_ci						    BTRFS_MOD_LOG_KEY_REPLACE);
125362306a36Sopenharmony_ci		if (ret < 0) {
125462306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
125562306a36Sopenharmony_ci			goto out;
125662306a36Sopenharmony_ci		}
125762306a36Sopenharmony_ci		btrfs_set_node_key(parent, &mid_key, pslot);
125862306a36Sopenharmony_ci		btrfs_mark_buffer_dirty(trans, parent);
125962306a36Sopenharmony_ci	}
126062306a36Sopenharmony_ci
126162306a36Sopenharmony_ci	/* update the path */
126262306a36Sopenharmony_ci	if (left) {
126362306a36Sopenharmony_ci		if (btrfs_header_nritems(left) > orig_slot) {
126462306a36Sopenharmony_ci			atomic_inc(&left->refs);
126562306a36Sopenharmony_ci			/* left was locked after cow */
126662306a36Sopenharmony_ci			path->nodes[level] = left;
126762306a36Sopenharmony_ci			path->slots[level + 1] -= 1;
126862306a36Sopenharmony_ci			path->slots[level] = orig_slot;
126962306a36Sopenharmony_ci			if (mid) {
127062306a36Sopenharmony_ci				btrfs_tree_unlock(mid);
127162306a36Sopenharmony_ci				free_extent_buffer(mid);
127262306a36Sopenharmony_ci			}
127362306a36Sopenharmony_ci		} else {
127462306a36Sopenharmony_ci			orig_slot -= btrfs_header_nritems(left);
127562306a36Sopenharmony_ci			path->slots[level] = orig_slot;
127662306a36Sopenharmony_ci		}
127762306a36Sopenharmony_ci	}
127862306a36Sopenharmony_ci	/* double check we haven't messed things up */
127962306a36Sopenharmony_ci	if (orig_ptr !=
128062306a36Sopenharmony_ci	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
128162306a36Sopenharmony_ci		BUG();
128262306a36Sopenharmony_ciout:
128362306a36Sopenharmony_ci	if (right) {
128462306a36Sopenharmony_ci		btrfs_tree_unlock(right);
128562306a36Sopenharmony_ci		free_extent_buffer(right);
128662306a36Sopenharmony_ci	}
128762306a36Sopenharmony_ci	if (left) {
128862306a36Sopenharmony_ci		if (path->nodes[level] != left)
128962306a36Sopenharmony_ci			btrfs_tree_unlock(left);
129062306a36Sopenharmony_ci		free_extent_buffer(left);
129162306a36Sopenharmony_ci	}
129262306a36Sopenharmony_ci	return ret;
129362306a36Sopenharmony_ci}
129462306a36Sopenharmony_ci
129562306a36Sopenharmony_ci/* Node balancing for insertion.  Here we only split or push nodes around
129662306a36Sopenharmony_ci * when they are completely full.  This is also done top down, so we
129762306a36Sopenharmony_ci * have to be pessimistic.
129862306a36Sopenharmony_ci */
129962306a36Sopenharmony_cistatic noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
130062306a36Sopenharmony_ci					  struct btrfs_root *root,
130162306a36Sopenharmony_ci					  struct btrfs_path *path, int level)
130262306a36Sopenharmony_ci{
130362306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
130462306a36Sopenharmony_ci	struct extent_buffer *right = NULL;
130562306a36Sopenharmony_ci	struct extent_buffer *mid;
130662306a36Sopenharmony_ci	struct extent_buffer *left = NULL;
130762306a36Sopenharmony_ci	struct extent_buffer *parent = NULL;
130862306a36Sopenharmony_ci	int ret = 0;
130962306a36Sopenharmony_ci	int wret;
131062306a36Sopenharmony_ci	int pslot;
131162306a36Sopenharmony_ci	int orig_slot = path->slots[level];
131262306a36Sopenharmony_ci
131362306a36Sopenharmony_ci	if (level == 0)
131462306a36Sopenharmony_ci		return 1;
131562306a36Sopenharmony_ci
131662306a36Sopenharmony_ci	mid = path->nodes[level];
131762306a36Sopenharmony_ci	WARN_ON(btrfs_header_generation(mid) != trans->transid);
131862306a36Sopenharmony_ci
131962306a36Sopenharmony_ci	if (level < BTRFS_MAX_LEVEL - 1) {
132062306a36Sopenharmony_ci		parent = path->nodes[level + 1];
132162306a36Sopenharmony_ci		pslot = path->slots[level + 1];
132262306a36Sopenharmony_ci	}
132362306a36Sopenharmony_ci
132462306a36Sopenharmony_ci	if (!parent)
132562306a36Sopenharmony_ci		return 1;
132662306a36Sopenharmony_ci
132762306a36Sopenharmony_ci	/* first, try to make some room in the middle buffer */
132862306a36Sopenharmony_ci	if (pslot) {
132962306a36Sopenharmony_ci		u32 left_nr;
133062306a36Sopenharmony_ci
133162306a36Sopenharmony_ci		left = btrfs_read_node_slot(parent, pslot - 1);
133262306a36Sopenharmony_ci		if (IS_ERR(left))
133362306a36Sopenharmony_ci			return PTR_ERR(left);
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci		__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
133662306a36Sopenharmony_ci
133762306a36Sopenharmony_ci		left_nr = btrfs_header_nritems(left);
133862306a36Sopenharmony_ci		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
133962306a36Sopenharmony_ci			wret = 1;
134062306a36Sopenharmony_ci		} else {
134162306a36Sopenharmony_ci			ret = btrfs_cow_block(trans, root, left, parent,
134262306a36Sopenharmony_ci					      pslot - 1, &left,
134362306a36Sopenharmony_ci					      BTRFS_NESTING_LEFT_COW);
134462306a36Sopenharmony_ci			if (ret)
134562306a36Sopenharmony_ci				wret = 1;
134662306a36Sopenharmony_ci			else {
134762306a36Sopenharmony_ci				wret = push_node_left(trans, left, mid, 0);
134862306a36Sopenharmony_ci			}
134962306a36Sopenharmony_ci		}
135062306a36Sopenharmony_ci		if (wret < 0)
135162306a36Sopenharmony_ci			ret = wret;
135262306a36Sopenharmony_ci		if (wret == 0) {
135362306a36Sopenharmony_ci			struct btrfs_disk_key disk_key;
135462306a36Sopenharmony_ci			orig_slot += left_nr;
135562306a36Sopenharmony_ci			btrfs_node_key(mid, &disk_key, 0);
135662306a36Sopenharmony_ci			ret = btrfs_tree_mod_log_insert_key(parent, pslot,
135762306a36Sopenharmony_ci					BTRFS_MOD_LOG_KEY_REPLACE);
135862306a36Sopenharmony_ci			if (ret < 0) {
135962306a36Sopenharmony_ci				btrfs_tree_unlock(left);
136062306a36Sopenharmony_ci				free_extent_buffer(left);
136162306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
136262306a36Sopenharmony_ci				return ret;
136362306a36Sopenharmony_ci			}
136462306a36Sopenharmony_ci			btrfs_set_node_key(parent, &disk_key, pslot);
136562306a36Sopenharmony_ci			btrfs_mark_buffer_dirty(trans, parent);
136662306a36Sopenharmony_ci			if (btrfs_header_nritems(left) > orig_slot) {
136762306a36Sopenharmony_ci				path->nodes[level] = left;
136862306a36Sopenharmony_ci				path->slots[level + 1] -= 1;
136962306a36Sopenharmony_ci				path->slots[level] = orig_slot;
137062306a36Sopenharmony_ci				btrfs_tree_unlock(mid);
137162306a36Sopenharmony_ci				free_extent_buffer(mid);
137262306a36Sopenharmony_ci			} else {
137362306a36Sopenharmony_ci				orig_slot -=
137462306a36Sopenharmony_ci					btrfs_header_nritems(left);
137562306a36Sopenharmony_ci				path->slots[level] = orig_slot;
137662306a36Sopenharmony_ci				btrfs_tree_unlock(left);
137762306a36Sopenharmony_ci				free_extent_buffer(left);
137862306a36Sopenharmony_ci			}
137962306a36Sopenharmony_ci			return 0;
138062306a36Sopenharmony_ci		}
138162306a36Sopenharmony_ci		btrfs_tree_unlock(left);
138262306a36Sopenharmony_ci		free_extent_buffer(left);
138362306a36Sopenharmony_ci	}
138462306a36Sopenharmony_ci
138562306a36Sopenharmony_ci	/*
138662306a36Sopenharmony_ci	 * then try to empty the right most buffer into the middle
138762306a36Sopenharmony_ci	 */
138862306a36Sopenharmony_ci	if (pslot + 1 < btrfs_header_nritems(parent)) {
138962306a36Sopenharmony_ci		u32 right_nr;
139062306a36Sopenharmony_ci
139162306a36Sopenharmony_ci		right = btrfs_read_node_slot(parent, pslot + 1);
139262306a36Sopenharmony_ci		if (IS_ERR(right))
139362306a36Sopenharmony_ci			return PTR_ERR(right);
139462306a36Sopenharmony_ci
139562306a36Sopenharmony_ci		__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
139662306a36Sopenharmony_ci
139762306a36Sopenharmony_ci		right_nr = btrfs_header_nritems(right);
139862306a36Sopenharmony_ci		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
139962306a36Sopenharmony_ci			wret = 1;
140062306a36Sopenharmony_ci		} else {
140162306a36Sopenharmony_ci			ret = btrfs_cow_block(trans, root, right,
140262306a36Sopenharmony_ci					      parent, pslot + 1,
140362306a36Sopenharmony_ci					      &right, BTRFS_NESTING_RIGHT_COW);
140462306a36Sopenharmony_ci			if (ret)
140562306a36Sopenharmony_ci				wret = 1;
140662306a36Sopenharmony_ci			else {
140762306a36Sopenharmony_ci				wret = balance_node_right(trans, right, mid);
140862306a36Sopenharmony_ci			}
140962306a36Sopenharmony_ci		}
141062306a36Sopenharmony_ci		if (wret < 0)
141162306a36Sopenharmony_ci			ret = wret;
141262306a36Sopenharmony_ci		if (wret == 0) {
141362306a36Sopenharmony_ci			struct btrfs_disk_key disk_key;
141462306a36Sopenharmony_ci
141562306a36Sopenharmony_ci			btrfs_node_key(right, &disk_key, 0);
141662306a36Sopenharmony_ci			ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
141762306a36Sopenharmony_ci					BTRFS_MOD_LOG_KEY_REPLACE);
141862306a36Sopenharmony_ci			if (ret < 0) {
141962306a36Sopenharmony_ci				btrfs_tree_unlock(right);
142062306a36Sopenharmony_ci				free_extent_buffer(right);
142162306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
142262306a36Sopenharmony_ci				return ret;
142362306a36Sopenharmony_ci			}
142462306a36Sopenharmony_ci			btrfs_set_node_key(parent, &disk_key, pslot + 1);
142562306a36Sopenharmony_ci			btrfs_mark_buffer_dirty(trans, parent);
142662306a36Sopenharmony_ci
142762306a36Sopenharmony_ci			if (btrfs_header_nritems(mid) <= orig_slot) {
142862306a36Sopenharmony_ci				path->nodes[level] = right;
142962306a36Sopenharmony_ci				path->slots[level + 1] += 1;
143062306a36Sopenharmony_ci				path->slots[level] = orig_slot -
143162306a36Sopenharmony_ci					btrfs_header_nritems(mid);
143262306a36Sopenharmony_ci				btrfs_tree_unlock(mid);
143362306a36Sopenharmony_ci				free_extent_buffer(mid);
143462306a36Sopenharmony_ci			} else {
143562306a36Sopenharmony_ci				btrfs_tree_unlock(right);
143662306a36Sopenharmony_ci				free_extent_buffer(right);
143762306a36Sopenharmony_ci			}
143862306a36Sopenharmony_ci			return 0;
143962306a36Sopenharmony_ci		}
144062306a36Sopenharmony_ci		btrfs_tree_unlock(right);
144162306a36Sopenharmony_ci		free_extent_buffer(right);
144262306a36Sopenharmony_ci	}
144362306a36Sopenharmony_ci	return 1;
144462306a36Sopenharmony_ci}
144562306a36Sopenharmony_ci
144662306a36Sopenharmony_ci/*
144762306a36Sopenharmony_ci * readahead one full node of leaves, finding things that are close
144862306a36Sopenharmony_ci * to the block in 'slot', and triggering ra on them.
144962306a36Sopenharmony_ci */
145062306a36Sopenharmony_cistatic void reada_for_search(struct btrfs_fs_info *fs_info,
145162306a36Sopenharmony_ci			     struct btrfs_path *path,
145262306a36Sopenharmony_ci			     int level, int slot, u64 objectid)
145362306a36Sopenharmony_ci{
145462306a36Sopenharmony_ci	struct extent_buffer *node;
145562306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
145662306a36Sopenharmony_ci	u32 nritems;
145762306a36Sopenharmony_ci	u64 search;
145862306a36Sopenharmony_ci	u64 target;
145962306a36Sopenharmony_ci	u64 nread = 0;
146062306a36Sopenharmony_ci	u64 nread_max;
146162306a36Sopenharmony_ci	u32 nr;
146262306a36Sopenharmony_ci	u32 blocksize;
146362306a36Sopenharmony_ci	u32 nscan = 0;
146462306a36Sopenharmony_ci
146562306a36Sopenharmony_ci	if (level != 1 && path->reada != READA_FORWARD_ALWAYS)
146662306a36Sopenharmony_ci		return;
146762306a36Sopenharmony_ci
146862306a36Sopenharmony_ci	if (!path->nodes[level])
146962306a36Sopenharmony_ci		return;
147062306a36Sopenharmony_ci
147162306a36Sopenharmony_ci	node = path->nodes[level];
147262306a36Sopenharmony_ci
147362306a36Sopenharmony_ci	/*
147462306a36Sopenharmony_ci	 * Since the time between visiting leaves is much shorter than the time
147562306a36Sopenharmony_ci	 * between visiting nodes, limit read ahead of nodes to 1, to avoid too
147662306a36Sopenharmony_ci	 * much IO at once (possibly random).
147762306a36Sopenharmony_ci	 */
147862306a36Sopenharmony_ci	if (path->reada == READA_FORWARD_ALWAYS) {
147962306a36Sopenharmony_ci		if (level > 1)
148062306a36Sopenharmony_ci			nread_max = node->fs_info->nodesize;
148162306a36Sopenharmony_ci		else
148262306a36Sopenharmony_ci			nread_max = SZ_128K;
148362306a36Sopenharmony_ci	} else {
148462306a36Sopenharmony_ci		nread_max = SZ_64K;
148562306a36Sopenharmony_ci	}
148662306a36Sopenharmony_ci
148762306a36Sopenharmony_ci	search = btrfs_node_blockptr(node, slot);
148862306a36Sopenharmony_ci	blocksize = fs_info->nodesize;
148962306a36Sopenharmony_ci	if (path->reada != READA_FORWARD_ALWAYS) {
149062306a36Sopenharmony_ci		struct extent_buffer *eb;
149162306a36Sopenharmony_ci
149262306a36Sopenharmony_ci		eb = find_extent_buffer(fs_info, search);
149362306a36Sopenharmony_ci		if (eb) {
149462306a36Sopenharmony_ci			free_extent_buffer(eb);
149562306a36Sopenharmony_ci			return;
149662306a36Sopenharmony_ci		}
149762306a36Sopenharmony_ci	}
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_ci	target = search;
150062306a36Sopenharmony_ci
150162306a36Sopenharmony_ci	nritems = btrfs_header_nritems(node);
150262306a36Sopenharmony_ci	nr = slot;
150362306a36Sopenharmony_ci
150462306a36Sopenharmony_ci	while (1) {
150562306a36Sopenharmony_ci		if (path->reada == READA_BACK) {
150662306a36Sopenharmony_ci			if (nr == 0)
150762306a36Sopenharmony_ci				break;
150862306a36Sopenharmony_ci			nr--;
150962306a36Sopenharmony_ci		} else if (path->reada == READA_FORWARD ||
151062306a36Sopenharmony_ci			   path->reada == READA_FORWARD_ALWAYS) {
151162306a36Sopenharmony_ci			nr++;
151262306a36Sopenharmony_ci			if (nr >= nritems)
151362306a36Sopenharmony_ci				break;
151462306a36Sopenharmony_ci		}
151562306a36Sopenharmony_ci		if (path->reada == READA_BACK && objectid) {
151662306a36Sopenharmony_ci			btrfs_node_key(node, &disk_key, nr);
151762306a36Sopenharmony_ci			if (btrfs_disk_key_objectid(&disk_key) != objectid)
151862306a36Sopenharmony_ci				break;
151962306a36Sopenharmony_ci		}
152062306a36Sopenharmony_ci		search = btrfs_node_blockptr(node, nr);
152162306a36Sopenharmony_ci		if (path->reada == READA_FORWARD_ALWAYS ||
152262306a36Sopenharmony_ci		    (search <= target && target - search <= 65536) ||
152362306a36Sopenharmony_ci		    (search > target && search - target <= 65536)) {
152462306a36Sopenharmony_ci			btrfs_readahead_node_child(node, nr);
152562306a36Sopenharmony_ci			nread += blocksize;
152662306a36Sopenharmony_ci		}
152762306a36Sopenharmony_ci		nscan++;
152862306a36Sopenharmony_ci		if (nread > nread_max || nscan > 32)
152962306a36Sopenharmony_ci			break;
153062306a36Sopenharmony_ci	}
153162306a36Sopenharmony_ci}
153262306a36Sopenharmony_ci
153362306a36Sopenharmony_cistatic noinline void reada_for_balance(struct btrfs_path *path, int level)
153462306a36Sopenharmony_ci{
153562306a36Sopenharmony_ci	struct extent_buffer *parent;
153662306a36Sopenharmony_ci	int slot;
153762306a36Sopenharmony_ci	int nritems;
153862306a36Sopenharmony_ci
153962306a36Sopenharmony_ci	parent = path->nodes[level + 1];
154062306a36Sopenharmony_ci	if (!parent)
154162306a36Sopenharmony_ci		return;
154262306a36Sopenharmony_ci
154362306a36Sopenharmony_ci	nritems = btrfs_header_nritems(parent);
154462306a36Sopenharmony_ci	slot = path->slots[level + 1];
154562306a36Sopenharmony_ci
154662306a36Sopenharmony_ci	if (slot > 0)
154762306a36Sopenharmony_ci		btrfs_readahead_node_child(parent, slot - 1);
154862306a36Sopenharmony_ci	if (slot + 1 < nritems)
154962306a36Sopenharmony_ci		btrfs_readahead_node_child(parent, slot + 1);
155062306a36Sopenharmony_ci}
155162306a36Sopenharmony_ci
155262306a36Sopenharmony_ci
155362306a36Sopenharmony_ci/*
155462306a36Sopenharmony_ci * when we walk down the tree, it is usually safe to unlock the higher layers
155562306a36Sopenharmony_ci * in the tree.  The exceptions are when our path goes through slot 0, because
155662306a36Sopenharmony_ci * operations on the tree might require changing key pointers higher up in the
155762306a36Sopenharmony_ci * tree.
155862306a36Sopenharmony_ci *
155962306a36Sopenharmony_ci * callers might also have set path->keep_locks, which tells this code to keep
156062306a36Sopenharmony_ci * the lock if the path points to the last slot in the block.  This is part of
156162306a36Sopenharmony_ci * walking through the tree, and selecting the next slot in the higher block.
156262306a36Sopenharmony_ci *
156362306a36Sopenharmony_ci * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
156462306a36Sopenharmony_ci * if lowest_unlock is 1, level 0 won't be unlocked
156562306a36Sopenharmony_ci */
156662306a36Sopenharmony_cistatic noinline void unlock_up(struct btrfs_path *path, int level,
156762306a36Sopenharmony_ci			       int lowest_unlock, int min_write_lock_level,
156862306a36Sopenharmony_ci			       int *write_lock_level)
156962306a36Sopenharmony_ci{
157062306a36Sopenharmony_ci	int i;
157162306a36Sopenharmony_ci	int skip_level = level;
157262306a36Sopenharmony_ci	bool check_skip = true;
157362306a36Sopenharmony_ci
157462306a36Sopenharmony_ci	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
157562306a36Sopenharmony_ci		if (!path->nodes[i])
157662306a36Sopenharmony_ci			break;
157762306a36Sopenharmony_ci		if (!path->locks[i])
157862306a36Sopenharmony_ci			break;
157962306a36Sopenharmony_ci
158062306a36Sopenharmony_ci		if (check_skip) {
158162306a36Sopenharmony_ci			if (path->slots[i] == 0) {
158262306a36Sopenharmony_ci				skip_level = i + 1;
158362306a36Sopenharmony_ci				continue;
158462306a36Sopenharmony_ci			}
158562306a36Sopenharmony_ci
158662306a36Sopenharmony_ci			if (path->keep_locks) {
158762306a36Sopenharmony_ci				u32 nritems;
158862306a36Sopenharmony_ci
158962306a36Sopenharmony_ci				nritems = btrfs_header_nritems(path->nodes[i]);
159062306a36Sopenharmony_ci				if (nritems < 1 || path->slots[i] >= nritems - 1) {
159162306a36Sopenharmony_ci					skip_level = i + 1;
159262306a36Sopenharmony_ci					continue;
159362306a36Sopenharmony_ci				}
159462306a36Sopenharmony_ci			}
159562306a36Sopenharmony_ci		}
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_ci		if (i >= lowest_unlock && i > skip_level) {
159862306a36Sopenharmony_ci			check_skip = false;
159962306a36Sopenharmony_ci			btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
160062306a36Sopenharmony_ci			path->locks[i] = 0;
160162306a36Sopenharmony_ci			if (write_lock_level &&
160262306a36Sopenharmony_ci			    i > min_write_lock_level &&
160362306a36Sopenharmony_ci			    i <= *write_lock_level) {
160462306a36Sopenharmony_ci				*write_lock_level = i - 1;
160562306a36Sopenharmony_ci			}
160662306a36Sopenharmony_ci		}
160762306a36Sopenharmony_ci	}
160862306a36Sopenharmony_ci}
160962306a36Sopenharmony_ci
161062306a36Sopenharmony_ci/*
161162306a36Sopenharmony_ci * Helper function for btrfs_search_slot() and other functions that do a search
161262306a36Sopenharmony_ci * on a btree. The goal is to find a tree block in the cache (the radix tree at
161362306a36Sopenharmony_ci * fs_info->buffer_radix), but if we can't find it, or it's not up to date, read
161462306a36Sopenharmony_ci * its pages from disk.
161562306a36Sopenharmony_ci *
161662306a36Sopenharmony_ci * Returns -EAGAIN, with the path unlocked, if the caller needs to repeat the
161762306a36Sopenharmony_ci * whole btree search, starting again from the current root node.
161862306a36Sopenharmony_ci */
161962306a36Sopenharmony_cistatic int
162062306a36Sopenharmony_ciread_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
162162306a36Sopenharmony_ci		      struct extent_buffer **eb_ret, int level, int slot,
162262306a36Sopenharmony_ci		      const struct btrfs_key *key)
162362306a36Sopenharmony_ci{
162462306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
162562306a36Sopenharmony_ci	struct btrfs_tree_parent_check check = { 0 };
162662306a36Sopenharmony_ci	u64 blocknr;
162762306a36Sopenharmony_ci	u64 gen;
162862306a36Sopenharmony_ci	struct extent_buffer *tmp;
162962306a36Sopenharmony_ci	int ret;
163062306a36Sopenharmony_ci	int parent_level;
163162306a36Sopenharmony_ci	bool unlock_up;
163262306a36Sopenharmony_ci
163362306a36Sopenharmony_ci	unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]);
163462306a36Sopenharmony_ci	blocknr = btrfs_node_blockptr(*eb_ret, slot);
163562306a36Sopenharmony_ci	gen = btrfs_node_ptr_generation(*eb_ret, slot);
163662306a36Sopenharmony_ci	parent_level = btrfs_header_level(*eb_ret);
163762306a36Sopenharmony_ci	btrfs_node_key_to_cpu(*eb_ret, &check.first_key, slot);
163862306a36Sopenharmony_ci	check.has_first_key = true;
163962306a36Sopenharmony_ci	check.level = parent_level - 1;
164062306a36Sopenharmony_ci	check.transid = gen;
164162306a36Sopenharmony_ci	check.owner_root = root->root_key.objectid;
164262306a36Sopenharmony_ci
164362306a36Sopenharmony_ci	/*
164462306a36Sopenharmony_ci	 * If we need to read an extent buffer from disk and we are holding locks
164562306a36Sopenharmony_ci	 * on upper level nodes, we unlock all the upper nodes before reading the
164662306a36Sopenharmony_ci	 * extent buffer, and then return -EAGAIN to the caller as it needs to
164762306a36Sopenharmony_ci	 * restart the search. We don't release the lock on the current level
164862306a36Sopenharmony_ci	 * because we need to walk this node to figure out which blocks to read.
164962306a36Sopenharmony_ci	 */
165062306a36Sopenharmony_ci	tmp = find_extent_buffer(fs_info, blocknr);
165162306a36Sopenharmony_ci	if (tmp) {
165262306a36Sopenharmony_ci		if (p->reada == READA_FORWARD_ALWAYS)
165362306a36Sopenharmony_ci			reada_for_search(fs_info, p, level, slot, key->objectid);
165462306a36Sopenharmony_ci
165562306a36Sopenharmony_ci		/* first we do an atomic uptodate check */
165662306a36Sopenharmony_ci		if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
165762306a36Sopenharmony_ci			/*
165862306a36Sopenharmony_ci			 * Do extra check for first_key, eb can be stale due to
165962306a36Sopenharmony_ci			 * being cached, read from scrub, or have multiple
166062306a36Sopenharmony_ci			 * parents (shared tree blocks).
166162306a36Sopenharmony_ci			 */
166262306a36Sopenharmony_ci			if (btrfs_verify_level_key(tmp,
166362306a36Sopenharmony_ci					parent_level - 1, &check.first_key, gen)) {
166462306a36Sopenharmony_ci				free_extent_buffer(tmp);
166562306a36Sopenharmony_ci				return -EUCLEAN;
166662306a36Sopenharmony_ci			}
166762306a36Sopenharmony_ci			*eb_ret = tmp;
166862306a36Sopenharmony_ci			return 0;
166962306a36Sopenharmony_ci		}
167062306a36Sopenharmony_ci
167162306a36Sopenharmony_ci		if (p->nowait) {
167262306a36Sopenharmony_ci			free_extent_buffer(tmp);
167362306a36Sopenharmony_ci			return -EAGAIN;
167462306a36Sopenharmony_ci		}
167562306a36Sopenharmony_ci
167662306a36Sopenharmony_ci		if (unlock_up)
167762306a36Sopenharmony_ci			btrfs_unlock_up_safe(p, level + 1);
167862306a36Sopenharmony_ci
167962306a36Sopenharmony_ci		/* now we're allowed to do a blocking uptodate check */
168062306a36Sopenharmony_ci		ret = btrfs_read_extent_buffer(tmp, &check);
168162306a36Sopenharmony_ci		if (ret) {
168262306a36Sopenharmony_ci			free_extent_buffer(tmp);
168362306a36Sopenharmony_ci			btrfs_release_path(p);
168462306a36Sopenharmony_ci			return -EIO;
168562306a36Sopenharmony_ci		}
168662306a36Sopenharmony_ci		if (btrfs_check_eb_owner(tmp, root->root_key.objectid)) {
168762306a36Sopenharmony_ci			free_extent_buffer(tmp);
168862306a36Sopenharmony_ci			btrfs_release_path(p);
168962306a36Sopenharmony_ci			return -EUCLEAN;
169062306a36Sopenharmony_ci		}
169162306a36Sopenharmony_ci
169262306a36Sopenharmony_ci		if (unlock_up)
169362306a36Sopenharmony_ci			ret = -EAGAIN;
169462306a36Sopenharmony_ci
169562306a36Sopenharmony_ci		goto out;
169662306a36Sopenharmony_ci	} else if (p->nowait) {
169762306a36Sopenharmony_ci		return -EAGAIN;
169862306a36Sopenharmony_ci	}
169962306a36Sopenharmony_ci
170062306a36Sopenharmony_ci	if (unlock_up) {
170162306a36Sopenharmony_ci		btrfs_unlock_up_safe(p, level + 1);
170262306a36Sopenharmony_ci		ret = -EAGAIN;
170362306a36Sopenharmony_ci	} else {
170462306a36Sopenharmony_ci		ret = 0;
170562306a36Sopenharmony_ci	}
170662306a36Sopenharmony_ci
170762306a36Sopenharmony_ci	if (p->reada != READA_NONE)
170862306a36Sopenharmony_ci		reada_for_search(fs_info, p, level, slot, key->objectid);
170962306a36Sopenharmony_ci
171062306a36Sopenharmony_ci	tmp = read_tree_block(fs_info, blocknr, &check);
171162306a36Sopenharmony_ci	if (IS_ERR(tmp)) {
171262306a36Sopenharmony_ci		btrfs_release_path(p);
171362306a36Sopenharmony_ci		return PTR_ERR(tmp);
171462306a36Sopenharmony_ci	}
171562306a36Sopenharmony_ci	/*
171662306a36Sopenharmony_ci	 * If the read above didn't mark this buffer up to date,
171762306a36Sopenharmony_ci	 * it will never end up being up to date.  Set ret to EIO now
171862306a36Sopenharmony_ci	 * and give up so that our caller doesn't loop forever
171962306a36Sopenharmony_ci	 * on our EAGAINs.
172062306a36Sopenharmony_ci	 */
172162306a36Sopenharmony_ci	if (!extent_buffer_uptodate(tmp))
172262306a36Sopenharmony_ci		ret = -EIO;
172362306a36Sopenharmony_ci
172462306a36Sopenharmony_ciout:
172562306a36Sopenharmony_ci	if (ret == 0) {
172662306a36Sopenharmony_ci		*eb_ret = tmp;
172762306a36Sopenharmony_ci	} else {
172862306a36Sopenharmony_ci		free_extent_buffer(tmp);
172962306a36Sopenharmony_ci		btrfs_release_path(p);
173062306a36Sopenharmony_ci	}
173162306a36Sopenharmony_ci
173262306a36Sopenharmony_ci	return ret;
173362306a36Sopenharmony_ci}
173462306a36Sopenharmony_ci
173562306a36Sopenharmony_ci/*
173662306a36Sopenharmony_ci * helper function for btrfs_search_slot.  This does all of the checks
173762306a36Sopenharmony_ci * for node-level blocks and does any balancing required based on
173862306a36Sopenharmony_ci * the ins_len.
173962306a36Sopenharmony_ci *
174062306a36Sopenharmony_ci * If no extra work was required, zero is returned.  If we had to
174162306a36Sopenharmony_ci * drop the path, -EAGAIN is returned and btrfs_search_slot must
174262306a36Sopenharmony_ci * start over
174362306a36Sopenharmony_ci */
174462306a36Sopenharmony_cistatic int
174562306a36Sopenharmony_cisetup_nodes_for_search(struct btrfs_trans_handle *trans,
174662306a36Sopenharmony_ci		       struct btrfs_root *root, struct btrfs_path *p,
174762306a36Sopenharmony_ci		       struct extent_buffer *b, int level, int ins_len,
174862306a36Sopenharmony_ci		       int *write_lock_level)
174962306a36Sopenharmony_ci{
175062306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
175162306a36Sopenharmony_ci	int ret = 0;
175262306a36Sopenharmony_ci
175362306a36Sopenharmony_ci	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
175462306a36Sopenharmony_ci	    BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
175562306a36Sopenharmony_ci
175662306a36Sopenharmony_ci		if (*write_lock_level < level + 1) {
175762306a36Sopenharmony_ci			*write_lock_level = level + 1;
175862306a36Sopenharmony_ci			btrfs_release_path(p);
175962306a36Sopenharmony_ci			return -EAGAIN;
176062306a36Sopenharmony_ci		}
176162306a36Sopenharmony_ci
176262306a36Sopenharmony_ci		reada_for_balance(p, level);
176362306a36Sopenharmony_ci		ret = split_node(trans, root, p, level);
176462306a36Sopenharmony_ci
176562306a36Sopenharmony_ci		b = p->nodes[level];
176662306a36Sopenharmony_ci	} else if (ins_len < 0 && btrfs_header_nritems(b) <
176762306a36Sopenharmony_ci		   BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
176862306a36Sopenharmony_ci
176962306a36Sopenharmony_ci		if (*write_lock_level < level + 1) {
177062306a36Sopenharmony_ci			*write_lock_level = level + 1;
177162306a36Sopenharmony_ci			btrfs_release_path(p);
177262306a36Sopenharmony_ci			return -EAGAIN;
177362306a36Sopenharmony_ci		}
177462306a36Sopenharmony_ci
177562306a36Sopenharmony_ci		reada_for_balance(p, level);
177662306a36Sopenharmony_ci		ret = balance_level(trans, root, p, level);
177762306a36Sopenharmony_ci		if (ret)
177862306a36Sopenharmony_ci			return ret;
177962306a36Sopenharmony_ci
178062306a36Sopenharmony_ci		b = p->nodes[level];
178162306a36Sopenharmony_ci		if (!b) {
178262306a36Sopenharmony_ci			btrfs_release_path(p);
178362306a36Sopenharmony_ci			return -EAGAIN;
178462306a36Sopenharmony_ci		}
178562306a36Sopenharmony_ci		BUG_ON(btrfs_header_nritems(b) == 1);
178662306a36Sopenharmony_ci	}
178762306a36Sopenharmony_ci	return ret;
178862306a36Sopenharmony_ci}
178962306a36Sopenharmony_ci
179062306a36Sopenharmony_ciint btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
179162306a36Sopenharmony_ci		u64 iobjectid, u64 ioff, u8 key_type,
179262306a36Sopenharmony_ci		struct btrfs_key *found_key)
179362306a36Sopenharmony_ci{
179462306a36Sopenharmony_ci	int ret;
179562306a36Sopenharmony_ci	struct btrfs_key key;
179662306a36Sopenharmony_ci	struct extent_buffer *eb;
179762306a36Sopenharmony_ci
179862306a36Sopenharmony_ci	ASSERT(path);
179962306a36Sopenharmony_ci	ASSERT(found_key);
180062306a36Sopenharmony_ci
180162306a36Sopenharmony_ci	key.type = key_type;
180262306a36Sopenharmony_ci	key.objectid = iobjectid;
180362306a36Sopenharmony_ci	key.offset = ioff;
180462306a36Sopenharmony_ci
180562306a36Sopenharmony_ci	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
180662306a36Sopenharmony_ci	if (ret < 0)
180762306a36Sopenharmony_ci		return ret;
180862306a36Sopenharmony_ci
180962306a36Sopenharmony_ci	eb = path->nodes[0];
181062306a36Sopenharmony_ci	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
181162306a36Sopenharmony_ci		ret = btrfs_next_leaf(fs_root, path);
181262306a36Sopenharmony_ci		if (ret)
181362306a36Sopenharmony_ci			return ret;
181462306a36Sopenharmony_ci		eb = path->nodes[0];
181562306a36Sopenharmony_ci	}
181662306a36Sopenharmony_ci
181762306a36Sopenharmony_ci	btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
181862306a36Sopenharmony_ci	if (found_key->type != key.type ||
181962306a36Sopenharmony_ci			found_key->objectid != key.objectid)
182062306a36Sopenharmony_ci		return 1;
182162306a36Sopenharmony_ci
182262306a36Sopenharmony_ci	return 0;
182362306a36Sopenharmony_ci}
182462306a36Sopenharmony_ci
182562306a36Sopenharmony_cistatic struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
182662306a36Sopenharmony_ci							struct btrfs_path *p,
182762306a36Sopenharmony_ci							int write_lock_level)
182862306a36Sopenharmony_ci{
182962306a36Sopenharmony_ci	struct extent_buffer *b;
183062306a36Sopenharmony_ci	int root_lock = 0;
183162306a36Sopenharmony_ci	int level = 0;
183262306a36Sopenharmony_ci
183362306a36Sopenharmony_ci	if (p->search_commit_root) {
183462306a36Sopenharmony_ci		b = root->commit_root;
183562306a36Sopenharmony_ci		atomic_inc(&b->refs);
183662306a36Sopenharmony_ci		level = btrfs_header_level(b);
183762306a36Sopenharmony_ci		/*
183862306a36Sopenharmony_ci		 * Ensure that all callers have set skip_locking when
183962306a36Sopenharmony_ci		 * p->search_commit_root = 1.
184062306a36Sopenharmony_ci		 */
184162306a36Sopenharmony_ci		ASSERT(p->skip_locking == 1);
184262306a36Sopenharmony_ci
184362306a36Sopenharmony_ci		goto out;
184462306a36Sopenharmony_ci	}
184562306a36Sopenharmony_ci
184662306a36Sopenharmony_ci	if (p->skip_locking) {
184762306a36Sopenharmony_ci		b = btrfs_root_node(root);
184862306a36Sopenharmony_ci		level = btrfs_header_level(b);
184962306a36Sopenharmony_ci		goto out;
185062306a36Sopenharmony_ci	}
185162306a36Sopenharmony_ci
185262306a36Sopenharmony_ci	/* We try very hard to do read locks on the root */
185362306a36Sopenharmony_ci	root_lock = BTRFS_READ_LOCK;
185462306a36Sopenharmony_ci
185562306a36Sopenharmony_ci	/*
185662306a36Sopenharmony_ci	 * If the level is set to maximum, we can skip trying to get the read
185762306a36Sopenharmony_ci	 * lock.
185862306a36Sopenharmony_ci	 */
185962306a36Sopenharmony_ci	if (write_lock_level < BTRFS_MAX_LEVEL) {
186062306a36Sopenharmony_ci		/*
186162306a36Sopenharmony_ci		 * We don't know the level of the root node until we actually
186262306a36Sopenharmony_ci		 * have it read locked
186362306a36Sopenharmony_ci		 */
186462306a36Sopenharmony_ci		if (p->nowait) {
186562306a36Sopenharmony_ci			b = btrfs_try_read_lock_root_node(root);
186662306a36Sopenharmony_ci			if (IS_ERR(b))
186762306a36Sopenharmony_ci				return b;
186862306a36Sopenharmony_ci		} else {
186962306a36Sopenharmony_ci			b = btrfs_read_lock_root_node(root);
187062306a36Sopenharmony_ci		}
187162306a36Sopenharmony_ci		level = btrfs_header_level(b);
187262306a36Sopenharmony_ci		if (level > write_lock_level)
187362306a36Sopenharmony_ci			goto out;
187462306a36Sopenharmony_ci
187562306a36Sopenharmony_ci		/* Whoops, must trade for write lock */
187662306a36Sopenharmony_ci		btrfs_tree_read_unlock(b);
187762306a36Sopenharmony_ci		free_extent_buffer(b);
187862306a36Sopenharmony_ci	}
187962306a36Sopenharmony_ci
188062306a36Sopenharmony_ci	b = btrfs_lock_root_node(root);
188162306a36Sopenharmony_ci	root_lock = BTRFS_WRITE_LOCK;
188262306a36Sopenharmony_ci
188362306a36Sopenharmony_ci	/* The level might have changed, check again */
188462306a36Sopenharmony_ci	level = btrfs_header_level(b);
188562306a36Sopenharmony_ci
188662306a36Sopenharmony_ciout:
188762306a36Sopenharmony_ci	/*
188862306a36Sopenharmony_ci	 * The root may have failed to write out at some point, and thus is no
188962306a36Sopenharmony_ci	 * longer valid, return an error in this case.
189062306a36Sopenharmony_ci	 */
189162306a36Sopenharmony_ci	if (!extent_buffer_uptodate(b)) {
189262306a36Sopenharmony_ci		if (root_lock)
189362306a36Sopenharmony_ci			btrfs_tree_unlock_rw(b, root_lock);
189462306a36Sopenharmony_ci		free_extent_buffer(b);
189562306a36Sopenharmony_ci		return ERR_PTR(-EIO);
189662306a36Sopenharmony_ci	}
189762306a36Sopenharmony_ci
189862306a36Sopenharmony_ci	p->nodes[level] = b;
189962306a36Sopenharmony_ci	if (!p->skip_locking)
190062306a36Sopenharmony_ci		p->locks[level] = root_lock;
190162306a36Sopenharmony_ci	/*
190262306a36Sopenharmony_ci	 * Callers are responsible for dropping b's references.
190362306a36Sopenharmony_ci	 */
190462306a36Sopenharmony_ci	return b;
190562306a36Sopenharmony_ci}
190662306a36Sopenharmony_ci
190762306a36Sopenharmony_ci/*
190862306a36Sopenharmony_ci * Replace the extent buffer at the lowest level of the path with a cloned
190962306a36Sopenharmony_ci * version. The purpose is to be able to use it safely, after releasing the
191062306a36Sopenharmony_ci * commit root semaphore, even if relocation is happening in parallel, the
191162306a36Sopenharmony_ci * transaction used for relocation is committed and the extent buffer is
191262306a36Sopenharmony_ci * reallocated in the next transaction.
191362306a36Sopenharmony_ci *
191462306a36Sopenharmony_ci * This is used in a context where the caller does not prevent transaction
191562306a36Sopenharmony_ci * commits from happening, either by holding a transaction handle or holding
191662306a36Sopenharmony_ci * some lock, while it's doing searches through a commit root.
191762306a36Sopenharmony_ci * At the moment it's only used for send operations.
191862306a36Sopenharmony_ci */
191962306a36Sopenharmony_cistatic int finish_need_commit_sem_search(struct btrfs_path *path)
192062306a36Sopenharmony_ci{
192162306a36Sopenharmony_ci	const int i = path->lowest_level;
192262306a36Sopenharmony_ci	const int slot = path->slots[i];
192362306a36Sopenharmony_ci	struct extent_buffer *lowest = path->nodes[i];
192462306a36Sopenharmony_ci	struct extent_buffer *clone;
192562306a36Sopenharmony_ci
192662306a36Sopenharmony_ci	ASSERT(path->need_commit_sem);
192762306a36Sopenharmony_ci
192862306a36Sopenharmony_ci	if (!lowest)
192962306a36Sopenharmony_ci		return 0;
193062306a36Sopenharmony_ci
193162306a36Sopenharmony_ci	lockdep_assert_held_read(&lowest->fs_info->commit_root_sem);
193262306a36Sopenharmony_ci
193362306a36Sopenharmony_ci	clone = btrfs_clone_extent_buffer(lowest);
193462306a36Sopenharmony_ci	if (!clone)
193562306a36Sopenharmony_ci		return -ENOMEM;
193662306a36Sopenharmony_ci
193762306a36Sopenharmony_ci	btrfs_release_path(path);
193862306a36Sopenharmony_ci	path->nodes[i] = clone;
193962306a36Sopenharmony_ci	path->slots[i] = slot;
194062306a36Sopenharmony_ci
194162306a36Sopenharmony_ci	return 0;
194262306a36Sopenharmony_ci}
194362306a36Sopenharmony_ci
/*
 * Find the slot for @key in @eb, searching from @search_low_slot upwards.
 *
 * @prev_cmp is the result of the search on the parent node. Returns the
 * result of btrfs_bin_search(), or 0 with *@slot = 0 when the binary search
 * can be skipped.
 */
static inline int search_for_key_slot(struct extent_buffer *eb,
				      int search_low_slot,
				      const struct btrfs_key *key,
				      int prev_cmp,
				      int *slot)
{
	if (prev_cmp != 0)
		return btrfs_bin_search(eb, search_low_slot, key, slot);

	/*
	 * The search on the parent node hit the key exactly. Every key pointer
	 * (struct btrfs_key_ptr) holds the lowest key reachable through the
	 * subtree it points to, so on all lower levels the target key sits at
	 * slot 0 and no further searching is needed.
	 */
	*slot = 0;
	return 0;
}
196462306a36Sopenharmony_ci
196562306a36Sopenharmony_cistatic int search_leaf(struct btrfs_trans_handle *trans,
196662306a36Sopenharmony_ci		       struct btrfs_root *root,
196762306a36Sopenharmony_ci		       const struct btrfs_key *key,
196862306a36Sopenharmony_ci		       struct btrfs_path *path,
196962306a36Sopenharmony_ci		       int ins_len,
197062306a36Sopenharmony_ci		       int prev_cmp)
197162306a36Sopenharmony_ci{
197262306a36Sopenharmony_ci	struct extent_buffer *leaf = path->nodes[0];
197362306a36Sopenharmony_ci	int leaf_free_space = -1;
197462306a36Sopenharmony_ci	int search_low_slot = 0;
197562306a36Sopenharmony_ci	int ret;
197662306a36Sopenharmony_ci	bool do_bin_search = true;
197762306a36Sopenharmony_ci
197862306a36Sopenharmony_ci	/*
197962306a36Sopenharmony_ci	 * If we are doing an insertion, the leaf has enough free space and the
198062306a36Sopenharmony_ci	 * destination slot for the key is not slot 0, then we can unlock our
198162306a36Sopenharmony_ci	 * write lock on the parent, and any other upper nodes, before doing the
198262306a36Sopenharmony_ci	 * binary search on the leaf (with search_for_key_slot()), allowing other
198362306a36Sopenharmony_ci	 * tasks to lock the parent and any other upper nodes.
198462306a36Sopenharmony_ci	 */
198562306a36Sopenharmony_ci	if (ins_len > 0) {
198662306a36Sopenharmony_ci		/*
198762306a36Sopenharmony_ci		 * Cache the leaf free space, since we will need it later and it
198862306a36Sopenharmony_ci		 * will not change until then.
198962306a36Sopenharmony_ci		 */
199062306a36Sopenharmony_ci		leaf_free_space = btrfs_leaf_free_space(leaf);
199162306a36Sopenharmony_ci
199262306a36Sopenharmony_ci		/*
199362306a36Sopenharmony_ci		 * !path->locks[1] means we have a single node tree, the leaf is
199462306a36Sopenharmony_ci		 * the root of the tree.
199562306a36Sopenharmony_ci		 */
199662306a36Sopenharmony_ci		if (path->locks[1] && leaf_free_space >= ins_len) {
199762306a36Sopenharmony_ci			struct btrfs_disk_key first_key;
199862306a36Sopenharmony_ci
199962306a36Sopenharmony_ci			ASSERT(btrfs_header_nritems(leaf) > 0);
200062306a36Sopenharmony_ci			btrfs_item_key(leaf, &first_key, 0);
200162306a36Sopenharmony_ci
200262306a36Sopenharmony_ci			/*
200362306a36Sopenharmony_ci			 * Doing the extra comparison with the first key is cheap,
200462306a36Sopenharmony_ci			 * taking into account that the first key is very likely
200562306a36Sopenharmony_ci			 * already in a cache line because it immediately follows
200662306a36Sopenharmony_ci			 * the extent buffer's header and we have recently accessed
200762306a36Sopenharmony_ci			 * the header's level field.
200862306a36Sopenharmony_ci			 */
200962306a36Sopenharmony_ci			ret = comp_keys(&first_key, key);
201062306a36Sopenharmony_ci			if (ret < 0) {
201162306a36Sopenharmony_ci				/*
201262306a36Sopenharmony_ci				 * The first key is smaller than the key we want
201362306a36Sopenharmony_ci				 * to insert, so we are safe to unlock all upper
201462306a36Sopenharmony_ci				 * nodes and we have to do the binary search.
201562306a36Sopenharmony_ci				 *
201662306a36Sopenharmony_ci				 * We do use btrfs_unlock_up_safe() and not
201762306a36Sopenharmony_ci				 * unlock_up() because the later does not unlock
201862306a36Sopenharmony_ci				 * nodes with a slot of 0 - we can safely unlock
201962306a36Sopenharmony_ci				 * any node even if its slot is 0 since in this
202062306a36Sopenharmony_ci				 * case the key does not end up at slot 0 of the
202162306a36Sopenharmony_ci				 * leaf and there's no need to split the leaf.
202262306a36Sopenharmony_ci				 */
202362306a36Sopenharmony_ci				btrfs_unlock_up_safe(path, 1);
202462306a36Sopenharmony_ci				search_low_slot = 1;
202562306a36Sopenharmony_ci			} else {
202662306a36Sopenharmony_ci				/*
202762306a36Sopenharmony_ci				 * The first key is >= then the key we want to
202862306a36Sopenharmony_ci				 * insert, so we can skip the binary search as
202962306a36Sopenharmony_ci				 * the target key will be at slot 0.
203062306a36Sopenharmony_ci				 *
203162306a36Sopenharmony_ci				 * We can not unlock upper nodes when the key is
203262306a36Sopenharmony_ci				 * less than the first key, because we will need
203362306a36Sopenharmony_ci				 * to update the key at slot 0 of the parent node
203462306a36Sopenharmony_ci				 * and possibly of other upper nodes too.
203562306a36Sopenharmony_ci				 * If the key matches the first key, then we can
203662306a36Sopenharmony_ci				 * unlock all the upper nodes, using
203762306a36Sopenharmony_ci				 * btrfs_unlock_up_safe() instead of unlock_up()
203862306a36Sopenharmony_ci				 * as stated above.
203962306a36Sopenharmony_ci				 */
204062306a36Sopenharmony_ci				if (ret == 0)
204162306a36Sopenharmony_ci					btrfs_unlock_up_safe(path, 1);
204262306a36Sopenharmony_ci				/*
204362306a36Sopenharmony_ci				 * ret is already 0 or 1, matching the result of
204462306a36Sopenharmony_ci				 * a btrfs_bin_search() call, so there is no need
204562306a36Sopenharmony_ci				 * to adjust it.
204662306a36Sopenharmony_ci				 */
204762306a36Sopenharmony_ci				do_bin_search = false;
204862306a36Sopenharmony_ci				path->slots[0] = 0;
204962306a36Sopenharmony_ci			}
205062306a36Sopenharmony_ci		}
205162306a36Sopenharmony_ci	}
205262306a36Sopenharmony_ci
205362306a36Sopenharmony_ci	if (do_bin_search) {
205462306a36Sopenharmony_ci		ret = search_for_key_slot(leaf, search_low_slot, key,
205562306a36Sopenharmony_ci					  prev_cmp, &path->slots[0]);
205662306a36Sopenharmony_ci		if (ret < 0)
205762306a36Sopenharmony_ci			return ret;
205862306a36Sopenharmony_ci	}
205962306a36Sopenharmony_ci
206062306a36Sopenharmony_ci	if (ins_len > 0) {
206162306a36Sopenharmony_ci		/*
206262306a36Sopenharmony_ci		 * Item key already exists. In this case, if we are allowed to
206362306a36Sopenharmony_ci		 * insert the item (for example, in dir_item case, item key
206462306a36Sopenharmony_ci		 * collision is allowed), it will be merged with the original
206562306a36Sopenharmony_ci		 * item. Only the item size grows, no new btrfs item will be
206662306a36Sopenharmony_ci		 * added. If search_for_extension is not set, ins_len already
206762306a36Sopenharmony_ci		 * accounts the size btrfs_item, deduct it here so leaf space
206862306a36Sopenharmony_ci		 * check will be correct.
206962306a36Sopenharmony_ci		 */
207062306a36Sopenharmony_ci		if (ret == 0 && !path->search_for_extension) {
207162306a36Sopenharmony_ci			ASSERT(ins_len >= sizeof(struct btrfs_item));
207262306a36Sopenharmony_ci			ins_len -= sizeof(struct btrfs_item);
207362306a36Sopenharmony_ci		}
207462306a36Sopenharmony_ci
207562306a36Sopenharmony_ci		ASSERT(leaf_free_space >= 0);
207662306a36Sopenharmony_ci
207762306a36Sopenharmony_ci		if (leaf_free_space < ins_len) {
207862306a36Sopenharmony_ci			int err;
207962306a36Sopenharmony_ci
208062306a36Sopenharmony_ci			err = split_leaf(trans, root, key, path, ins_len,
208162306a36Sopenharmony_ci					 (ret == 0));
208262306a36Sopenharmony_ci			ASSERT(err <= 0);
208362306a36Sopenharmony_ci			if (WARN_ON(err > 0))
208462306a36Sopenharmony_ci				err = -EUCLEAN;
208562306a36Sopenharmony_ci			if (err)
208662306a36Sopenharmony_ci				ret = err;
208762306a36Sopenharmony_ci		}
208862306a36Sopenharmony_ci	}
208962306a36Sopenharmony_ci
209062306a36Sopenharmony_ci	return ret;
209162306a36Sopenharmony_ci}
209262306a36Sopenharmony_ci
209362306a36Sopenharmony_ci/*
209462306a36Sopenharmony_ci * btrfs_search_slot - look for a key in a tree and perform necessary
209562306a36Sopenharmony_ci * modifications to preserve tree invariants.
209662306a36Sopenharmony_ci *
209762306a36Sopenharmony_ci * @trans:	Handle of transaction, used when modifying the tree
209862306a36Sopenharmony_ci * @p:		Holds all btree nodes along the search path
209962306a36Sopenharmony_ci * @root:	The root node of the tree
210062306a36Sopenharmony_ci * @key:	The key we are looking for
210162306a36Sopenharmony_ci * @ins_len:	Indicates purpose of search:
210262306a36Sopenharmony_ci *              >0  for inserts it's size of item inserted (*)
210362306a36Sopenharmony_ci *              <0  for deletions
210462306a36Sopenharmony_ci *               0  for plain searches, not modifying the tree
210562306a36Sopenharmony_ci *
210662306a36Sopenharmony_ci *              (*) If size of item inserted doesn't include
210762306a36Sopenharmony_ci *              sizeof(struct btrfs_item), then p->search_for_extension must
210862306a36Sopenharmony_ci *              be set.
210962306a36Sopenharmony_ci * @cow:	boolean should CoW operations be performed. Must always be 1
211062306a36Sopenharmony_ci *		when modifying the tree.
211162306a36Sopenharmony_ci *
211262306a36Sopenharmony_ci * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
211362306a36Sopenharmony_ci * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
211462306a36Sopenharmony_ci *
211562306a36Sopenharmony_ci * If @key is found, 0 is returned and you can find the item in the leaf level
211662306a36Sopenharmony_ci * of the path (level 0)
211762306a36Sopenharmony_ci *
211862306a36Sopenharmony_ci * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
211962306a36Sopenharmony_ci * points to the slot where it should be inserted
212062306a36Sopenharmony_ci *
212162306a36Sopenharmony_ci * If an error is encountered while searching the tree a negative error number
212262306a36Sopenharmony_ci * is returned
212362306a36Sopenharmony_ci */
212462306a36Sopenharmony_ciint btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
212562306a36Sopenharmony_ci		      const struct btrfs_key *key, struct btrfs_path *p,
212662306a36Sopenharmony_ci		      int ins_len, int cow)
212762306a36Sopenharmony_ci{
212862306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
212962306a36Sopenharmony_ci	struct extent_buffer *b;
213062306a36Sopenharmony_ci	int slot;
213162306a36Sopenharmony_ci	int ret;
213262306a36Sopenharmony_ci	int err;
213362306a36Sopenharmony_ci	int level;
213462306a36Sopenharmony_ci	int lowest_unlock = 1;
213562306a36Sopenharmony_ci	/* everything at write_lock_level or lower must be write locked */
213662306a36Sopenharmony_ci	int write_lock_level = 0;
213762306a36Sopenharmony_ci	u8 lowest_level = 0;
213862306a36Sopenharmony_ci	int min_write_lock_level;
213962306a36Sopenharmony_ci	int prev_cmp;
214062306a36Sopenharmony_ci
214162306a36Sopenharmony_ci	might_sleep();
214262306a36Sopenharmony_ci
214362306a36Sopenharmony_ci	lowest_level = p->lowest_level;
214462306a36Sopenharmony_ci	WARN_ON(lowest_level && ins_len > 0);
214562306a36Sopenharmony_ci	WARN_ON(p->nodes[0] != NULL);
214662306a36Sopenharmony_ci	BUG_ON(!cow && ins_len);
214762306a36Sopenharmony_ci
214862306a36Sopenharmony_ci	/*
214962306a36Sopenharmony_ci	 * For now only allow nowait for read only operations.  There's no
215062306a36Sopenharmony_ci	 * strict reason why we can't, we just only need it for reads so it's
215162306a36Sopenharmony_ci	 * only implemented for reads.
215262306a36Sopenharmony_ci	 */
215362306a36Sopenharmony_ci	ASSERT(!p->nowait || !cow);
215462306a36Sopenharmony_ci
215562306a36Sopenharmony_ci	if (ins_len < 0) {
215662306a36Sopenharmony_ci		lowest_unlock = 2;
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_ci		/* when we are removing items, we might have to go up to level
215962306a36Sopenharmony_ci		 * two as we update tree pointers  Make sure we keep write
216062306a36Sopenharmony_ci		 * for those levels as well
216162306a36Sopenharmony_ci		 */
216262306a36Sopenharmony_ci		write_lock_level = 2;
216362306a36Sopenharmony_ci	} else if (ins_len > 0) {
216462306a36Sopenharmony_ci		/*
216562306a36Sopenharmony_ci		 * for inserting items, make sure we have a write lock on
216662306a36Sopenharmony_ci		 * level 1 so we can update keys
216762306a36Sopenharmony_ci		 */
216862306a36Sopenharmony_ci		write_lock_level = 1;
216962306a36Sopenharmony_ci	}
217062306a36Sopenharmony_ci
217162306a36Sopenharmony_ci	if (!cow)
217262306a36Sopenharmony_ci		write_lock_level = -1;
217362306a36Sopenharmony_ci
217462306a36Sopenharmony_ci	if (cow && (p->keep_locks || p->lowest_level))
217562306a36Sopenharmony_ci		write_lock_level = BTRFS_MAX_LEVEL;
217662306a36Sopenharmony_ci
217762306a36Sopenharmony_ci	min_write_lock_level = write_lock_level;
217862306a36Sopenharmony_ci
217962306a36Sopenharmony_ci	if (p->need_commit_sem) {
218062306a36Sopenharmony_ci		ASSERT(p->search_commit_root);
218162306a36Sopenharmony_ci		if (p->nowait) {
218262306a36Sopenharmony_ci			if (!down_read_trylock(&fs_info->commit_root_sem))
218362306a36Sopenharmony_ci				return -EAGAIN;
218462306a36Sopenharmony_ci		} else {
218562306a36Sopenharmony_ci			down_read(&fs_info->commit_root_sem);
218662306a36Sopenharmony_ci		}
218762306a36Sopenharmony_ci	}
218862306a36Sopenharmony_ci
218962306a36Sopenharmony_ciagain:
219062306a36Sopenharmony_ci	prev_cmp = -1;
219162306a36Sopenharmony_ci	b = btrfs_search_slot_get_root(root, p, write_lock_level);
219262306a36Sopenharmony_ci	if (IS_ERR(b)) {
219362306a36Sopenharmony_ci		ret = PTR_ERR(b);
219462306a36Sopenharmony_ci		goto done;
219562306a36Sopenharmony_ci	}
219662306a36Sopenharmony_ci
219762306a36Sopenharmony_ci	while (b) {
219862306a36Sopenharmony_ci		int dec = 0;
219962306a36Sopenharmony_ci
220062306a36Sopenharmony_ci		level = btrfs_header_level(b);
220162306a36Sopenharmony_ci
220262306a36Sopenharmony_ci		if (cow) {
220362306a36Sopenharmony_ci			bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
220462306a36Sopenharmony_ci
220562306a36Sopenharmony_ci			/*
220662306a36Sopenharmony_ci			 * if we don't really need to cow this block
220762306a36Sopenharmony_ci			 * then we don't want to set the path blocking,
220862306a36Sopenharmony_ci			 * so we test it here
220962306a36Sopenharmony_ci			 */
221062306a36Sopenharmony_ci			if (!should_cow_block(trans, root, b))
221162306a36Sopenharmony_ci				goto cow_done;
221262306a36Sopenharmony_ci
221362306a36Sopenharmony_ci			/*
221462306a36Sopenharmony_ci			 * must have write locks on this node and the
221562306a36Sopenharmony_ci			 * parent
221662306a36Sopenharmony_ci			 */
221762306a36Sopenharmony_ci			if (level > write_lock_level ||
221862306a36Sopenharmony_ci			    (level + 1 > write_lock_level &&
221962306a36Sopenharmony_ci			    level + 1 < BTRFS_MAX_LEVEL &&
222062306a36Sopenharmony_ci			    p->nodes[level + 1])) {
222162306a36Sopenharmony_ci				write_lock_level = level + 1;
222262306a36Sopenharmony_ci				btrfs_release_path(p);
222362306a36Sopenharmony_ci				goto again;
222462306a36Sopenharmony_ci			}
222562306a36Sopenharmony_ci
222662306a36Sopenharmony_ci			if (last_level)
222762306a36Sopenharmony_ci				err = btrfs_cow_block(trans, root, b, NULL, 0,
222862306a36Sopenharmony_ci						      &b,
222962306a36Sopenharmony_ci						      BTRFS_NESTING_COW);
223062306a36Sopenharmony_ci			else
223162306a36Sopenharmony_ci				err = btrfs_cow_block(trans, root, b,
223262306a36Sopenharmony_ci						      p->nodes[level + 1],
223362306a36Sopenharmony_ci						      p->slots[level + 1], &b,
223462306a36Sopenharmony_ci						      BTRFS_NESTING_COW);
223562306a36Sopenharmony_ci			if (err) {
223662306a36Sopenharmony_ci				ret = err;
223762306a36Sopenharmony_ci				goto done;
223862306a36Sopenharmony_ci			}
223962306a36Sopenharmony_ci		}
224062306a36Sopenharmony_cicow_done:
224162306a36Sopenharmony_ci		p->nodes[level] = b;
224262306a36Sopenharmony_ci
224362306a36Sopenharmony_ci		/*
224462306a36Sopenharmony_ci		 * we have a lock on b and as long as we aren't changing
224562306a36Sopenharmony_ci		 * the tree, there is no way to for the items in b to change.
224662306a36Sopenharmony_ci		 * It is safe to drop the lock on our parent before we
224762306a36Sopenharmony_ci		 * go through the expensive btree search on b.
224862306a36Sopenharmony_ci		 *
224962306a36Sopenharmony_ci		 * If we're inserting or deleting (ins_len != 0), then we might
225062306a36Sopenharmony_ci		 * be changing slot zero, which may require changing the parent.
225162306a36Sopenharmony_ci		 * So, we can't drop the lock until after we know which slot
225262306a36Sopenharmony_ci		 * we're operating on.
225362306a36Sopenharmony_ci		 */
225462306a36Sopenharmony_ci		if (!ins_len && !p->keep_locks) {
225562306a36Sopenharmony_ci			int u = level + 1;
225662306a36Sopenharmony_ci
225762306a36Sopenharmony_ci			if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
225862306a36Sopenharmony_ci				btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
225962306a36Sopenharmony_ci				p->locks[u] = 0;
226062306a36Sopenharmony_ci			}
226162306a36Sopenharmony_ci		}
226262306a36Sopenharmony_ci
226362306a36Sopenharmony_ci		if (level == 0) {
226462306a36Sopenharmony_ci			if (ins_len > 0)
226562306a36Sopenharmony_ci				ASSERT(write_lock_level >= 1);
226662306a36Sopenharmony_ci
226762306a36Sopenharmony_ci			ret = search_leaf(trans, root, key, p, ins_len, prev_cmp);
226862306a36Sopenharmony_ci			if (!p->search_for_split)
226962306a36Sopenharmony_ci				unlock_up(p, level, lowest_unlock,
227062306a36Sopenharmony_ci					  min_write_lock_level, NULL);
227162306a36Sopenharmony_ci			goto done;
227262306a36Sopenharmony_ci		}
227362306a36Sopenharmony_ci
227462306a36Sopenharmony_ci		ret = search_for_key_slot(b, 0, key, prev_cmp, &slot);
227562306a36Sopenharmony_ci		if (ret < 0)
227662306a36Sopenharmony_ci			goto done;
227762306a36Sopenharmony_ci		prev_cmp = ret;
227862306a36Sopenharmony_ci
227962306a36Sopenharmony_ci		if (ret && slot > 0) {
228062306a36Sopenharmony_ci			dec = 1;
228162306a36Sopenharmony_ci			slot--;
228262306a36Sopenharmony_ci		}
228362306a36Sopenharmony_ci		p->slots[level] = slot;
228462306a36Sopenharmony_ci		err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
228562306a36Sopenharmony_ci					     &write_lock_level);
228662306a36Sopenharmony_ci		if (err == -EAGAIN)
228762306a36Sopenharmony_ci			goto again;
228862306a36Sopenharmony_ci		if (err) {
228962306a36Sopenharmony_ci			ret = err;
229062306a36Sopenharmony_ci			goto done;
229162306a36Sopenharmony_ci		}
229262306a36Sopenharmony_ci		b = p->nodes[level];
229362306a36Sopenharmony_ci		slot = p->slots[level];
229462306a36Sopenharmony_ci
229562306a36Sopenharmony_ci		/*
229662306a36Sopenharmony_ci		 * Slot 0 is special, if we change the key we have to update
229762306a36Sopenharmony_ci		 * the parent pointer which means we must have a write lock on
229862306a36Sopenharmony_ci		 * the parent
229962306a36Sopenharmony_ci		 */
230062306a36Sopenharmony_ci		if (slot == 0 && ins_len && write_lock_level < level + 1) {
230162306a36Sopenharmony_ci			write_lock_level = level + 1;
230262306a36Sopenharmony_ci			btrfs_release_path(p);
230362306a36Sopenharmony_ci			goto again;
230462306a36Sopenharmony_ci		}
230562306a36Sopenharmony_ci
230662306a36Sopenharmony_ci		unlock_up(p, level, lowest_unlock, min_write_lock_level,
230762306a36Sopenharmony_ci			  &write_lock_level);
230862306a36Sopenharmony_ci
230962306a36Sopenharmony_ci		if (level == lowest_level) {
231062306a36Sopenharmony_ci			if (dec)
231162306a36Sopenharmony_ci				p->slots[level]++;
231262306a36Sopenharmony_ci			goto done;
231362306a36Sopenharmony_ci		}
231462306a36Sopenharmony_ci
231562306a36Sopenharmony_ci		err = read_block_for_search(root, p, &b, level, slot, key);
231662306a36Sopenharmony_ci		if (err == -EAGAIN)
231762306a36Sopenharmony_ci			goto again;
231862306a36Sopenharmony_ci		if (err) {
231962306a36Sopenharmony_ci			ret = err;
232062306a36Sopenharmony_ci			goto done;
232162306a36Sopenharmony_ci		}
232262306a36Sopenharmony_ci
232362306a36Sopenharmony_ci		if (!p->skip_locking) {
232462306a36Sopenharmony_ci			level = btrfs_header_level(b);
232562306a36Sopenharmony_ci
232662306a36Sopenharmony_ci			btrfs_maybe_reset_lockdep_class(root, b);
232762306a36Sopenharmony_ci
232862306a36Sopenharmony_ci			if (level <= write_lock_level) {
232962306a36Sopenharmony_ci				btrfs_tree_lock(b);
233062306a36Sopenharmony_ci				p->locks[level] = BTRFS_WRITE_LOCK;
233162306a36Sopenharmony_ci			} else {
233262306a36Sopenharmony_ci				if (p->nowait) {
233362306a36Sopenharmony_ci					if (!btrfs_try_tree_read_lock(b)) {
233462306a36Sopenharmony_ci						free_extent_buffer(b);
233562306a36Sopenharmony_ci						ret = -EAGAIN;
233662306a36Sopenharmony_ci						goto done;
233762306a36Sopenharmony_ci					}
233862306a36Sopenharmony_ci				} else {
233962306a36Sopenharmony_ci					btrfs_tree_read_lock(b);
234062306a36Sopenharmony_ci				}
234162306a36Sopenharmony_ci				p->locks[level] = BTRFS_READ_LOCK;
234262306a36Sopenharmony_ci			}
234362306a36Sopenharmony_ci			p->nodes[level] = b;
234462306a36Sopenharmony_ci		}
234562306a36Sopenharmony_ci	}
234662306a36Sopenharmony_ci	ret = 1;
234762306a36Sopenharmony_cidone:
234862306a36Sopenharmony_ci	if (ret < 0 && !p->skip_release_on_error)
234962306a36Sopenharmony_ci		btrfs_release_path(p);
235062306a36Sopenharmony_ci
235162306a36Sopenharmony_ci	if (p->need_commit_sem) {
235262306a36Sopenharmony_ci		int ret2;
235362306a36Sopenharmony_ci
235462306a36Sopenharmony_ci		ret2 = finish_need_commit_sem_search(p);
235562306a36Sopenharmony_ci		up_read(&fs_info->commit_root_sem);
235662306a36Sopenharmony_ci		if (ret2)
235762306a36Sopenharmony_ci			ret = ret2;
235862306a36Sopenharmony_ci	}
235962306a36Sopenharmony_ci
236062306a36Sopenharmony_ci	return ret;
236162306a36Sopenharmony_ci}
236262306a36Sopenharmony_ciALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO);
236362306a36Sopenharmony_ci
236462306a36Sopenharmony_ci/*
236562306a36Sopenharmony_ci * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
236662306a36Sopenharmony_ci * current state of the tree together with the operations recorded in the tree
236762306a36Sopenharmony_ci * modification log to search for the key in a previous version of this tree, as
236862306a36Sopenharmony_ci * denoted by the time_seq parameter.
236962306a36Sopenharmony_ci *
237062306a36Sopenharmony_ci * Naturally, there is no support for insert, delete or cow operations.
237162306a36Sopenharmony_ci *
237262306a36Sopenharmony_ci * The resulting path and return value will be set up as if we called
237362306a36Sopenharmony_ci * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
237462306a36Sopenharmony_ci */
237562306a36Sopenharmony_ciint btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
237662306a36Sopenharmony_ci			  struct btrfs_path *p, u64 time_seq)
237762306a36Sopenharmony_ci{
237862306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
237962306a36Sopenharmony_ci	struct extent_buffer *b;
238062306a36Sopenharmony_ci	int slot;
238162306a36Sopenharmony_ci	int ret;
238262306a36Sopenharmony_ci	int err;
238362306a36Sopenharmony_ci	int level;
238462306a36Sopenharmony_ci	int lowest_unlock = 1;
238562306a36Sopenharmony_ci	u8 lowest_level = 0;
238662306a36Sopenharmony_ci
238762306a36Sopenharmony_ci	lowest_level = p->lowest_level;
238862306a36Sopenharmony_ci	WARN_ON(p->nodes[0] != NULL);
238962306a36Sopenharmony_ci	ASSERT(!p->nowait);
239062306a36Sopenharmony_ci
239162306a36Sopenharmony_ci	if (p->search_commit_root) {
239262306a36Sopenharmony_ci		BUG_ON(time_seq);
239362306a36Sopenharmony_ci		return btrfs_search_slot(NULL, root, key, p, 0, 0);
239462306a36Sopenharmony_ci	}
239562306a36Sopenharmony_ci
239662306a36Sopenharmony_ciagain:
239762306a36Sopenharmony_ci	b = btrfs_get_old_root(root, time_seq);
239862306a36Sopenharmony_ci	if (!b) {
239962306a36Sopenharmony_ci		ret = -EIO;
240062306a36Sopenharmony_ci		goto done;
240162306a36Sopenharmony_ci	}
240262306a36Sopenharmony_ci	level = btrfs_header_level(b);
240362306a36Sopenharmony_ci	p->locks[level] = BTRFS_READ_LOCK;
240462306a36Sopenharmony_ci
240562306a36Sopenharmony_ci	while (b) {
240662306a36Sopenharmony_ci		int dec = 0;
240762306a36Sopenharmony_ci
240862306a36Sopenharmony_ci		level = btrfs_header_level(b);
240962306a36Sopenharmony_ci		p->nodes[level] = b;
241062306a36Sopenharmony_ci
241162306a36Sopenharmony_ci		/*
241262306a36Sopenharmony_ci		 * we have a lock on b and as long as we aren't changing
241362306a36Sopenharmony_ci		 * the tree, there is no way to for the items in b to change.
241462306a36Sopenharmony_ci		 * It is safe to drop the lock on our parent before we
241562306a36Sopenharmony_ci		 * go through the expensive btree search on b.
241662306a36Sopenharmony_ci		 */
241762306a36Sopenharmony_ci		btrfs_unlock_up_safe(p, level + 1);
241862306a36Sopenharmony_ci
241962306a36Sopenharmony_ci		ret = btrfs_bin_search(b, 0, key, &slot);
242062306a36Sopenharmony_ci		if (ret < 0)
242162306a36Sopenharmony_ci			goto done;
242262306a36Sopenharmony_ci
242362306a36Sopenharmony_ci		if (level == 0) {
242462306a36Sopenharmony_ci			p->slots[level] = slot;
242562306a36Sopenharmony_ci			unlock_up(p, level, lowest_unlock, 0, NULL);
242662306a36Sopenharmony_ci			goto done;
242762306a36Sopenharmony_ci		}
242862306a36Sopenharmony_ci
242962306a36Sopenharmony_ci		if (ret && slot > 0) {
243062306a36Sopenharmony_ci			dec = 1;
243162306a36Sopenharmony_ci			slot--;
243262306a36Sopenharmony_ci		}
243362306a36Sopenharmony_ci		p->slots[level] = slot;
243462306a36Sopenharmony_ci		unlock_up(p, level, lowest_unlock, 0, NULL);
243562306a36Sopenharmony_ci
243662306a36Sopenharmony_ci		if (level == lowest_level) {
243762306a36Sopenharmony_ci			if (dec)
243862306a36Sopenharmony_ci				p->slots[level]++;
243962306a36Sopenharmony_ci			goto done;
244062306a36Sopenharmony_ci		}
244162306a36Sopenharmony_ci
244262306a36Sopenharmony_ci		err = read_block_for_search(root, p, &b, level, slot, key);
244362306a36Sopenharmony_ci		if (err == -EAGAIN)
244462306a36Sopenharmony_ci			goto again;
244562306a36Sopenharmony_ci		if (err) {
244662306a36Sopenharmony_ci			ret = err;
244762306a36Sopenharmony_ci			goto done;
244862306a36Sopenharmony_ci		}
244962306a36Sopenharmony_ci
245062306a36Sopenharmony_ci		level = btrfs_header_level(b);
245162306a36Sopenharmony_ci		btrfs_tree_read_lock(b);
245262306a36Sopenharmony_ci		b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq);
245362306a36Sopenharmony_ci		if (!b) {
245462306a36Sopenharmony_ci			ret = -ENOMEM;
245562306a36Sopenharmony_ci			goto done;
245662306a36Sopenharmony_ci		}
245762306a36Sopenharmony_ci		p->locks[level] = BTRFS_READ_LOCK;
245862306a36Sopenharmony_ci		p->nodes[level] = b;
245962306a36Sopenharmony_ci	}
246062306a36Sopenharmony_ci	ret = 1;
246162306a36Sopenharmony_cidone:
246262306a36Sopenharmony_ci	if (ret < 0)
246362306a36Sopenharmony_ci		btrfs_release_path(p);
246462306a36Sopenharmony_ci
246562306a36Sopenharmony_ci	return ret;
246662306a36Sopenharmony_ci}
246762306a36Sopenharmony_ci
246862306a36Sopenharmony_ci/*
246962306a36Sopenharmony_ci * Search the tree again to find a leaf with smaller keys.
247062306a36Sopenharmony_ci * Returns 0 if it found something.
247162306a36Sopenharmony_ci * Returns 1 if there are no smaller keys.
247262306a36Sopenharmony_ci * Returns < 0 on error.
247362306a36Sopenharmony_ci *
247462306a36Sopenharmony_ci * This may release the path, and so you may lose any locks held at the
247562306a36Sopenharmony_ci * time you call it.
247662306a36Sopenharmony_ci */
247762306a36Sopenharmony_cistatic int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
247862306a36Sopenharmony_ci{
247962306a36Sopenharmony_ci	struct btrfs_key key;
248062306a36Sopenharmony_ci	struct btrfs_key orig_key;
248162306a36Sopenharmony_ci	struct btrfs_disk_key found_key;
248262306a36Sopenharmony_ci	int ret;
248362306a36Sopenharmony_ci
248462306a36Sopenharmony_ci	btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
248562306a36Sopenharmony_ci	orig_key = key;
248662306a36Sopenharmony_ci
248762306a36Sopenharmony_ci	if (key.offset > 0) {
248862306a36Sopenharmony_ci		key.offset--;
248962306a36Sopenharmony_ci	} else if (key.type > 0) {
249062306a36Sopenharmony_ci		key.type--;
249162306a36Sopenharmony_ci		key.offset = (u64)-1;
249262306a36Sopenharmony_ci	} else if (key.objectid > 0) {
249362306a36Sopenharmony_ci		key.objectid--;
249462306a36Sopenharmony_ci		key.type = (u8)-1;
249562306a36Sopenharmony_ci		key.offset = (u64)-1;
249662306a36Sopenharmony_ci	} else {
249762306a36Sopenharmony_ci		return 1;
249862306a36Sopenharmony_ci	}
249962306a36Sopenharmony_ci
250062306a36Sopenharmony_ci	btrfs_release_path(path);
250162306a36Sopenharmony_ci	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
250262306a36Sopenharmony_ci	if (ret <= 0)
250362306a36Sopenharmony_ci		return ret;
250462306a36Sopenharmony_ci
250562306a36Sopenharmony_ci	/*
250662306a36Sopenharmony_ci	 * Previous key not found. Even if we were at slot 0 of the leaf we had
250762306a36Sopenharmony_ci	 * before releasing the path and calling btrfs_search_slot(), we now may
250862306a36Sopenharmony_ci	 * be in a slot pointing to the same original key - this can happen if
250962306a36Sopenharmony_ci	 * after we released the path, one of more items were moved from a
251062306a36Sopenharmony_ci	 * sibling leaf into the front of the leaf we had due to an insertion
251162306a36Sopenharmony_ci	 * (see push_leaf_right()).
251262306a36Sopenharmony_ci	 * If we hit this case and our slot is > 0 and just decrement the slot
251362306a36Sopenharmony_ci	 * so that the caller does not process the same key again, which may or
251462306a36Sopenharmony_ci	 * may not break the caller, depending on its logic.
251562306a36Sopenharmony_ci	 */
251662306a36Sopenharmony_ci	if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
251762306a36Sopenharmony_ci		btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
251862306a36Sopenharmony_ci		ret = comp_keys(&found_key, &orig_key);
251962306a36Sopenharmony_ci		if (ret == 0) {
252062306a36Sopenharmony_ci			if (path->slots[0] > 0) {
252162306a36Sopenharmony_ci				path->slots[0]--;
252262306a36Sopenharmony_ci				return 0;
252362306a36Sopenharmony_ci			}
252462306a36Sopenharmony_ci			/*
252562306a36Sopenharmony_ci			 * At slot 0, same key as before, it means orig_key is
252662306a36Sopenharmony_ci			 * the lowest, leftmost, key in the tree. We're done.
252762306a36Sopenharmony_ci			 */
252862306a36Sopenharmony_ci			return 1;
252962306a36Sopenharmony_ci		}
253062306a36Sopenharmony_ci	}
253162306a36Sopenharmony_ci
253262306a36Sopenharmony_ci	btrfs_item_key(path->nodes[0], &found_key, 0);
253362306a36Sopenharmony_ci	ret = comp_keys(&found_key, &key);
253462306a36Sopenharmony_ci	/*
253562306a36Sopenharmony_ci	 * We might have had an item with the previous key in the tree right
253662306a36Sopenharmony_ci	 * before we released our path. And after we released our path, that
253762306a36Sopenharmony_ci	 * item might have been pushed to the first slot (0) of the leaf we
253862306a36Sopenharmony_ci	 * were holding due to a tree balance. Alternatively, an item with the
253962306a36Sopenharmony_ci	 * previous key can exist as the only element of a leaf (big fat item).
254062306a36Sopenharmony_ci	 * Therefore account for these 2 cases, so that our callers (like
254162306a36Sopenharmony_ci	 * btrfs_previous_item) don't miss an existing item with a key matching
254262306a36Sopenharmony_ci	 * the previous key we computed above.
254362306a36Sopenharmony_ci	 */
254462306a36Sopenharmony_ci	if (ret <= 0)
254562306a36Sopenharmony_ci		return 0;
254662306a36Sopenharmony_ci	return 1;
254762306a36Sopenharmony_ci}
254862306a36Sopenharmony_ci
254962306a36Sopenharmony_ci/*
255062306a36Sopenharmony_ci * helper to use instead of search slot if no exact match is needed but
255162306a36Sopenharmony_ci * instead the next or previous item should be returned.
255262306a36Sopenharmony_ci * When find_higher is true, the next higher item is returned, the next lower
255362306a36Sopenharmony_ci * otherwise.
255462306a36Sopenharmony_ci * When return_any and find_higher are both true, and no higher item is found,
255562306a36Sopenharmony_ci * return the next lower instead.
255662306a36Sopenharmony_ci * When return_any is true and find_higher is false, and no lower item is found,
255762306a36Sopenharmony_ci * return the next higher instead.
255862306a36Sopenharmony_ci * It returns 0 if any item is found, 1 if none is found (tree empty), and
255962306a36Sopenharmony_ci * < 0 on error
256062306a36Sopenharmony_ci */
256162306a36Sopenharmony_ciint btrfs_search_slot_for_read(struct btrfs_root *root,
256262306a36Sopenharmony_ci			       const struct btrfs_key *key,
256362306a36Sopenharmony_ci			       struct btrfs_path *p, int find_higher,
256462306a36Sopenharmony_ci			       int return_any)
256562306a36Sopenharmony_ci{
256662306a36Sopenharmony_ci	int ret;
256762306a36Sopenharmony_ci	struct extent_buffer *leaf;
256862306a36Sopenharmony_ci
256962306a36Sopenharmony_ciagain:
257062306a36Sopenharmony_ci	ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
257162306a36Sopenharmony_ci	if (ret <= 0)
257262306a36Sopenharmony_ci		return ret;
257362306a36Sopenharmony_ci	/*
257462306a36Sopenharmony_ci	 * a return value of 1 means the path is at the position where the
257562306a36Sopenharmony_ci	 * item should be inserted. Normally this is the next bigger item,
257662306a36Sopenharmony_ci	 * but in case the previous item is the last in a leaf, path points
257762306a36Sopenharmony_ci	 * to the first free slot in the previous leaf, i.e. at an invalid
257862306a36Sopenharmony_ci	 * item.
257962306a36Sopenharmony_ci	 */
258062306a36Sopenharmony_ci	leaf = p->nodes[0];
258162306a36Sopenharmony_ci
258262306a36Sopenharmony_ci	if (find_higher) {
258362306a36Sopenharmony_ci		if (p->slots[0] >= btrfs_header_nritems(leaf)) {
258462306a36Sopenharmony_ci			ret = btrfs_next_leaf(root, p);
258562306a36Sopenharmony_ci			if (ret <= 0)
258662306a36Sopenharmony_ci				return ret;
258762306a36Sopenharmony_ci			if (!return_any)
258862306a36Sopenharmony_ci				return 1;
258962306a36Sopenharmony_ci			/*
259062306a36Sopenharmony_ci			 * no higher item found, return the next
259162306a36Sopenharmony_ci			 * lower instead
259262306a36Sopenharmony_ci			 */
259362306a36Sopenharmony_ci			return_any = 0;
259462306a36Sopenharmony_ci			find_higher = 0;
259562306a36Sopenharmony_ci			btrfs_release_path(p);
259662306a36Sopenharmony_ci			goto again;
259762306a36Sopenharmony_ci		}
259862306a36Sopenharmony_ci	} else {
259962306a36Sopenharmony_ci		if (p->slots[0] == 0) {
260062306a36Sopenharmony_ci			ret = btrfs_prev_leaf(root, p);
260162306a36Sopenharmony_ci			if (ret < 0)
260262306a36Sopenharmony_ci				return ret;
260362306a36Sopenharmony_ci			if (!ret) {
260462306a36Sopenharmony_ci				leaf = p->nodes[0];
260562306a36Sopenharmony_ci				if (p->slots[0] == btrfs_header_nritems(leaf))
260662306a36Sopenharmony_ci					p->slots[0]--;
260762306a36Sopenharmony_ci				return 0;
260862306a36Sopenharmony_ci			}
260962306a36Sopenharmony_ci			if (!return_any)
261062306a36Sopenharmony_ci				return 1;
261162306a36Sopenharmony_ci			/*
261262306a36Sopenharmony_ci			 * no lower item found, return the next
261362306a36Sopenharmony_ci			 * higher instead
261462306a36Sopenharmony_ci			 */
261562306a36Sopenharmony_ci			return_any = 0;
261662306a36Sopenharmony_ci			find_higher = 1;
261762306a36Sopenharmony_ci			btrfs_release_path(p);
261862306a36Sopenharmony_ci			goto again;
261962306a36Sopenharmony_ci		} else {
262062306a36Sopenharmony_ci			--p->slots[0];
262162306a36Sopenharmony_ci		}
262262306a36Sopenharmony_ci	}
262362306a36Sopenharmony_ci	return 0;
262462306a36Sopenharmony_ci}
262562306a36Sopenharmony_ci
262662306a36Sopenharmony_ci/*
262762306a36Sopenharmony_ci * Execute search and call btrfs_previous_item to traverse backwards if the item
262862306a36Sopenharmony_ci * was not found.
262962306a36Sopenharmony_ci *
263062306a36Sopenharmony_ci * Return 0 if found, 1 if not found and < 0 if error.
263162306a36Sopenharmony_ci */
263262306a36Sopenharmony_ciint btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key,
263362306a36Sopenharmony_ci			   struct btrfs_path *path)
263462306a36Sopenharmony_ci{
263562306a36Sopenharmony_ci	int ret;
263662306a36Sopenharmony_ci
263762306a36Sopenharmony_ci	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
263862306a36Sopenharmony_ci	if (ret > 0)
263962306a36Sopenharmony_ci		ret = btrfs_previous_item(root, path, key->objectid, key->type);
264062306a36Sopenharmony_ci
264162306a36Sopenharmony_ci	if (ret == 0)
264262306a36Sopenharmony_ci		btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]);
264362306a36Sopenharmony_ci
264462306a36Sopenharmony_ci	return ret;
264562306a36Sopenharmony_ci}
264662306a36Sopenharmony_ci
264762306a36Sopenharmony_ci/*
264862306a36Sopenharmony_ci * Search for a valid slot for the given path.
264962306a36Sopenharmony_ci *
265062306a36Sopenharmony_ci * @root:	The root node of the tree.
265162306a36Sopenharmony_ci * @key:	Will contain a valid item if found.
265262306a36Sopenharmony_ci * @path:	The starting point to validate the slot.
265362306a36Sopenharmony_ci *
265462306a36Sopenharmony_ci * Return: 0  if the item is valid
265562306a36Sopenharmony_ci *         1  if not found
265662306a36Sopenharmony_ci *         <0 if error.
265762306a36Sopenharmony_ci */
265862306a36Sopenharmony_ciint btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key,
265962306a36Sopenharmony_ci			      struct btrfs_path *path)
266062306a36Sopenharmony_ci{
266162306a36Sopenharmony_ci	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
266262306a36Sopenharmony_ci		int ret;
266362306a36Sopenharmony_ci
266462306a36Sopenharmony_ci		ret = btrfs_next_leaf(root, path);
266562306a36Sopenharmony_ci		if (ret)
266662306a36Sopenharmony_ci			return ret;
266762306a36Sopenharmony_ci	}
266862306a36Sopenharmony_ci
266962306a36Sopenharmony_ci	btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]);
267062306a36Sopenharmony_ci	return 0;
267162306a36Sopenharmony_ci}
267262306a36Sopenharmony_ci
267362306a36Sopenharmony_ci/*
267462306a36Sopenharmony_ci * adjust the pointers going up the tree, starting at level
267562306a36Sopenharmony_ci * making sure the right key of each node is points to 'key'.
267662306a36Sopenharmony_ci * This is used after shifting pointers to the left, so it stops
267762306a36Sopenharmony_ci * fixing up pointers when a given leaf/node is not in slot 0 of the
267862306a36Sopenharmony_ci * higher levels
267962306a36Sopenharmony_ci *
268062306a36Sopenharmony_ci */
268162306a36Sopenharmony_cistatic void fixup_low_keys(struct btrfs_trans_handle *trans,
268262306a36Sopenharmony_ci			   struct btrfs_path *path,
268362306a36Sopenharmony_ci			   struct btrfs_disk_key *key, int level)
268462306a36Sopenharmony_ci{
268562306a36Sopenharmony_ci	int i;
268662306a36Sopenharmony_ci	struct extent_buffer *t;
268762306a36Sopenharmony_ci	int ret;
268862306a36Sopenharmony_ci
268962306a36Sopenharmony_ci	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
269062306a36Sopenharmony_ci		int tslot = path->slots[i];
269162306a36Sopenharmony_ci
269262306a36Sopenharmony_ci		if (!path->nodes[i])
269362306a36Sopenharmony_ci			break;
269462306a36Sopenharmony_ci		t = path->nodes[i];
269562306a36Sopenharmony_ci		ret = btrfs_tree_mod_log_insert_key(t, tslot,
269662306a36Sopenharmony_ci						    BTRFS_MOD_LOG_KEY_REPLACE);
269762306a36Sopenharmony_ci		BUG_ON(ret < 0);
269862306a36Sopenharmony_ci		btrfs_set_node_key(t, key, tslot);
269962306a36Sopenharmony_ci		btrfs_mark_buffer_dirty(trans, path->nodes[i]);
270062306a36Sopenharmony_ci		if (tslot != 0)
270162306a36Sopenharmony_ci			break;
270262306a36Sopenharmony_ci	}
270362306a36Sopenharmony_ci}
270462306a36Sopenharmony_ci
270562306a36Sopenharmony_ci/*
270662306a36Sopenharmony_ci * update item key.
270762306a36Sopenharmony_ci *
270862306a36Sopenharmony_ci * This function isn't completely safe. It's the caller's responsibility
270962306a36Sopenharmony_ci * that the new key won't break the order
271062306a36Sopenharmony_ci */
271162306a36Sopenharmony_civoid btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
271262306a36Sopenharmony_ci			     struct btrfs_path *path,
271362306a36Sopenharmony_ci			     const struct btrfs_key *new_key)
271462306a36Sopenharmony_ci{
271562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
271662306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
271762306a36Sopenharmony_ci	struct extent_buffer *eb;
271862306a36Sopenharmony_ci	int slot;
271962306a36Sopenharmony_ci
272062306a36Sopenharmony_ci	eb = path->nodes[0];
272162306a36Sopenharmony_ci	slot = path->slots[0];
272262306a36Sopenharmony_ci	if (slot > 0) {
272362306a36Sopenharmony_ci		btrfs_item_key(eb, &disk_key, slot - 1);
272462306a36Sopenharmony_ci		if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
272562306a36Sopenharmony_ci			btrfs_print_leaf(eb);
272662306a36Sopenharmony_ci			btrfs_crit(fs_info,
272762306a36Sopenharmony_ci		"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
272862306a36Sopenharmony_ci				   slot, btrfs_disk_key_objectid(&disk_key),
272962306a36Sopenharmony_ci				   btrfs_disk_key_type(&disk_key),
273062306a36Sopenharmony_ci				   btrfs_disk_key_offset(&disk_key),
273162306a36Sopenharmony_ci				   new_key->objectid, new_key->type,
273262306a36Sopenharmony_ci				   new_key->offset);
273362306a36Sopenharmony_ci			BUG();
273462306a36Sopenharmony_ci		}
273562306a36Sopenharmony_ci	}
273662306a36Sopenharmony_ci	if (slot < btrfs_header_nritems(eb) - 1) {
273762306a36Sopenharmony_ci		btrfs_item_key(eb, &disk_key, slot + 1);
273862306a36Sopenharmony_ci		if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
273962306a36Sopenharmony_ci			btrfs_print_leaf(eb);
274062306a36Sopenharmony_ci			btrfs_crit(fs_info,
274162306a36Sopenharmony_ci		"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
274262306a36Sopenharmony_ci				   slot, btrfs_disk_key_objectid(&disk_key),
274362306a36Sopenharmony_ci				   btrfs_disk_key_type(&disk_key),
274462306a36Sopenharmony_ci				   btrfs_disk_key_offset(&disk_key),
274562306a36Sopenharmony_ci				   new_key->objectid, new_key->type,
274662306a36Sopenharmony_ci				   new_key->offset);
274762306a36Sopenharmony_ci			BUG();
274862306a36Sopenharmony_ci		}
274962306a36Sopenharmony_ci	}
275062306a36Sopenharmony_ci
275162306a36Sopenharmony_ci	btrfs_cpu_key_to_disk(&disk_key, new_key);
275262306a36Sopenharmony_ci	btrfs_set_item_key(eb, &disk_key, slot);
275362306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, eb);
275462306a36Sopenharmony_ci	if (slot == 0)
275562306a36Sopenharmony_ci		fixup_low_keys(trans, path, &disk_key, 1);
275662306a36Sopenharmony_ci}
275762306a36Sopenharmony_ci
275862306a36Sopenharmony_ci/*
275962306a36Sopenharmony_ci * Check key order of two sibling extent buffers.
276062306a36Sopenharmony_ci *
276162306a36Sopenharmony_ci * Return true if something is wrong.
276262306a36Sopenharmony_ci * Return false if everything is fine.
276362306a36Sopenharmony_ci *
276462306a36Sopenharmony_ci * Tree-checker only works inside one tree block, thus the following
276562306a36Sopenharmony_ci * corruption can not be detected by tree-checker:
276662306a36Sopenharmony_ci *
276762306a36Sopenharmony_ci * Leaf @left			| Leaf @right
276862306a36Sopenharmony_ci * --------------------------------------------------------------
276962306a36Sopenharmony_ci * | 1 | 2 | 3 | 4 | 5 | f6 |   | 7 | 8 |
277062306a36Sopenharmony_ci *
277162306a36Sopenharmony_ci * Key f6 in leaf @left itself is valid, but not valid when the next
277262306a36Sopenharmony_ci * key in leaf @right is 7.
277362306a36Sopenharmony_ci * This can only be checked at tree block merge time.
277462306a36Sopenharmony_ci * And since tree checker has ensured all key order in each tree block
277562306a36Sopenharmony_ci * is correct, we only need to bother the last key of @left and the first
277662306a36Sopenharmony_ci * key of @right.
277762306a36Sopenharmony_ci */
277862306a36Sopenharmony_cistatic bool check_sibling_keys(struct extent_buffer *left,
277962306a36Sopenharmony_ci			       struct extent_buffer *right)
278062306a36Sopenharmony_ci{
278162306a36Sopenharmony_ci	struct btrfs_key left_last;
278262306a36Sopenharmony_ci	struct btrfs_key right_first;
278362306a36Sopenharmony_ci	int level = btrfs_header_level(left);
278462306a36Sopenharmony_ci	int nr_left = btrfs_header_nritems(left);
278562306a36Sopenharmony_ci	int nr_right = btrfs_header_nritems(right);
278662306a36Sopenharmony_ci
278762306a36Sopenharmony_ci	/* No key to check in one of the tree blocks */
278862306a36Sopenharmony_ci	if (!nr_left || !nr_right)
278962306a36Sopenharmony_ci		return false;
279062306a36Sopenharmony_ci
279162306a36Sopenharmony_ci	if (level) {
279262306a36Sopenharmony_ci		btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
279362306a36Sopenharmony_ci		btrfs_node_key_to_cpu(right, &right_first, 0);
279462306a36Sopenharmony_ci	} else {
279562306a36Sopenharmony_ci		btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
279662306a36Sopenharmony_ci		btrfs_item_key_to_cpu(right, &right_first, 0);
279762306a36Sopenharmony_ci	}
279862306a36Sopenharmony_ci
279962306a36Sopenharmony_ci	if (unlikely(btrfs_comp_cpu_keys(&left_last, &right_first) >= 0)) {
280062306a36Sopenharmony_ci		btrfs_crit(left->fs_info, "left extent buffer:");
280162306a36Sopenharmony_ci		btrfs_print_tree(left, false);
280262306a36Sopenharmony_ci		btrfs_crit(left->fs_info, "right extent buffer:");
280362306a36Sopenharmony_ci		btrfs_print_tree(right, false);
280462306a36Sopenharmony_ci		btrfs_crit(left->fs_info,
280562306a36Sopenharmony_ci"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
280662306a36Sopenharmony_ci			   left_last.objectid, left_last.type,
280762306a36Sopenharmony_ci			   left_last.offset, right_first.objectid,
280862306a36Sopenharmony_ci			   right_first.type, right_first.offset);
280962306a36Sopenharmony_ci		return true;
281062306a36Sopenharmony_ci	}
281162306a36Sopenharmony_ci	return false;
281262306a36Sopenharmony_ci}
281362306a36Sopenharmony_ci
281462306a36Sopenharmony_ci/*
281562306a36Sopenharmony_ci * try to push data from one node into the next node left in the
281662306a36Sopenharmony_ci * tree.
281762306a36Sopenharmony_ci *
281862306a36Sopenharmony_ci * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
281962306a36Sopenharmony_ci * error, and > 0 if there was no room in the left hand block.
282062306a36Sopenharmony_ci */
282162306a36Sopenharmony_cistatic int push_node_left(struct btrfs_trans_handle *trans,
282262306a36Sopenharmony_ci			  struct extent_buffer *dst,
282362306a36Sopenharmony_ci			  struct extent_buffer *src, int empty)
282462306a36Sopenharmony_ci{
282562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
282662306a36Sopenharmony_ci	int push_items = 0;
282762306a36Sopenharmony_ci	int src_nritems;
282862306a36Sopenharmony_ci	int dst_nritems;
282962306a36Sopenharmony_ci	int ret = 0;
283062306a36Sopenharmony_ci
283162306a36Sopenharmony_ci	src_nritems = btrfs_header_nritems(src);
283262306a36Sopenharmony_ci	dst_nritems = btrfs_header_nritems(dst);
283362306a36Sopenharmony_ci	push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
283462306a36Sopenharmony_ci	WARN_ON(btrfs_header_generation(src) != trans->transid);
283562306a36Sopenharmony_ci	WARN_ON(btrfs_header_generation(dst) != trans->transid);
283662306a36Sopenharmony_ci
283762306a36Sopenharmony_ci	if (!empty && src_nritems <= 8)
283862306a36Sopenharmony_ci		return 1;
283962306a36Sopenharmony_ci
284062306a36Sopenharmony_ci	if (push_items <= 0)
284162306a36Sopenharmony_ci		return 1;
284262306a36Sopenharmony_ci
284362306a36Sopenharmony_ci	if (empty) {
284462306a36Sopenharmony_ci		push_items = min(src_nritems, push_items);
284562306a36Sopenharmony_ci		if (push_items < src_nritems) {
284662306a36Sopenharmony_ci			/* leave at least 8 pointers in the node if
284762306a36Sopenharmony_ci			 * we aren't going to empty it
284862306a36Sopenharmony_ci			 */
284962306a36Sopenharmony_ci			if (src_nritems - push_items < 8) {
285062306a36Sopenharmony_ci				if (push_items <= 8)
285162306a36Sopenharmony_ci					return 1;
285262306a36Sopenharmony_ci				push_items -= 8;
285362306a36Sopenharmony_ci			}
285462306a36Sopenharmony_ci		}
285562306a36Sopenharmony_ci	} else
285662306a36Sopenharmony_ci		push_items = min(src_nritems - 8, push_items);
285762306a36Sopenharmony_ci
285862306a36Sopenharmony_ci	/* dst is the left eb, src is the middle eb */
285962306a36Sopenharmony_ci	if (check_sibling_keys(dst, src)) {
286062306a36Sopenharmony_ci		ret = -EUCLEAN;
286162306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
286262306a36Sopenharmony_ci		return ret;
286362306a36Sopenharmony_ci	}
286462306a36Sopenharmony_ci	ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
286562306a36Sopenharmony_ci	if (ret) {
286662306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
286762306a36Sopenharmony_ci		return ret;
286862306a36Sopenharmony_ci	}
286962306a36Sopenharmony_ci	copy_extent_buffer(dst, src,
287062306a36Sopenharmony_ci			   btrfs_node_key_ptr_offset(dst, dst_nritems),
287162306a36Sopenharmony_ci			   btrfs_node_key_ptr_offset(src, 0),
287262306a36Sopenharmony_ci			   push_items * sizeof(struct btrfs_key_ptr));
287362306a36Sopenharmony_ci
287462306a36Sopenharmony_ci	if (push_items < src_nritems) {
287562306a36Sopenharmony_ci		/*
287662306a36Sopenharmony_ci		 * btrfs_tree_mod_log_eb_copy handles logging the move, so we
287762306a36Sopenharmony_ci		 * don't need to do an explicit tree mod log operation for it.
287862306a36Sopenharmony_ci		 */
287962306a36Sopenharmony_ci		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(src, 0),
288062306a36Sopenharmony_ci				      btrfs_node_key_ptr_offset(src, push_items),
288162306a36Sopenharmony_ci				      (src_nritems - push_items) *
288262306a36Sopenharmony_ci				      sizeof(struct btrfs_key_ptr));
288362306a36Sopenharmony_ci	}
288462306a36Sopenharmony_ci	btrfs_set_header_nritems(src, src_nritems - push_items);
288562306a36Sopenharmony_ci	btrfs_set_header_nritems(dst, dst_nritems + push_items);
288662306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, src);
288762306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, dst);
288862306a36Sopenharmony_ci
288962306a36Sopenharmony_ci	return ret;
289062306a36Sopenharmony_ci}
289162306a36Sopenharmony_ci
289262306a36Sopenharmony_ci/*
289362306a36Sopenharmony_ci * try to push data from one node into the next node right in the
289462306a36Sopenharmony_ci * tree.
289562306a36Sopenharmony_ci *
289662306a36Sopenharmony_ci * returns 0 if some ptrs were pushed, < 0 if there was some horrible
289762306a36Sopenharmony_ci * error, and > 0 if there was no room in the right hand block.
289862306a36Sopenharmony_ci *
289962306a36Sopenharmony_ci * this will  only push up to 1/2 the contents of the left node over
290062306a36Sopenharmony_ci */
290162306a36Sopenharmony_cistatic int balance_node_right(struct btrfs_trans_handle *trans,
290262306a36Sopenharmony_ci			      struct extent_buffer *dst,
290362306a36Sopenharmony_ci			      struct extent_buffer *src)
290462306a36Sopenharmony_ci{
290562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
290662306a36Sopenharmony_ci	int push_items = 0;
290762306a36Sopenharmony_ci	int max_push;
290862306a36Sopenharmony_ci	int src_nritems;
290962306a36Sopenharmony_ci	int dst_nritems;
291062306a36Sopenharmony_ci	int ret = 0;
291162306a36Sopenharmony_ci
291262306a36Sopenharmony_ci	WARN_ON(btrfs_header_generation(src) != trans->transid);
291362306a36Sopenharmony_ci	WARN_ON(btrfs_header_generation(dst) != trans->transid);
291462306a36Sopenharmony_ci
291562306a36Sopenharmony_ci	src_nritems = btrfs_header_nritems(src);
291662306a36Sopenharmony_ci	dst_nritems = btrfs_header_nritems(dst);
291762306a36Sopenharmony_ci	push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
291862306a36Sopenharmony_ci	if (push_items <= 0)
291962306a36Sopenharmony_ci		return 1;
292062306a36Sopenharmony_ci
292162306a36Sopenharmony_ci	if (src_nritems < 4)
292262306a36Sopenharmony_ci		return 1;
292362306a36Sopenharmony_ci
292462306a36Sopenharmony_ci	max_push = src_nritems / 2 + 1;
292562306a36Sopenharmony_ci	/* don't try to empty the node */
292662306a36Sopenharmony_ci	if (max_push >= src_nritems)
292762306a36Sopenharmony_ci		return 1;
292862306a36Sopenharmony_ci
292962306a36Sopenharmony_ci	if (max_push < push_items)
293062306a36Sopenharmony_ci		push_items = max_push;
293162306a36Sopenharmony_ci
293262306a36Sopenharmony_ci	/* dst is the right eb, src is the middle eb */
293362306a36Sopenharmony_ci	if (check_sibling_keys(src, dst)) {
293462306a36Sopenharmony_ci		ret = -EUCLEAN;
293562306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
293662306a36Sopenharmony_ci		return ret;
293762306a36Sopenharmony_ci	}
293862306a36Sopenharmony_ci
293962306a36Sopenharmony_ci	/*
294062306a36Sopenharmony_ci	 * btrfs_tree_mod_log_eb_copy handles logging the move, so we don't
294162306a36Sopenharmony_ci	 * need to do an explicit tree mod log operation for it.
294262306a36Sopenharmony_ci	 */
294362306a36Sopenharmony_ci	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(dst, push_items),
294462306a36Sopenharmony_ci				      btrfs_node_key_ptr_offset(dst, 0),
294562306a36Sopenharmony_ci				      (dst_nritems) *
294662306a36Sopenharmony_ci				      sizeof(struct btrfs_key_ptr));
294762306a36Sopenharmony_ci
294862306a36Sopenharmony_ci	ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
294962306a36Sopenharmony_ci					 push_items);
295062306a36Sopenharmony_ci	if (ret) {
295162306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
295262306a36Sopenharmony_ci		return ret;
295362306a36Sopenharmony_ci	}
295462306a36Sopenharmony_ci	copy_extent_buffer(dst, src,
295562306a36Sopenharmony_ci			   btrfs_node_key_ptr_offset(dst, 0),
295662306a36Sopenharmony_ci			   btrfs_node_key_ptr_offset(src, src_nritems - push_items),
295762306a36Sopenharmony_ci			   push_items * sizeof(struct btrfs_key_ptr));
295862306a36Sopenharmony_ci
295962306a36Sopenharmony_ci	btrfs_set_header_nritems(src, src_nritems - push_items);
296062306a36Sopenharmony_ci	btrfs_set_header_nritems(dst, dst_nritems + push_items);
296162306a36Sopenharmony_ci
296262306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, src);
296362306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, dst);
296462306a36Sopenharmony_ci
296562306a36Sopenharmony_ci	return ret;
296662306a36Sopenharmony_ci}
296762306a36Sopenharmony_ci
296862306a36Sopenharmony_ci/*
296962306a36Sopenharmony_ci * helper function to insert a new root level in the tree.
297062306a36Sopenharmony_ci * A new node is allocated, and a single item is inserted to
297162306a36Sopenharmony_ci * point to the existing root
297262306a36Sopenharmony_ci *
297362306a36Sopenharmony_ci * returns zero on success or < 0 on failure.
297462306a36Sopenharmony_ci */
297562306a36Sopenharmony_cistatic noinline int insert_new_root(struct btrfs_trans_handle *trans,
297662306a36Sopenharmony_ci			   struct btrfs_root *root,
297762306a36Sopenharmony_ci			   struct btrfs_path *path, int level)
297862306a36Sopenharmony_ci{
297962306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
298062306a36Sopenharmony_ci	u64 lower_gen;
298162306a36Sopenharmony_ci	struct extent_buffer *lower;
298262306a36Sopenharmony_ci	struct extent_buffer *c;
298362306a36Sopenharmony_ci	struct extent_buffer *old;
298462306a36Sopenharmony_ci	struct btrfs_disk_key lower_key;
298562306a36Sopenharmony_ci	int ret;
298662306a36Sopenharmony_ci
298762306a36Sopenharmony_ci	BUG_ON(path->nodes[level]);
298862306a36Sopenharmony_ci	BUG_ON(path->nodes[level-1] != root->node);
298962306a36Sopenharmony_ci
299062306a36Sopenharmony_ci	lower = path->nodes[level-1];
299162306a36Sopenharmony_ci	if (level == 1)
299262306a36Sopenharmony_ci		btrfs_item_key(lower, &lower_key, 0);
299362306a36Sopenharmony_ci	else
299462306a36Sopenharmony_ci		btrfs_node_key(lower, &lower_key, 0);
299562306a36Sopenharmony_ci
299662306a36Sopenharmony_ci	c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
299762306a36Sopenharmony_ci				   &lower_key, level, root->node->start, 0,
299862306a36Sopenharmony_ci				   BTRFS_NESTING_NEW_ROOT);
299962306a36Sopenharmony_ci	if (IS_ERR(c))
300062306a36Sopenharmony_ci		return PTR_ERR(c);
300162306a36Sopenharmony_ci
300262306a36Sopenharmony_ci	root_add_used(root, fs_info->nodesize);
300362306a36Sopenharmony_ci
300462306a36Sopenharmony_ci	btrfs_set_header_nritems(c, 1);
300562306a36Sopenharmony_ci	btrfs_set_node_key(c, &lower_key, 0);
300662306a36Sopenharmony_ci	btrfs_set_node_blockptr(c, 0, lower->start);
300762306a36Sopenharmony_ci	lower_gen = btrfs_header_generation(lower);
300862306a36Sopenharmony_ci	WARN_ON(lower_gen != trans->transid);
300962306a36Sopenharmony_ci
301062306a36Sopenharmony_ci	btrfs_set_node_ptr_generation(c, 0, lower_gen);
301162306a36Sopenharmony_ci
301262306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, c);
301362306a36Sopenharmony_ci
301462306a36Sopenharmony_ci	old = root->node;
301562306a36Sopenharmony_ci	ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
301662306a36Sopenharmony_ci	if (ret < 0) {
301762306a36Sopenharmony_ci		btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1);
301862306a36Sopenharmony_ci		btrfs_tree_unlock(c);
301962306a36Sopenharmony_ci		free_extent_buffer(c);
302062306a36Sopenharmony_ci		return ret;
302162306a36Sopenharmony_ci	}
302262306a36Sopenharmony_ci	rcu_assign_pointer(root->node, c);
302362306a36Sopenharmony_ci
302462306a36Sopenharmony_ci	/* the super has an extra ref to root->node */
302562306a36Sopenharmony_ci	free_extent_buffer(old);
302662306a36Sopenharmony_ci
302762306a36Sopenharmony_ci	add_root_to_dirty_list(root);
302862306a36Sopenharmony_ci	atomic_inc(&c->refs);
302962306a36Sopenharmony_ci	path->nodes[level] = c;
303062306a36Sopenharmony_ci	path->locks[level] = BTRFS_WRITE_LOCK;
303162306a36Sopenharmony_ci	path->slots[level] = 0;
303262306a36Sopenharmony_ci	return 0;
303362306a36Sopenharmony_ci}
303462306a36Sopenharmony_ci
303562306a36Sopenharmony_ci/*
303662306a36Sopenharmony_ci * worker function to insert a single pointer in a node.
303762306a36Sopenharmony_ci * the node should have enough room for the pointer already
303862306a36Sopenharmony_ci *
303962306a36Sopenharmony_ci * slot and level indicate where you want the key to go, and
304062306a36Sopenharmony_ci * blocknr is the block the key points to.
304162306a36Sopenharmony_ci */
304262306a36Sopenharmony_cistatic int insert_ptr(struct btrfs_trans_handle *trans,
304362306a36Sopenharmony_ci		      struct btrfs_path *path,
304462306a36Sopenharmony_ci		      struct btrfs_disk_key *key, u64 bytenr,
304562306a36Sopenharmony_ci		      int slot, int level)
304662306a36Sopenharmony_ci{
304762306a36Sopenharmony_ci	struct extent_buffer *lower;
304862306a36Sopenharmony_ci	int nritems;
304962306a36Sopenharmony_ci	int ret;
305062306a36Sopenharmony_ci
305162306a36Sopenharmony_ci	BUG_ON(!path->nodes[level]);
305262306a36Sopenharmony_ci	btrfs_assert_tree_write_locked(path->nodes[level]);
305362306a36Sopenharmony_ci	lower = path->nodes[level];
305462306a36Sopenharmony_ci	nritems = btrfs_header_nritems(lower);
305562306a36Sopenharmony_ci	BUG_ON(slot > nritems);
305662306a36Sopenharmony_ci	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
305762306a36Sopenharmony_ci	if (slot != nritems) {
305862306a36Sopenharmony_ci		if (level) {
305962306a36Sopenharmony_ci			ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
306062306a36Sopenharmony_ci					slot, nritems - slot);
306162306a36Sopenharmony_ci			if (ret < 0) {
306262306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
306362306a36Sopenharmony_ci				return ret;
306462306a36Sopenharmony_ci			}
306562306a36Sopenharmony_ci		}
306662306a36Sopenharmony_ci		memmove_extent_buffer(lower,
306762306a36Sopenharmony_ci			      btrfs_node_key_ptr_offset(lower, slot + 1),
306862306a36Sopenharmony_ci			      btrfs_node_key_ptr_offset(lower, slot),
306962306a36Sopenharmony_ci			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
307062306a36Sopenharmony_ci	}
307162306a36Sopenharmony_ci	if (level) {
307262306a36Sopenharmony_ci		ret = btrfs_tree_mod_log_insert_key(lower, slot,
307362306a36Sopenharmony_ci						    BTRFS_MOD_LOG_KEY_ADD);
307462306a36Sopenharmony_ci		if (ret < 0) {
307562306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
307662306a36Sopenharmony_ci			return ret;
307762306a36Sopenharmony_ci		}
307862306a36Sopenharmony_ci	}
307962306a36Sopenharmony_ci	btrfs_set_node_key(lower, key, slot);
308062306a36Sopenharmony_ci	btrfs_set_node_blockptr(lower, slot, bytenr);
308162306a36Sopenharmony_ci	WARN_ON(trans->transid == 0);
308262306a36Sopenharmony_ci	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
308362306a36Sopenharmony_ci	btrfs_set_header_nritems(lower, nritems + 1);
308462306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, lower);
308562306a36Sopenharmony_ci
308662306a36Sopenharmony_ci	return 0;
308762306a36Sopenharmony_ci}
308862306a36Sopenharmony_ci
308962306a36Sopenharmony_ci/*
309062306a36Sopenharmony_ci * split the node at the specified level in path in two.
309162306a36Sopenharmony_ci * The path is corrected to point to the appropriate node after the split
309262306a36Sopenharmony_ci *
309362306a36Sopenharmony_ci * Before splitting this tries to make some room in the node by pushing
309462306a36Sopenharmony_ci * left and right, if either one works, it returns right away.
309562306a36Sopenharmony_ci *
309662306a36Sopenharmony_ci * returns 0 on success and < 0 on failure
309762306a36Sopenharmony_ci */
309862306a36Sopenharmony_cistatic noinline int split_node(struct btrfs_trans_handle *trans,
309962306a36Sopenharmony_ci			       struct btrfs_root *root,
310062306a36Sopenharmony_ci			       struct btrfs_path *path, int level)
310162306a36Sopenharmony_ci{
310262306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
310362306a36Sopenharmony_ci	struct extent_buffer *c;
310462306a36Sopenharmony_ci	struct extent_buffer *split;
310562306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
310662306a36Sopenharmony_ci	int mid;
310762306a36Sopenharmony_ci	int ret;
310862306a36Sopenharmony_ci	u32 c_nritems;
310962306a36Sopenharmony_ci
311062306a36Sopenharmony_ci	c = path->nodes[level];
311162306a36Sopenharmony_ci	WARN_ON(btrfs_header_generation(c) != trans->transid);
311262306a36Sopenharmony_ci	if (c == root->node) {
311362306a36Sopenharmony_ci		/*
311462306a36Sopenharmony_ci		 * trying to split the root, lets make a new one
311562306a36Sopenharmony_ci		 *
311662306a36Sopenharmony_ci		 * tree mod log: We don't log_removal old root in
311762306a36Sopenharmony_ci		 * insert_new_root, because that root buffer will be kept as a
311862306a36Sopenharmony_ci		 * normal node. We are going to log removal of half of the
311962306a36Sopenharmony_ci		 * elements below with btrfs_tree_mod_log_eb_copy(). We're
312062306a36Sopenharmony_ci		 * holding a tree lock on the buffer, which is why we cannot
312162306a36Sopenharmony_ci		 * race with other tree_mod_log users.
312262306a36Sopenharmony_ci		 */
312362306a36Sopenharmony_ci		ret = insert_new_root(trans, root, path, level + 1);
312462306a36Sopenharmony_ci		if (ret)
312562306a36Sopenharmony_ci			return ret;
312662306a36Sopenharmony_ci	} else {
312762306a36Sopenharmony_ci		ret = push_nodes_for_insert(trans, root, path, level);
312862306a36Sopenharmony_ci		c = path->nodes[level];
312962306a36Sopenharmony_ci		if (!ret && btrfs_header_nritems(c) <
313062306a36Sopenharmony_ci		    BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
313162306a36Sopenharmony_ci			return 0;
313262306a36Sopenharmony_ci		if (ret < 0)
313362306a36Sopenharmony_ci			return ret;
313462306a36Sopenharmony_ci	}
313562306a36Sopenharmony_ci
313662306a36Sopenharmony_ci	c_nritems = btrfs_header_nritems(c);
313762306a36Sopenharmony_ci	mid = (c_nritems + 1) / 2;
313862306a36Sopenharmony_ci	btrfs_node_key(c, &disk_key, mid);
313962306a36Sopenharmony_ci
314062306a36Sopenharmony_ci	split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
314162306a36Sopenharmony_ci				       &disk_key, level, c->start, 0,
314262306a36Sopenharmony_ci				       BTRFS_NESTING_SPLIT);
314362306a36Sopenharmony_ci	if (IS_ERR(split))
314462306a36Sopenharmony_ci		return PTR_ERR(split);
314562306a36Sopenharmony_ci
314662306a36Sopenharmony_ci	root_add_used(root, fs_info->nodesize);
314762306a36Sopenharmony_ci	ASSERT(btrfs_header_level(c) == level);
314862306a36Sopenharmony_ci
314962306a36Sopenharmony_ci	ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
315062306a36Sopenharmony_ci	if (ret) {
315162306a36Sopenharmony_ci		btrfs_tree_unlock(split);
315262306a36Sopenharmony_ci		free_extent_buffer(split);
315362306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
315462306a36Sopenharmony_ci		return ret;
315562306a36Sopenharmony_ci	}
315662306a36Sopenharmony_ci	copy_extent_buffer(split, c,
315762306a36Sopenharmony_ci			   btrfs_node_key_ptr_offset(split, 0),
315862306a36Sopenharmony_ci			   btrfs_node_key_ptr_offset(c, mid),
315962306a36Sopenharmony_ci			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
316062306a36Sopenharmony_ci	btrfs_set_header_nritems(split, c_nritems - mid);
316162306a36Sopenharmony_ci	btrfs_set_header_nritems(c, mid);
316262306a36Sopenharmony_ci
316362306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, c);
316462306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, split);
316562306a36Sopenharmony_ci
316662306a36Sopenharmony_ci	ret = insert_ptr(trans, path, &disk_key, split->start,
316762306a36Sopenharmony_ci			 path->slots[level + 1] + 1, level + 1);
316862306a36Sopenharmony_ci	if (ret < 0) {
316962306a36Sopenharmony_ci		btrfs_tree_unlock(split);
317062306a36Sopenharmony_ci		free_extent_buffer(split);
317162306a36Sopenharmony_ci		return ret;
317262306a36Sopenharmony_ci	}
317362306a36Sopenharmony_ci
317462306a36Sopenharmony_ci	if (path->slots[level] >= mid) {
317562306a36Sopenharmony_ci		path->slots[level] -= mid;
317662306a36Sopenharmony_ci		btrfs_tree_unlock(c);
317762306a36Sopenharmony_ci		free_extent_buffer(c);
317862306a36Sopenharmony_ci		path->nodes[level] = split;
317962306a36Sopenharmony_ci		path->slots[level + 1] += 1;
318062306a36Sopenharmony_ci	} else {
318162306a36Sopenharmony_ci		btrfs_tree_unlock(split);
318262306a36Sopenharmony_ci		free_extent_buffer(split);
318362306a36Sopenharmony_ci	}
318462306a36Sopenharmony_ci	return 0;
318562306a36Sopenharmony_ci}
318662306a36Sopenharmony_ci
318762306a36Sopenharmony_ci/*
318862306a36Sopenharmony_ci * how many bytes are required to store the items in a leaf.  start
318962306a36Sopenharmony_ci * and nr indicate which items in the leaf to check.  This totals up the
319062306a36Sopenharmony_ci * space used both by the item structs and the item data
319162306a36Sopenharmony_ci */
319262306a36Sopenharmony_cistatic int leaf_space_used(const struct extent_buffer *l, int start, int nr)
319362306a36Sopenharmony_ci{
319462306a36Sopenharmony_ci	int data_len;
319562306a36Sopenharmony_ci	int nritems = btrfs_header_nritems(l);
319662306a36Sopenharmony_ci	int end = min(nritems, start + nr) - 1;
319762306a36Sopenharmony_ci
319862306a36Sopenharmony_ci	if (!nr)
319962306a36Sopenharmony_ci		return 0;
320062306a36Sopenharmony_ci	data_len = btrfs_item_offset(l, start) + btrfs_item_size(l, start);
320162306a36Sopenharmony_ci	data_len = data_len - btrfs_item_offset(l, end);
320262306a36Sopenharmony_ci	data_len += sizeof(struct btrfs_item) * nr;
320362306a36Sopenharmony_ci	WARN_ON(data_len < 0);
320462306a36Sopenharmony_ci	return data_len;
320562306a36Sopenharmony_ci}
320662306a36Sopenharmony_ci
320762306a36Sopenharmony_ci/*
320862306a36Sopenharmony_ci * The space between the end of the leaf items and
320962306a36Sopenharmony_ci * the start of the leaf data.  IOW, how much room
321062306a36Sopenharmony_ci * the leaf has left for both items and data
321162306a36Sopenharmony_ci */
321262306a36Sopenharmony_ciint btrfs_leaf_free_space(const struct extent_buffer *leaf)
321362306a36Sopenharmony_ci{
321462306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = leaf->fs_info;
321562306a36Sopenharmony_ci	int nritems = btrfs_header_nritems(leaf);
321662306a36Sopenharmony_ci	int ret;
321762306a36Sopenharmony_ci
321862306a36Sopenharmony_ci	ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
321962306a36Sopenharmony_ci	if (ret < 0) {
322062306a36Sopenharmony_ci		btrfs_crit(fs_info,
322162306a36Sopenharmony_ci			   "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
322262306a36Sopenharmony_ci			   ret,
322362306a36Sopenharmony_ci			   (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
322462306a36Sopenharmony_ci			   leaf_space_used(leaf, 0, nritems), nritems);
322562306a36Sopenharmony_ci	}
322662306a36Sopenharmony_ci	return ret;
322762306a36Sopenharmony_ci}
322862306a36Sopenharmony_ci
322962306a36Sopenharmony_ci/*
323062306a36Sopenharmony_ci * min slot controls the lowest index we're willing to push to the
323162306a36Sopenharmony_ci * right.  We'll push up to and including min_slot, but no lower
323262306a36Sopenharmony_ci */
323362306a36Sopenharmony_cistatic noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
323462306a36Sopenharmony_ci				      struct btrfs_path *path,
323562306a36Sopenharmony_ci				      int data_size, int empty,
323662306a36Sopenharmony_ci				      struct extent_buffer *right,
323762306a36Sopenharmony_ci				      int free_space, u32 left_nritems,
323862306a36Sopenharmony_ci				      u32 min_slot)
323962306a36Sopenharmony_ci{
324062306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = right->fs_info;
324162306a36Sopenharmony_ci	struct extent_buffer *left = path->nodes[0];
324262306a36Sopenharmony_ci	struct extent_buffer *upper = path->nodes[1];
324362306a36Sopenharmony_ci	struct btrfs_map_token token;
324462306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
324562306a36Sopenharmony_ci	int slot;
324662306a36Sopenharmony_ci	u32 i;
324762306a36Sopenharmony_ci	int push_space = 0;
324862306a36Sopenharmony_ci	int push_items = 0;
324962306a36Sopenharmony_ci	u32 nr;
325062306a36Sopenharmony_ci	u32 right_nritems;
325162306a36Sopenharmony_ci	u32 data_end;
325262306a36Sopenharmony_ci	u32 this_item_size;
325362306a36Sopenharmony_ci
325462306a36Sopenharmony_ci	if (empty)
325562306a36Sopenharmony_ci		nr = 0;
325662306a36Sopenharmony_ci	else
325762306a36Sopenharmony_ci		nr = max_t(u32, 1, min_slot);
325862306a36Sopenharmony_ci
325962306a36Sopenharmony_ci	if (path->slots[0] >= left_nritems)
326062306a36Sopenharmony_ci		push_space += data_size;
326162306a36Sopenharmony_ci
326262306a36Sopenharmony_ci	slot = path->slots[1];
326362306a36Sopenharmony_ci	i = left_nritems - 1;
326462306a36Sopenharmony_ci	while (i >= nr) {
326562306a36Sopenharmony_ci		if (!empty && push_items > 0) {
326662306a36Sopenharmony_ci			if (path->slots[0] > i)
326762306a36Sopenharmony_ci				break;
326862306a36Sopenharmony_ci			if (path->slots[0] == i) {
326962306a36Sopenharmony_ci				int space = btrfs_leaf_free_space(left);
327062306a36Sopenharmony_ci
327162306a36Sopenharmony_ci				if (space + push_space * 2 > free_space)
327262306a36Sopenharmony_ci					break;
327362306a36Sopenharmony_ci			}
327462306a36Sopenharmony_ci		}
327562306a36Sopenharmony_ci
327662306a36Sopenharmony_ci		if (path->slots[0] == i)
327762306a36Sopenharmony_ci			push_space += data_size;
327862306a36Sopenharmony_ci
327962306a36Sopenharmony_ci		this_item_size = btrfs_item_size(left, i);
328062306a36Sopenharmony_ci		if (this_item_size + sizeof(struct btrfs_item) +
328162306a36Sopenharmony_ci		    push_space > free_space)
328262306a36Sopenharmony_ci			break;
328362306a36Sopenharmony_ci
328462306a36Sopenharmony_ci		push_items++;
328562306a36Sopenharmony_ci		push_space += this_item_size + sizeof(struct btrfs_item);
328662306a36Sopenharmony_ci		if (i == 0)
328762306a36Sopenharmony_ci			break;
328862306a36Sopenharmony_ci		i--;
328962306a36Sopenharmony_ci	}
329062306a36Sopenharmony_ci
329162306a36Sopenharmony_ci	if (push_items == 0)
329262306a36Sopenharmony_ci		goto out_unlock;
329362306a36Sopenharmony_ci
329462306a36Sopenharmony_ci	WARN_ON(!empty && push_items == left_nritems);
329562306a36Sopenharmony_ci
329662306a36Sopenharmony_ci	/* push left to right */
329762306a36Sopenharmony_ci	right_nritems = btrfs_header_nritems(right);
329862306a36Sopenharmony_ci
329962306a36Sopenharmony_ci	push_space = btrfs_item_data_end(left, left_nritems - push_items);
330062306a36Sopenharmony_ci	push_space -= leaf_data_end(left);
330162306a36Sopenharmony_ci
330262306a36Sopenharmony_ci	/* make room in the right data area */
330362306a36Sopenharmony_ci	data_end = leaf_data_end(right);
330462306a36Sopenharmony_ci	memmove_leaf_data(right, data_end - push_space, data_end,
330562306a36Sopenharmony_ci			  BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
330662306a36Sopenharmony_ci
330762306a36Sopenharmony_ci	/* copy from the left data area */
330862306a36Sopenharmony_ci	copy_leaf_data(right, left, BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
330962306a36Sopenharmony_ci		       leaf_data_end(left), push_space);
331062306a36Sopenharmony_ci
331162306a36Sopenharmony_ci	memmove_leaf_items(right, push_items, 0, right_nritems);
331262306a36Sopenharmony_ci
331362306a36Sopenharmony_ci	/* copy the items from left to right */
331462306a36Sopenharmony_ci	copy_leaf_items(right, left, 0, left_nritems - push_items, push_items);
331562306a36Sopenharmony_ci
331662306a36Sopenharmony_ci	/* update the item pointers */
331762306a36Sopenharmony_ci	btrfs_init_map_token(&token, right);
331862306a36Sopenharmony_ci	right_nritems += push_items;
331962306a36Sopenharmony_ci	btrfs_set_header_nritems(right, right_nritems);
332062306a36Sopenharmony_ci	push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
332162306a36Sopenharmony_ci	for (i = 0; i < right_nritems; i++) {
332262306a36Sopenharmony_ci		push_space -= btrfs_token_item_size(&token, i);
332362306a36Sopenharmony_ci		btrfs_set_token_item_offset(&token, i, push_space);
332462306a36Sopenharmony_ci	}
332562306a36Sopenharmony_ci
332662306a36Sopenharmony_ci	left_nritems -= push_items;
332762306a36Sopenharmony_ci	btrfs_set_header_nritems(left, left_nritems);
332862306a36Sopenharmony_ci
332962306a36Sopenharmony_ci	if (left_nritems)
333062306a36Sopenharmony_ci		btrfs_mark_buffer_dirty(trans, left);
333162306a36Sopenharmony_ci	else
333262306a36Sopenharmony_ci		btrfs_clear_buffer_dirty(trans, left);
333362306a36Sopenharmony_ci
333462306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, right);
333562306a36Sopenharmony_ci
333662306a36Sopenharmony_ci	btrfs_item_key(right, &disk_key, 0);
333762306a36Sopenharmony_ci	btrfs_set_node_key(upper, &disk_key, slot + 1);
333862306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, upper);
333962306a36Sopenharmony_ci
334062306a36Sopenharmony_ci	/* then fixup the leaf pointer in the path */
334162306a36Sopenharmony_ci	if (path->slots[0] >= left_nritems) {
334262306a36Sopenharmony_ci		path->slots[0] -= left_nritems;
334362306a36Sopenharmony_ci		if (btrfs_header_nritems(path->nodes[0]) == 0)
334462306a36Sopenharmony_ci			btrfs_clear_buffer_dirty(trans, path->nodes[0]);
334562306a36Sopenharmony_ci		btrfs_tree_unlock(path->nodes[0]);
334662306a36Sopenharmony_ci		free_extent_buffer(path->nodes[0]);
334762306a36Sopenharmony_ci		path->nodes[0] = right;
334862306a36Sopenharmony_ci		path->slots[1] += 1;
334962306a36Sopenharmony_ci	} else {
335062306a36Sopenharmony_ci		btrfs_tree_unlock(right);
335162306a36Sopenharmony_ci		free_extent_buffer(right);
335262306a36Sopenharmony_ci	}
335362306a36Sopenharmony_ci	return 0;
335462306a36Sopenharmony_ci
335562306a36Sopenharmony_ciout_unlock:
335662306a36Sopenharmony_ci	btrfs_tree_unlock(right);
335762306a36Sopenharmony_ci	free_extent_buffer(right);
335862306a36Sopenharmony_ci	return 1;
335962306a36Sopenharmony_ci}
336062306a36Sopenharmony_ci
336162306a36Sopenharmony_ci/*
336262306a36Sopenharmony_ci * push some data in the path leaf to the right, trying to free up at
336362306a36Sopenharmony_ci * least data_size bytes.  returns zero if the push worked, nonzero otherwise
336462306a36Sopenharmony_ci *
336562306a36Sopenharmony_ci * returns 1 if the push failed because the other node didn't have enough
336662306a36Sopenharmony_ci * room, 0 if everything worked out and < 0 if there were major errors.
336762306a36Sopenharmony_ci *
336862306a36Sopenharmony_ci * this will push starting from min_slot to the end of the leaf.  It won't
336962306a36Sopenharmony_ci * push any slot lower than min_slot
337062306a36Sopenharmony_ci */
337162306a36Sopenharmony_cistatic int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
337262306a36Sopenharmony_ci			   *root, struct btrfs_path *path,
337362306a36Sopenharmony_ci			   int min_data_size, int data_size,
337462306a36Sopenharmony_ci			   int empty, u32 min_slot)
337562306a36Sopenharmony_ci{
337662306a36Sopenharmony_ci	struct extent_buffer *left = path->nodes[0];
337762306a36Sopenharmony_ci	struct extent_buffer *right;
337862306a36Sopenharmony_ci	struct extent_buffer *upper;
337962306a36Sopenharmony_ci	int slot;
338062306a36Sopenharmony_ci	int free_space;
338162306a36Sopenharmony_ci	u32 left_nritems;
338262306a36Sopenharmony_ci	int ret;
338362306a36Sopenharmony_ci
338462306a36Sopenharmony_ci	if (!path->nodes[1])
338562306a36Sopenharmony_ci		return 1;
338662306a36Sopenharmony_ci
338762306a36Sopenharmony_ci	slot = path->slots[1];
338862306a36Sopenharmony_ci	upper = path->nodes[1];
338962306a36Sopenharmony_ci	if (slot >= btrfs_header_nritems(upper) - 1)
339062306a36Sopenharmony_ci		return 1;
339162306a36Sopenharmony_ci
339262306a36Sopenharmony_ci	btrfs_assert_tree_write_locked(path->nodes[1]);
339362306a36Sopenharmony_ci
339462306a36Sopenharmony_ci	right = btrfs_read_node_slot(upper, slot + 1);
339562306a36Sopenharmony_ci	if (IS_ERR(right))
339662306a36Sopenharmony_ci		return PTR_ERR(right);
339762306a36Sopenharmony_ci
339862306a36Sopenharmony_ci	__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
339962306a36Sopenharmony_ci
340062306a36Sopenharmony_ci	free_space = btrfs_leaf_free_space(right);
340162306a36Sopenharmony_ci	if (free_space < data_size)
340262306a36Sopenharmony_ci		goto out_unlock;
340362306a36Sopenharmony_ci
340462306a36Sopenharmony_ci	ret = btrfs_cow_block(trans, root, right, upper,
340562306a36Sopenharmony_ci			      slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
340662306a36Sopenharmony_ci	if (ret)
340762306a36Sopenharmony_ci		goto out_unlock;
340862306a36Sopenharmony_ci
340962306a36Sopenharmony_ci	left_nritems = btrfs_header_nritems(left);
341062306a36Sopenharmony_ci	if (left_nritems == 0)
341162306a36Sopenharmony_ci		goto out_unlock;
341262306a36Sopenharmony_ci
341362306a36Sopenharmony_ci	if (check_sibling_keys(left, right)) {
341462306a36Sopenharmony_ci		ret = -EUCLEAN;
341562306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
341662306a36Sopenharmony_ci		btrfs_tree_unlock(right);
341762306a36Sopenharmony_ci		free_extent_buffer(right);
341862306a36Sopenharmony_ci		return ret;
341962306a36Sopenharmony_ci	}
342062306a36Sopenharmony_ci	if (path->slots[0] == left_nritems && !empty) {
342162306a36Sopenharmony_ci		/* Key greater than all keys in the leaf, right neighbor has
342262306a36Sopenharmony_ci		 * enough room for it and we're not emptying our leaf to delete
342362306a36Sopenharmony_ci		 * it, therefore use right neighbor to insert the new item and
342462306a36Sopenharmony_ci		 * no need to touch/dirty our left leaf. */
342562306a36Sopenharmony_ci		btrfs_tree_unlock(left);
342662306a36Sopenharmony_ci		free_extent_buffer(left);
342762306a36Sopenharmony_ci		path->nodes[0] = right;
342862306a36Sopenharmony_ci		path->slots[0] = 0;
342962306a36Sopenharmony_ci		path->slots[1]++;
343062306a36Sopenharmony_ci		return 0;
343162306a36Sopenharmony_ci	}
343262306a36Sopenharmony_ci
343362306a36Sopenharmony_ci	return __push_leaf_right(trans, path, min_data_size, empty, right,
343462306a36Sopenharmony_ci				 free_space, left_nritems, min_slot);
343562306a36Sopenharmony_ciout_unlock:
343662306a36Sopenharmony_ci	btrfs_tree_unlock(right);
343762306a36Sopenharmony_ci	free_extent_buffer(right);
343862306a36Sopenharmony_ci	return 1;
343962306a36Sopenharmony_ci}
344062306a36Sopenharmony_ci
344162306a36Sopenharmony_ci/*
344262306a36Sopenharmony_ci * push some data in the path leaf to the left, trying to free up at
344362306a36Sopenharmony_ci * least data_size bytes.  returns zero if the push worked, nonzero otherwise
344462306a36Sopenharmony_ci *
344562306a36Sopenharmony_ci * max_slot can put a limit on how far into the leaf we'll push items.  The
344662306a36Sopenharmony_ci * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
344762306a36Sopenharmony_ci * items
344862306a36Sopenharmony_ci */
344962306a36Sopenharmony_cistatic noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
345062306a36Sopenharmony_ci				     struct btrfs_path *path, int data_size,
345162306a36Sopenharmony_ci				     int empty, struct extent_buffer *left,
345262306a36Sopenharmony_ci				     int free_space, u32 right_nritems,
345362306a36Sopenharmony_ci				     u32 max_slot)
345462306a36Sopenharmony_ci{
345562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = left->fs_info;
345662306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
345762306a36Sopenharmony_ci	struct extent_buffer *right = path->nodes[0];
345862306a36Sopenharmony_ci	int i;
345962306a36Sopenharmony_ci	int push_space = 0;
346062306a36Sopenharmony_ci	int push_items = 0;
346162306a36Sopenharmony_ci	u32 old_left_nritems;
346262306a36Sopenharmony_ci	u32 nr;
346362306a36Sopenharmony_ci	int ret = 0;
346462306a36Sopenharmony_ci	u32 this_item_size;
346562306a36Sopenharmony_ci	u32 old_left_item_size;
346662306a36Sopenharmony_ci	struct btrfs_map_token token;
346762306a36Sopenharmony_ci
346862306a36Sopenharmony_ci	if (empty)
346962306a36Sopenharmony_ci		nr = min(right_nritems, max_slot);
347062306a36Sopenharmony_ci	else
347162306a36Sopenharmony_ci		nr = min(right_nritems - 1, max_slot);
347262306a36Sopenharmony_ci
347362306a36Sopenharmony_ci	for (i = 0; i < nr; i++) {
347462306a36Sopenharmony_ci		if (!empty && push_items > 0) {
347562306a36Sopenharmony_ci			if (path->slots[0] < i)
347662306a36Sopenharmony_ci				break;
347762306a36Sopenharmony_ci			if (path->slots[0] == i) {
347862306a36Sopenharmony_ci				int space = btrfs_leaf_free_space(right);
347962306a36Sopenharmony_ci
348062306a36Sopenharmony_ci				if (space + push_space * 2 > free_space)
348162306a36Sopenharmony_ci					break;
348262306a36Sopenharmony_ci			}
348362306a36Sopenharmony_ci		}
348462306a36Sopenharmony_ci
348562306a36Sopenharmony_ci		if (path->slots[0] == i)
348662306a36Sopenharmony_ci			push_space += data_size;
348762306a36Sopenharmony_ci
348862306a36Sopenharmony_ci		this_item_size = btrfs_item_size(right, i);
348962306a36Sopenharmony_ci		if (this_item_size + sizeof(struct btrfs_item) + push_space >
349062306a36Sopenharmony_ci		    free_space)
349162306a36Sopenharmony_ci			break;
349262306a36Sopenharmony_ci
349362306a36Sopenharmony_ci		push_items++;
349462306a36Sopenharmony_ci		push_space += this_item_size + sizeof(struct btrfs_item);
349562306a36Sopenharmony_ci	}
349662306a36Sopenharmony_ci
349762306a36Sopenharmony_ci	if (push_items == 0) {
349862306a36Sopenharmony_ci		ret = 1;
349962306a36Sopenharmony_ci		goto out;
350062306a36Sopenharmony_ci	}
350162306a36Sopenharmony_ci	WARN_ON(!empty && push_items == btrfs_header_nritems(right));
350262306a36Sopenharmony_ci
350362306a36Sopenharmony_ci	/* push data from right to left */
350462306a36Sopenharmony_ci	copy_leaf_items(left, right, btrfs_header_nritems(left), 0, push_items);
350562306a36Sopenharmony_ci
350662306a36Sopenharmony_ci	push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
350762306a36Sopenharmony_ci		     btrfs_item_offset(right, push_items - 1);
350862306a36Sopenharmony_ci
350962306a36Sopenharmony_ci	copy_leaf_data(left, right, leaf_data_end(left) - push_space,
351062306a36Sopenharmony_ci		       btrfs_item_offset(right, push_items - 1), push_space);
351162306a36Sopenharmony_ci	old_left_nritems = btrfs_header_nritems(left);
351262306a36Sopenharmony_ci	BUG_ON(old_left_nritems <= 0);
351362306a36Sopenharmony_ci
351462306a36Sopenharmony_ci	btrfs_init_map_token(&token, left);
351562306a36Sopenharmony_ci	old_left_item_size = btrfs_item_offset(left, old_left_nritems - 1);
351662306a36Sopenharmony_ci	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
351762306a36Sopenharmony_ci		u32 ioff;
351862306a36Sopenharmony_ci
351962306a36Sopenharmony_ci		ioff = btrfs_token_item_offset(&token, i);
352062306a36Sopenharmony_ci		btrfs_set_token_item_offset(&token, i,
352162306a36Sopenharmony_ci		      ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
352262306a36Sopenharmony_ci	}
352362306a36Sopenharmony_ci	btrfs_set_header_nritems(left, old_left_nritems + push_items);
352462306a36Sopenharmony_ci
352562306a36Sopenharmony_ci	/* fixup right node */
352662306a36Sopenharmony_ci	if (push_items > right_nritems)
352762306a36Sopenharmony_ci		WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
352862306a36Sopenharmony_ci		       right_nritems);
352962306a36Sopenharmony_ci
353062306a36Sopenharmony_ci	if (push_items < right_nritems) {
353162306a36Sopenharmony_ci		push_space = btrfs_item_offset(right, push_items - 1) -
353262306a36Sopenharmony_ci						  leaf_data_end(right);
353362306a36Sopenharmony_ci		memmove_leaf_data(right,
353462306a36Sopenharmony_ci				  BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
353562306a36Sopenharmony_ci				  leaf_data_end(right), push_space);
353662306a36Sopenharmony_ci
353762306a36Sopenharmony_ci		memmove_leaf_items(right, 0, push_items,
353862306a36Sopenharmony_ci				   btrfs_header_nritems(right) - push_items);
353962306a36Sopenharmony_ci	}
354062306a36Sopenharmony_ci
354162306a36Sopenharmony_ci	btrfs_init_map_token(&token, right);
354262306a36Sopenharmony_ci	right_nritems -= push_items;
354362306a36Sopenharmony_ci	btrfs_set_header_nritems(right, right_nritems);
354462306a36Sopenharmony_ci	push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
354562306a36Sopenharmony_ci	for (i = 0; i < right_nritems; i++) {
354662306a36Sopenharmony_ci		push_space = push_space - btrfs_token_item_size(&token, i);
354762306a36Sopenharmony_ci		btrfs_set_token_item_offset(&token, i, push_space);
354862306a36Sopenharmony_ci	}
354962306a36Sopenharmony_ci
355062306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, left);
355162306a36Sopenharmony_ci	if (right_nritems)
355262306a36Sopenharmony_ci		btrfs_mark_buffer_dirty(trans, right);
355362306a36Sopenharmony_ci	else
355462306a36Sopenharmony_ci		btrfs_clear_buffer_dirty(trans, right);
355562306a36Sopenharmony_ci
355662306a36Sopenharmony_ci	btrfs_item_key(right, &disk_key, 0);
355762306a36Sopenharmony_ci	fixup_low_keys(trans, path, &disk_key, 1);
355862306a36Sopenharmony_ci
355962306a36Sopenharmony_ci	/* then fixup the leaf pointer in the path */
356062306a36Sopenharmony_ci	if (path->slots[0] < push_items) {
356162306a36Sopenharmony_ci		path->slots[0] += old_left_nritems;
356262306a36Sopenharmony_ci		btrfs_tree_unlock(path->nodes[0]);
356362306a36Sopenharmony_ci		free_extent_buffer(path->nodes[0]);
356462306a36Sopenharmony_ci		path->nodes[0] = left;
356562306a36Sopenharmony_ci		path->slots[1] -= 1;
356662306a36Sopenharmony_ci	} else {
356762306a36Sopenharmony_ci		btrfs_tree_unlock(left);
356862306a36Sopenharmony_ci		free_extent_buffer(left);
356962306a36Sopenharmony_ci		path->slots[0] -= push_items;
357062306a36Sopenharmony_ci	}
357162306a36Sopenharmony_ci	BUG_ON(path->slots[0] < 0);
357262306a36Sopenharmony_ci	return ret;
357362306a36Sopenharmony_ciout:
357462306a36Sopenharmony_ci	btrfs_tree_unlock(left);
357562306a36Sopenharmony_ci	free_extent_buffer(left);
357662306a36Sopenharmony_ci	return ret;
357762306a36Sopenharmony_ci}
357862306a36Sopenharmony_ci
357962306a36Sopenharmony_ci/*
358062306a36Sopenharmony_ci * push some data in the path leaf to the left, trying to free up at
358162306a36Sopenharmony_ci * least data_size bytes.  returns zero if the push worked, nonzero otherwise
358262306a36Sopenharmony_ci *
358362306a36Sopenharmony_ci * max_slot can put a limit on how far into the leaf we'll push items.  The
358462306a36Sopenharmony_ci * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
358562306a36Sopenharmony_ci * items
358662306a36Sopenharmony_ci */
358762306a36Sopenharmony_cistatic int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
358862306a36Sopenharmony_ci			  *root, struct btrfs_path *path, int min_data_size,
358962306a36Sopenharmony_ci			  int data_size, int empty, u32 max_slot)
359062306a36Sopenharmony_ci{
359162306a36Sopenharmony_ci	struct extent_buffer *right = path->nodes[0];
359262306a36Sopenharmony_ci	struct extent_buffer *left;
359362306a36Sopenharmony_ci	int slot;
359462306a36Sopenharmony_ci	int free_space;
359562306a36Sopenharmony_ci	u32 right_nritems;
359662306a36Sopenharmony_ci	int ret = 0;
359762306a36Sopenharmony_ci
359862306a36Sopenharmony_ci	slot = path->slots[1];
359962306a36Sopenharmony_ci	if (slot == 0)
360062306a36Sopenharmony_ci		return 1;
360162306a36Sopenharmony_ci	if (!path->nodes[1])
360262306a36Sopenharmony_ci		return 1;
360362306a36Sopenharmony_ci
360462306a36Sopenharmony_ci	right_nritems = btrfs_header_nritems(right);
360562306a36Sopenharmony_ci	if (right_nritems == 0)
360662306a36Sopenharmony_ci		return 1;
360762306a36Sopenharmony_ci
360862306a36Sopenharmony_ci	btrfs_assert_tree_write_locked(path->nodes[1]);
360962306a36Sopenharmony_ci
361062306a36Sopenharmony_ci	left = btrfs_read_node_slot(path->nodes[1], slot - 1);
361162306a36Sopenharmony_ci	if (IS_ERR(left))
361262306a36Sopenharmony_ci		return PTR_ERR(left);
361362306a36Sopenharmony_ci
361462306a36Sopenharmony_ci	__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
361562306a36Sopenharmony_ci
361662306a36Sopenharmony_ci	free_space = btrfs_leaf_free_space(left);
361762306a36Sopenharmony_ci	if (free_space < data_size) {
361862306a36Sopenharmony_ci		ret = 1;
361962306a36Sopenharmony_ci		goto out;
362062306a36Sopenharmony_ci	}
362162306a36Sopenharmony_ci
362262306a36Sopenharmony_ci	ret = btrfs_cow_block(trans, root, left,
362362306a36Sopenharmony_ci			      path->nodes[1], slot - 1, &left,
362462306a36Sopenharmony_ci			      BTRFS_NESTING_LEFT_COW);
362562306a36Sopenharmony_ci	if (ret) {
362662306a36Sopenharmony_ci		/* we hit -ENOSPC, but it isn't fatal here */
362762306a36Sopenharmony_ci		if (ret == -ENOSPC)
362862306a36Sopenharmony_ci			ret = 1;
362962306a36Sopenharmony_ci		goto out;
363062306a36Sopenharmony_ci	}
363162306a36Sopenharmony_ci
363262306a36Sopenharmony_ci	if (check_sibling_keys(left, right)) {
363362306a36Sopenharmony_ci		ret = -EUCLEAN;
363462306a36Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
363562306a36Sopenharmony_ci		goto out;
363662306a36Sopenharmony_ci	}
363762306a36Sopenharmony_ci	return __push_leaf_left(trans, path, min_data_size, empty, left,
363862306a36Sopenharmony_ci				free_space, right_nritems, max_slot);
363962306a36Sopenharmony_ciout:
364062306a36Sopenharmony_ci	btrfs_tree_unlock(left);
364162306a36Sopenharmony_ci	free_extent_buffer(left);
364262306a36Sopenharmony_ci	return ret;
364362306a36Sopenharmony_ci}
364462306a36Sopenharmony_ci
364562306a36Sopenharmony_ci/*
364662306a36Sopenharmony_ci * split the path's leaf in two, making sure there is at least data_size
364762306a36Sopenharmony_ci * available for the resulting leaf level of the path.
364862306a36Sopenharmony_ci */
364962306a36Sopenharmony_cistatic noinline int copy_for_split(struct btrfs_trans_handle *trans,
365062306a36Sopenharmony_ci				   struct btrfs_path *path,
365162306a36Sopenharmony_ci				   struct extent_buffer *l,
365262306a36Sopenharmony_ci				   struct extent_buffer *right,
365362306a36Sopenharmony_ci				   int slot, int mid, int nritems)
365462306a36Sopenharmony_ci{
365562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
365662306a36Sopenharmony_ci	int data_copy_size;
365762306a36Sopenharmony_ci	int rt_data_off;
365862306a36Sopenharmony_ci	int i;
365962306a36Sopenharmony_ci	int ret;
366062306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
366162306a36Sopenharmony_ci	struct btrfs_map_token token;
366262306a36Sopenharmony_ci
366362306a36Sopenharmony_ci	nritems = nritems - mid;
366462306a36Sopenharmony_ci	btrfs_set_header_nritems(right, nritems);
366562306a36Sopenharmony_ci	data_copy_size = btrfs_item_data_end(l, mid) - leaf_data_end(l);
366662306a36Sopenharmony_ci
366762306a36Sopenharmony_ci	copy_leaf_items(right, l, 0, mid, nritems);
366862306a36Sopenharmony_ci
366962306a36Sopenharmony_ci	copy_leaf_data(right, l, BTRFS_LEAF_DATA_SIZE(fs_info) - data_copy_size,
367062306a36Sopenharmony_ci		       leaf_data_end(l), data_copy_size);
367162306a36Sopenharmony_ci
367262306a36Sopenharmony_ci	rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid);
367362306a36Sopenharmony_ci
367462306a36Sopenharmony_ci	btrfs_init_map_token(&token, right);
367562306a36Sopenharmony_ci	for (i = 0; i < nritems; i++) {
367662306a36Sopenharmony_ci		u32 ioff;
367762306a36Sopenharmony_ci
367862306a36Sopenharmony_ci		ioff = btrfs_token_item_offset(&token, i);
367962306a36Sopenharmony_ci		btrfs_set_token_item_offset(&token, i, ioff + rt_data_off);
368062306a36Sopenharmony_ci	}
368162306a36Sopenharmony_ci
368262306a36Sopenharmony_ci	btrfs_set_header_nritems(l, mid);
368362306a36Sopenharmony_ci	btrfs_item_key(right, &disk_key, 0);
368462306a36Sopenharmony_ci	ret = insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
368562306a36Sopenharmony_ci	if (ret < 0)
368662306a36Sopenharmony_ci		return ret;
368762306a36Sopenharmony_ci
368862306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, right);
368962306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, l);
369062306a36Sopenharmony_ci	BUG_ON(path->slots[0] != slot);
369162306a36Sopenharmony_ci
369262306a36Sopenharmony_ci	if (mid <= slot) {
369362306a36Sopenharmony_ci		btrfs_tree_unlock(path->nodes[0]);
369462306a36Sopenharmony_ci		free_extent_buffer(path->nodes[0]);
369562306a36Sopenharmony_ci		path->nodes[0] = right;
369662306a36Sopenharmony_ci		path->slots[0] -= mid;
369762306a36Sopenharmony_ci		path->slots[1] += 1;
369862306a36Sopenharmony_ci	} else {
369962306a36Sopenharmony_ci		btrfs_tree_unlock(right);
370062306a36Sopenharmony_ci		free_extent_buffer(right);
370162306a36Sopenharmony_ci	}
370262306a36Sopenharmony_ci
370362306a36Sopenharmony_ci	BUG_ON(path->slots[0] < 0);
370462306a36Sopenharmony_ci
370562306a36Sopenharmony_ci	return 0;
370662306a36Sopenharmony_ci}
370762306a36Sopenharmony_ci
370862306a36Sopenharmony_ci/*
370962306a36Sopenharmony_ci * double splits happen when we need to insert a big item in the middle
371062306a36Sopenharmony_ci * of a leaf.  A double split can leave us with 3 mostly empty leaves:
371162306a36Sopenharmony_ci * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
371262306a36Sopenharmony_ci *          A                 B                 C
371362306a36Sopenharmony_ci *
371462306a36Sopenharmony_ci * We avoid this by trying to push the items on either side of our target
371562306a36Sopenharmony_ci * into the adjacent leaves.  If all goes well we can avoid the double split
371662306a36Sopenharmony_ci * completely.
371762306a36Sopenharmony_ci */
371862306a36Sopenharmony_cistatic noinline int push_for_double_split(struct btrfs_trans_handle *trans,
371962306a36Sopenharmony_ci					  struct btrfs_root *root,
372062306a36Sopenharmony_ci					  struct btrfs_path *path,
372162306a36Sopenharmony_ci					  int data_size)
372262306a36Sopenharmony_ci{
372362306a36Sopenharmony_ci	int ret;
372462306a36Sopenharmony_ci	int progress = 0;
372562306a36Sopenharmony_ci	int slot;
372662306a36Sopenharmony_ci	u32 nritems;
372762306a36Sopenharmony_ci	int space_needed = data_size;
372862306a36Sopenharmony_ci
372962306a36Sopenharmony_ci	slot = path->slots[0];
373062306a36Sopenharmony_ci	if (slot < btrfs_header_nritems(path->nodes[0]))
373162306a36Sopenharmony_ci		space_needed -= btrfs_leaf_free_space(path->nodes[0]);
373262306a36Sopenharmony_ci
373362306a36Sopenharmony_ci	/*
373462306a36Sopenharmony_ci	 * try to push all the items after our slot into the
373562306a36Sopenharmony_ci	 * right leaf
373662306a36Sopenharmony_ci	 */
373762306a36Sopenharmony_ci	ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
373862306a36Sopenharmony_ci	if (ret < 0)
373962306a36Sopenharmony_ci		return ret;
374062306a36Sopenharmony_ci
374162306a36Sopenharmony_ci	if (ret == 0)
374262306a36Sopenharmony_ci		progress++;
374362306a36Sopenharmony_ci
374462306a36Sopenharmony_ci	nritems = btrfs_header_nritems(path->nodes[0]);
374562306a36Sopenharmony_ci	/*
374662306a36Sopenharmony_ci	 * our goal is to get our slot at the start or end of a leaf.  If
374762306a36Sopenharmony_ci	 * we've done so we're done
374862306a36Sopenharmony_ci	 */
374962306a36Sopenharmony_ci	if (path->slots[0] == 0 || path->slots[0] == nritems)
375062306a36Sopenharmony_ci		return 0;
375162306a36Sopenharmony_ci
375262306a36Sopenharmony_ci	if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
375362306a36Sopenharmony_ci		return 0;
375462306a36Sopenharmony_ci
375562306a36Sopenharmony_ci	/* try to push all the items before our slot into the next leaf */
375662306a36Sopenharmony_ci	slot = path->slots[0];
375762306a36Sopenharmony_ci	space_needed = data_size;
375862306a36Sopenharmony_ci	if (slot > 0)
375962306a36Sopenharmony_ci		space_needed -= btrfs_leaf_free_space(path->nodes[0]);
376062306a36Sopenharmony_ci	ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
376162306a36Sopenharmony_ci	if (ret < 0)
376262306a36Sopenharmony_ci		return ret;
376362306a36Sopenharmony_ci
376462306a36Sopenharmony_ci	if (ret == 0)
376562306a36Sopenharmony_ci		progress++;
376662306a36Sopenharmony_ci
376762306a36Sopenharmony_ci	if (progress)
376862306a36Sopenharmony_ci		return 0;
376962306a36Sopenharmony_ci	return 1;
377062306a36Sopenharmony_ci}
377162306a36Sopenharmony_ci
377262306a36Sopenharmony_ci/*
377362306a36Sopenharmony_ci * split the path's leaf in two, making sure there is at least data_size
377462306a36Sopenharmony_ci * available for the resulting leaf level of the path.
377562306a36Sopenharmony_ci *
377662306a36Sopenharmony_ci * returns 0 if all went well and < 0 on failure.
377762306a36Sopenharmony_ci */
377862306a36Sopenharmony_cistatic noinline int split_leaf(struct btrfs_trans_handle *trans,
377962306a36Sopenharmony_ci			       struct btrfs_root *root,
378062306a36Sopenharmony_ci			       const struct btrfs_key *ins_key,
378162306a36Sopenharmony_ci			       struct btrfs_path *path, int data_size,
378262306a36Sopenharmony_ci			       int extend)
378362306a36Sopenharmony_ci{
378462306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
378562306a36Sopenharmony_ci	struct extent_buffer *l;
378662306a36Sopenharmony_ci	u32 nritems;
378762306a36Sopenharmony_ci	int mid;
378862306a36Sopenharmony_ci	int slot;
378962306a36Sopenharmony_ci	struct extent_buffer *right;
379062306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
379162306a36Sopenharmony_ci	int ret = 0;
379262306a36Sopenharmony_ci	int wret;
379362306a36Sopenharmony_ci	int split;
379462306a36Sopenharmony_ci	int num_doubles = 0;
379562306a36Sopenharmony_ci	int tried_avoid_double = 0;
379662306a36Sopenharmony_ci
379762306a36Sopenharmony_ci	l = path->nodes[0];
379862306a36Sopenharmony_ci	slot = path->slots[0];
379962306a36Sopenharmony_ci	if (extend && data_size + btrfs_item_size(l, slot) +
380062306a36Sopenharmony_ci	    sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
380162306a36Sopenharmony_ci		return -EOVERFLOW;
380262306a36Sopenharmony_ci
380362306a36Sopenharmony_ci	/* first try to make some room by pushing left and right */
380462306a36Sopenharmony_ci	if (data_size && path->nodes[1]) {
380562306a36Sopenharmony_ci		int space_needed = data_size;
380662306a36Sopenharmony_ci
380762306a36Sopenharmony_ci		if (slot < btrfs_header_nritems(l))
380862306a36Sopenharmony_ci			space_needed -= btrfs_leaf_free_space(l);
380962306a36Sopenharmony_ci
381062306a36Sopenharmony_ci		wret = push_leaf_right(trans, root, path, space_needed,
381162306a36Sopenharmony_ci				       space_needed, 0, 0);
381262306a36Sopenharmony_ci		if (wret < 0)
381362306a36Sopenharmony_ci			return wret;
381462306a36Sopenharmony_ci		if (wret) {
381562306a36Sopenharmony_ci			space_needed = data_size;
381662306a36Sopenharmony_ci			if (slot > 0)
381762306a36Sopenharmony_ci				space_needed -= btrfs_leaf_free_space(l);
381862306a36Sopenharmony_ci			wret = push_leaf_left(trans, root, path, space_needed,
381962306a36Sopenharmony_ci					      space_needed, 0, (u32)-1);
382062306a36Sopenharmony_ci			if (wret < 0)
382162306a36Sopenharmony_ci				return wret;
382262306a36Sopenharmony_ci		}
382362306a36Sopenharmony_ci		l = path->nodes[0];
382462306a36Sopenharmony_ci
382562306a36Sopenharmony_ci		/* did the pushes work? */
382662306a36Sopenharmony_ci		if (btrfs_leaf_free_space(l) >= data_size)
382762306a36Sopenharmony_ci			return 0;
382862306a36Sopenharmony_ci	}
382962306a36Sopenharmony_ci
383062306a36Sopenharmony_ci	if (!path->nodes[1]) {
383162306a36Sopenharmony_ci		ret = insert_new_root(trans, root, path, 1);
383262306a36Sopenharmony_ci		if (ret)
383362306a36Sopenharmony_ci			return ret;
383462306a36Sopenharmony_ci	}
383562306a36Sopenharmony_ciagain:
383662306a36Sopenharmony_ci	split = 1;
383762306a36Sopenharmony_ci	l = path->nodes[0];
383862306a36Sopenharmony_ci	slot = path->slots[0];
383962306a36Sopenharmony_ci	nritems = btrfs_header_nritems(l);
384062306a36Sopenharmony_ci	mid = (nritems + 1) / 2;
384162306a36Sopenharmony_ci
384262306a36Sopenharmony_ci	if (mid <= slot) {
384362306a36Sopenharmony_ci		if (nritems == 1 ||
384462306a36Sopenharmony_ci		    leaf_space_used(l, mid, nritems - mid) + data_size >
384562306a36Sopenharmony_ci			BTRFS_LEAF_DATA_SIZE(fs_info)) {
384662306a36Sopenharmony_ci			if (slot >= nritems) {
384762306a36Sopenharmony_ci				split = 0;
384862306a36Sopenharmony_ci			} else {
384962306a36Sopenharmony_ci				mid = slot;
385062306a36Sopenharmony_ci				if (mid != nritems &&
385162306a36Sopenharmony_ci				    leaf_space_used(l, mid, nritems - mid) +
385262306a36Sopenharmony_ci				    data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
385362306a36Sopenharmony_ci					if (data_size && !tried_avoid_double)
385462306a36Sopenharmony_ci						goto push_for_double;
385562306a36Sopenharmony_ci					split = 2;
385662306a36Sopenharmony_ci				}
385762306a36Sopenharmony_ci			}
385862306a36Sopenharmony_ci		}
385962306a36Sopenharmony_ci	} else {
386062306a36Sopenharmony_ci		if (leaf_space_used(l, 0, mid) + data_size >
386162306a36Sopenharmony_ci			BTRFS_LEAF_DATA_SIZE(fs_info)) {
386262306a36Sopenharmony_ci			if (!extend && data_size && slot == 0) {
386362306a36Sopenharmony_ci				split = 0;
386462306a36Sopenharmony_ci			} else if ((extend || !data_size) && slot == 0) {
386562306a36Sopenharmony_ci				mid = 1;
386662306a36Sopenharmony_ci			} else {
386762306a36Sopenharmony_ci				mid = slot;
386862306a36Sopenharmony_ci				if (mid != nritems &&
386962306a36Sopenharmony_ci				    leaf_space_used(l, mid, nritems - mid) +
387062306a36Sopenharmony_ci				    data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
387162306a36Sopenharmony_ci					if (data_size && !tried_avoid_double)
387262306a36Sopenharmony_ci						goto push_for_double;
387362306a36Sopenharmony_ci					split = 2;
387462306a36Sopenharmony_ci				}
387562306a36Sopenharmony_ci			}
387662306a36Sopenharmony_ci		}
387762306a36Sopenharmony_ci	}
387862306a36Sopenharmony_ci
387962306a36Sopenharmony_ci	if (split == 0)
388062306a36Sopenharmony_ci		btrfs_cpu_key_to_disk(&disk_key, ins_key);
388162306a36Sopenharmony_ci	else
388262306a36Sopenharmony_ci		btrfs_item_key(l, &disk_key, mid);
388362306a36Sopenharmony_ci
388462306a36Sopenharmony_ci	/*
388562306a36Sopenharmony_ci	 * We have to about BTRFS_NESTING_NEW_ROOT here if we've done a double
388662306a36Sopenharmony_ci	 * split, because we're only allowed to have MAX_LOCKDEP_SUBCLASSES
388762306a36Sopenharmony_ci	 * subclasses, which is 8 at the time of this patch, and we've maxed it
388862306a36Sopenharmony_ci	 * out.  In the future we could add a
388962306a36Sopenharmony_ci	 * BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just
389062306a36Sopenharmony_ci	 * use BTRFS_NESTING_NEW_ROOT.
389162306a36Sopenharmony_ci	 */
389262306a36Sopenharmony_ci	right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
389362306a36Sopenharmony_ci				       &disk_key, 0, l->start, 0,
389462306a36Sopenharmony_ci				       num_doubles ? BTRFS_NESTING_NEW_ROOT :
389562306a36Sopenharmony_ci				       BTRFS_NESTING_SPLIT);
389662306a36Sopenharmony_ci	if (IS_ERR(right))
389762306a36Sopenharmony_ci		return PTR_ERR(right);
389862306a36Sopenharmony_ci
389962306a36Sopenharmony_ci	root_add_used(root, fs_info->nodesize);
390062306a36Sopenharmony_ci
390162306a36Sopenharmony_ci	if (split == 0) {
390262306a36Sopenharmony_ci		if (mid <= slot) {
390362306a36Sopenharmony_ci			btrfs_set_header_nritems(right, 0);
390462306a36Sopenharmony_ci			ret = insert_ptr(trans, path, &disk_key,
390562306a36Sopenharmony_ci					 right->start, path->slots[1] + 1, 1);
390662306a36Sopenharmony_ci			if (ret < 0) {
390762306a36Sopenharmony_ci				btrfs_tree_unlock(right);
390862306a36Sopenharmony_ci				free_extent_buffer(right);
390962306a36Sopenharmony_ci				return ret;
391062306a36Sopenharmony_ci			}
391162306a36Sopenharmony_ci			btrfs_tree_unlock(path->nodes[0]);
391262306a36Sopenharmony_ci			free_extent_buffer(path->nodes[0]);
391362306a36Sopenharmony_ci			path->nodes[0] = right;
391462306a36Sopenharmony_ci			path->slots[0] = 0;
391562306a36Sopenharmony_ci			path->slots[1] += 1;
391662306a36Sopenharmony_ci		} else {
391762306a36Sopenharmony_ci			btrfs_set_header_nritems(right, 0);
391862306a36Sopenharmony_ci			ret = insert_ptr(trans, path, &disk_key,
391962306a36Sopenharmony_ci					 right->start, path->slots[1], 1);
392062306a36Sopenharmony_ci			if (ret < 0) {
392162306a36Sopenharmony_ci				btrfs_tree_unlock(right);
392262306a36Sopenharmony_ci				free_extent_buffer(right);
392362306a36Sopenharmony_ci				return ret;
392462306a36Sopenharmony_ci			}
392562306a36Sopenharmony_ci			btrfs_tree_unlock(path->nodes[0]);
392662306a36Sopenharmony_ci			free_extent_buffer(path->nodes[0]);
392762306a36Sopenharmony_ci			path->nodes[0] = right;
392862306a36Sopenharmony_ci			path->slots[0] = 0;
392962306a36Sopenharmony_ci			if (path->slots[1] == 0)
393062306a36Sopenharmony_ci				fixup_low_keys(trans, path, &disk_key, 1);
393162306a36Sopenharmony_ci		}
393262306a36Sopenharmony_ci		/*
393362306a36Sopenharmony_ci		 * We create a new leaf 'right' for the required ins_len and
393462306a36Sopenharmony_ci		 * we'll do btrfs_mark_buffer_dirty() on this leaf after copying
393562306a36Sopenharmony_ci		 * the content of ins_len to 'right'.
393662306a36Sopenharmony_ci		 */
393762306a36Sopenharmony_ci		return ret;
393862306a36Sopenharmony_ci	}
393962306a36Sopenharmony_ci
394062306a36Sopenharmony_ci	ret = copy_for_split(trans, path, l, right, slot, mid, nritems);
394162306a36Sopenharmony_ci	if (ret < 0) {
394262306a36Sopenharmony_ci		btrfs_tree_unlock(right);
394362306a36Sopenharmony_ci		free_extent_buffer(right);
394462306a36Sopenharmony_ci		return ret;
394562306a36Sopenharmony_ci	}
394662306a36Sopenharmony_ci
394762306a36Sopenharmony_ci	if (split == 2) {
394862306a36Sopenharmony_ci		BUG_ON(num_doubles != 0);
394962306a36Sopenharmony_ci		num_doubles++;
395062306a36Sopenharmony_ci		goto again;
395162306a36Sopenharmony_ci	}
395262306a36Sopenharmony_ci
395362306a36Sopenharmony_ci	return 0;
395462306a36Sopenharmony_ci
395562306a36Sopenharmony_cipush_for_double:
395662306a36Sopenharmony_ci	push_for_double_split(trans, root, path, data_size);
395762306a36Sopenharmony_ci	tried_avoid_double = 1;
395862306a36Sopenharmony_ci	if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
395962306a36Sopenharmony_ci		return 0;
396062306a36Sopenharmony_ci	goto again;
396162306a36Sopenharmony_ci}
396262306a36Sopenharmony_ci
396362306a36Sopenharmony_cistatic noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
396462306a36Sopenharmony_ci					 struct btrfs_root *root,
396562306a36Sopenharmony_ci					 struct btrfs_path *path, int ins_len)
396662306a36Sopenharmony_ci{
396762306a36Sopenharmony_ci	struct btrfs_key key;
396862306a36Sopenharmony_ci	struct extent_buffer *leaf;
396962306a36Sopenharmony_ci	struct btrfs_file_extent_item *fi;
397062306a36Sopenharmony_ci	u64 extent_len = 0;
397162306a36Sopenharmony_ci	u32 item_size;
397262306a36Sopenharmony_ci	int ret;
397362306a36Sopenharmony_ci
397462306a36Sopenharmony_ci	leaf = path->nodes[0];
397562306a36Sopenharmony_ci	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
397662306a36Sopenharmony_ci
397762306a36Sopenharmony_ci	BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
397862306a36Sopenharmony_ci	       key.type != BTRFS_EXTENT_CSUM_KEY);
397962306a36Sopenharmony_ci
398062306a36Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) >= ins_len)
398162306a36Sopenharmony_ci		return 0;
398262306a36Sopenharmony_ci
398362306a36Sopenharmony_ci	item_size = btrfs_item_size(leaf, path->slots[0]);
398462306a36Sopenharmony_ci	if (key.type == BTRFS_EXTENT_DATA_KEY) {
398562306a36Sopenharmony_ci		fi = btrfs_item_ptr(leaf, path->slots[0],
398662306a36Sopenharmony_ci				    struct btrfs_file_extent_item);
398762306a36Sopenharmony_ci		extent_len = btrfs_file_extent_num_bytes(leaf, fi);
398862306a36Sopenharmony_ci	}
398962306a36Sopenharmony_ci	btrfs_release_path(path);
399062306a36Sopenharmony_ci
399162306a36Sopenharmony_ci	path->keep_locks = 1;
399262306a36Sopenharmony_ci	path->search_for_split = 1;
399362306a36Sopenharmony_ci	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
399462306a36Sopenharmony_ci	path->search_for_split = 0;
399562306a36Sopenharmony_ci	if (ret > 0)
399662306a36Sopenharmony_ci		ret = -EAGAIN;
399762306a36Sopenharmony_ci	if (ret < 0)
399862306a36Sopenharmony_ci		goto err;
399962306a36Sopenharmony_ci
400062306a36Sopenharmony_ci	ret = -EAGAIN;
400162306a36Sopenharmony_ci	leaf = path->nodes[0];
400262306a36Sopenharmony_ci	/* if our item isn't there, return now */
400362306a36Sopenharmony_ci	if (item_size != btrfs_item_size(leaf, path->slots[0]))
400462306a36Sopenharmony_ci		goto err;
400562306a36Sopenharmony_ci
400662306a36Sopenharmony_ci	/* the leaf has  changed, it now has room.  return now */
400762306a36Sopenharmony_ci	if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
400862306a36Sopenharmony_ci		goto err;
400962306a36Sopenharmony_ci
401062306a36Sopenharmony_ci	if (key.type == BTRFS_EXTENT_DATA_KEY) {
401162306a36Sopenharmony_ci		fi = btrfs_item_ptr(leaf, path->slots[0],
401262306a36Sopenharmony_ci				    struct btrfs_file_extent_item);
401362306a36Sopenharmony_ci		if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
401462306a36Sopenharmony_ci			goto err;
401562306a36Sopenharmony_ci	}
401662306a36Sopenharmony_ci
401762306a36Sopenharmony_ci	ret = split_leaf(trans, root, &key, path, ins_len, 1);
401862306a36Sopenharmony_ci	if (ret)
401962306a36Sopenharmony_ci		goto err;
402062306a36Sopenharmony_ci
402162306a36Sopenharmony_ci	path->keep_locks = 0;
402262306a36Sopenharmony_ci	btrfs_unlock_up_safe(path, 1);
402362306a36Sopenharmony_ci	return 0;
402462306a36Sopenharmony_cierr:
402562306a36Sopenharmony_ci	path->keep_locks = 0;
402662306a36Sopenharmony_ci	return ret;
402762306a36Sopenharmony_ci}
402862306a36Sopenharmony_ci
402962306a36Sopenharmony_cistatic noinline int split_item(struct btrfs_trans_handle *trans,
403062306a36Sopenharmony_ci			       struct btrfs_path *path,
403162306a36Sopenharmony_ci			       const struct btrfs_key *new_key,
403262306a36Sopenharmony_ci			       unsigned long split_offset)
403362306a36Sopenharmony_ci{
403462306a36Sopenharmony_ci	struct extent_buffer *leaf;
403562306a36Sopenharmony_ci	int orig_slot, slot;
403662306a36Sopenharmony_ci	char *buf;
403762306a36Sopenharmony_ci	u32 nritems;
403862306a36Sopenharmony_ci	u32 item_size;
403962306a36Sopenharmony_ci	u32 orig_offset;
404062306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
404162306a36Sopenharmony_ci
404262306a36Sopenharmony_ci	leaf = path->nodes[0];
404362306a36Sopenharmony_ci	/*
404462306a36Sopenharmony_ci	 * Shouldn't happen because the caller must have previously called
404562306a36Sopenharmony_ci	 * setup_leaf_for_split() to make room for the new item in the leaf.
404662306a36Sopenharmony_ci	 */
404762306a36Sopenharmony_ci	if (WARN_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item)))
404862306a36Sopenharmony_ci		return -ENOSPC;
404962306a36Sopenharmony_ci
405062306a36Sopenharmony_ci	orig_slot = path->slots[0];
405162306a36Sopenharmony_ci	orig_offset = btrfs_item_offset(leaf, path->slots[0]);
405262306a36Sopenharmony_ci	item_size = btrfs_item_size(leaf, path->slots[0]);
405362306a36Sopenharmony_ci
405462306a36Sopenharmony_ci	buf = kmalloc(item_size, GFP_NOFS);
405562306a36Sopenharmony_ci	if (!buf)
405662306a36Sopenharmony_ci		return -ENOMEM;
405762306a36Sopenharmony_ci
405862306a36Sopenharmony_ci	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
405962306a36Sopenharmony_ci			    path->slots[0]), item_size);
406062306a36Sopenharmony_ci
406162306a36Sopenharmony_ci	slot = path->slots[0] + 1;
406262306a36Sopenharmony_ci	nritems = btrfs_header_nritems(leaf);
406362306a36Sopenharmony_ci	if (slot != nritems) {
406462306a36Sopenharmony_ci		/* shift the items */
406562306a36Sopenharmony_ci		memmove_leaf_items(leaf, slot + 1, slot, nritems - slot);
406662306a36Sopenharmony_ci	}
406762306a36Sopenharmony_ci
406862306a36Sopenharmony_ci	btrfs_cpu_key_to_disk(&disk_key, new_key);
406962306a36Sopenharmony_ci	btrfs_set_item_key(leaf, &disk_key, slot);
407062306a36Sopenharmony_ci
407162306a36Sopenharmony_ci	btrfs_set_item_offset(leaf, slot, orig_offset);
407262306a36Sopenharmony_ci	btrfs_set_item_size(leaf, slot, item_size - split_offset);
407362306a36Sopenharmony_ci
407462306a36Sopenharmony_ci	btrfs_set_item_offset(leaf, orig_slot,
407562306a36Sopenharmony_ci				 orig_offset + item_size - split_offset);
407662306a36Sopenharmony_ci	btrfs_set_item_size(leaf, orig_slot, split_offset);
407762306a36Sopenharmony_ci
407862306a36Sopenharmony_ci	btrfs_set_header_nritems(leaf, nritems + 1);
407962306a36Sopenharmony_ci
408062306a36Sopenharmony_ci	/* write the data for the start of the original item */
408162306a36Sopenharmony_ci	write_extent_buffer(leaf, buf,
408262306a36Sopenharmony_ci			    btrfs_item_ptr_offset(leaf, path->slots[0]),
408362306a36Sopenharmony_ci			    split_offset);
408462306a36Sopenharmony_ci
408562306a36Sopenharmony_ci	/* write the data for the new item */
408662306a36Sopenharmony_ci	write_extent_buffer(leaf, buf + split_offset,
408762306a36Sopenharmony_ci			    btrfs_item_ptr_offset(leaf, slot),
408862306a36Sopenharmony_ci			    item_size - split_offset);
408962306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, leaf);
409062306a36Sopenharmony_ci
409162306a36Sopenharmony_ci	BUG_ON(btrfs_leaf_free_space(leaf) < 0);
409262306a36Sopenharmony_ci	kfree(buf);
409362306a36Sopenharmony_ci	return 0;
409462306a36Sopenharmony_ci}
409562306a36Sopenharmony_ci
409662306a36Sopenharmony_ci/*
409762306a36Sopenharmony_ci * This function splits a single item into two items,
409862306a36Sopenharmony_ci * giving 'new_key' to the new item and splitting the
409962306a36Sopenharmony_ci * old one at split_offset (from the start of the item).
410062306a36Sopenharmony_ci *
410162306a36Sopenharmony_ci * The path may be released by this operation.  After
410262306a36Sopenharmony_ci * the split, the path is pointing to the old item.  The
410362306a36Sopenharmony_ci * new item is going to be in the same node as the old one.
410462306a36Sopenharmony_ci *
410562306a36Sopenharmony_ci * Note, the item being split must be smaller enough to live alone on
410662306a36Sopenharmony_ci * a tree block with room for one extra struct btrfs_item
410762306a36Sopenharmony_ci *
410862306a36Sopenharmony_ci * This allows us to split the item in place, keeping a lock on the
410962306a36Sopenharmony_ci * leaf the entire time.
411062306a36Sopenharmony_ci */
411162306a36Sopenharmony_ciint btrfs_split_item(struct btrfs_trans_handle *trans,
411262306a36Sopenharmony_ci		     struct btrfs_root *root,
411362306a36Sopenharmony_ci		     struct btrfs_path *path,
411462306a36Sopenharmony_ci		     const struct btrfs_key *new_key,
411562306a36Sopenharmony_ci		     unsigned long split_offset)
411662306a36Sopenharmony_ci{
411762306a36Sopenharmony_ci	int ret;
411862306a36Sopenharmony_ci	ret = setup_leaf_for_split(trans, root, path,
411962306a36Sopenharmony_ci				   sizeof(struct btrfs_item));
412062306a36Sopenharmony_ci	if (ret)
412162306a36Sopenharmony_ci		return ret;
412262306a36Sopenharmony_ci
412362306a36Sopenharmony_ci	ret = split_item(trans, path, new_key, split_offset);
412462306a36Sopenharmony_ci	return ret;
412562306a36Sopenharmony_ci}
412662306a36Sopenharmony_ci
412762306a36Sopenharmony_ci/*
412862306a36Sopenharmony_ci * make the item pointed to by the path smaller.  new_size indicates
412962306a36Sopenharmony_ci * how small to make it, and from_end tells us if we just chop bytes
413062306a36Sopenharmony_ci * off the end of the item or if we shift the item to chop bytes off
413162306a36Sopenharmony_ci * the front.
413262306a36Sopenharmony_ci */
413362306a36Sopenharmony_civoid btrfs_truncate_item(struct btrfs_trans_handle *trans,
413462306a36Sopenharmony_ci			 struct btrfs_path *path, u32 new_size, int from_end)
413562306a36Sopenharmony_ci{
413662306a36Sopenharmony_ci	int slot;
413762306a36Sopenharmony_ci	struct extent_buffer *leaf;
413862306a36Sopenharmony_ci	u32 nritems;
413962306a36Sopenharmony_ci	unsigned int data_end;
414062306a36Sopenharmony_ci	unsigned int old_data_start;
414162306a36Sopenharmony_ci	unsigned int old_size;
414262306a36Sopenharmony_ci	unsigned int size_diff;
414362306a36Sopenharmony_ci	int i;
414462306a36Sopenharmony_ci	struct btrfs_map_token token;
414562306a36Sopenharmony_ci
414662306a36Sopenharmony_ci	leaf = path->nodes[0];
414762306a36Sopenharmony_ci	slot = path->slots[0];
414862306a36Sopenharmony_ci
414962306a36Sopenharmony_ci	old_size = btrfs_item_size(leaf, slot);
415062306a36Sopenharmony_ci	if (old_size == new_size)
415162306a36Sopenharmony_ci		return;
415262306a36Sopenharmony_ci
415362306a36Sopenharmony_ci	nritems = btrfs_header_nritems(leaf);
415462306a36Sopenharmony_ci	data_end = leaf_data_end(leaf);
415562306a36Sopenharmony_ci
415662306a36Sopenharmony_ci	old_data_start = btrfs_item_offset(leaf, slot);
415762306a36Sopenharmony_ci
415862306a36Sopenharmony_ci	size_diff = old_size - new_size;
415962306a36Sopenharmony_ci
416062306a36Sopenharmony_ci	BUG_ON(slot < 0);
416162306a36Sopenharmony_ci	BUG_ON(slot >= nritems);
416262306a36Sopenharmony_ci
416362306a36Sopenharmony_ci	/*
416462306a36Sopenharmony_ci	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
416562306a36Sopenharmony_ci	 */
416662306a36Sopenharmony_ci	/* first correct the data pointers */
416762306a36Sopenharmony_ci	btrfs_init_map_token(&token, leaf);
416862306a36Sopenharmony_ci	for (i = slot; i < nritems; i++) {
416962306a36Sopenharmony_ci		u32 ioff;
417062306a36Sopenharmony_ci
417162306a36Sopenharmony_ci		ioff = btrfs_token_item_offset(&token, i);
417262306a36Sopenharmony_ci		btrfs_set_token_item_offset(&token, i, ioff + size_diff);
417362306a36Sopenharmony_ci	}
417462306a36Sopenharmony_ci
417562306a36Sopenharmony_ci	/* shift the data */
417662306a36Sopenharmony_ci	if (from_end) {
417762306a36Sopenharmony_ci		memmove_leaf_data(leaf, data_end + size_diff, data_end,
417862306a36Sopenharmony_ci				  old_data_start + new_size - data_end);
417962306a36Sopenharmony_ci	} else {
418062306a36Sopenharmony_ci		struct btrfs_disk_key disk_key;
418162306a36Sopenharmony_ci		u64 offset;
418262306a36Sopenharmony_ci
418362306a36Sopenharmony_ci		btrfs_item_key(leaf, &disk_key, slot);
418462306a36Sopenharmony_ci
418562306a36Sopenharmony_ci		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
418662306a36Sopenharmony_ci			unsigned long ptr;
418762306a36Sopenharmony_ci			struct btrfs_file_extent_item *fi;
418862306a36Sopenharmony_ci
418962306a36Sopenharmony_ci			fi = btrfs_item_ptr(leaf, slot,
419062306a36Sopenharmony_ci					    struct btrfs_file_extent_item);
419162306a36Sopenharmony_ci			fi = (struct btrfs_file_extent_item *)(
419262306a36Sopenharmony_ci			     (unsigned long)fi - size_diff);
419362306a36Sopenharmony_ci
419462306a36Sopenharmony_ci			if (btrfs_file_extent_type(leaf, fi) ==
419562306a36Sopenharmony_ci			    BTRFS_FILE_EXTENT_INLINE) {
419662306a36Sopenharmony_ci				ptr = btrfs_item_ptr_offset(leaf, slot);
419762306a36Sopenharmony_ci				memmove_extent_buffer(leaf, ptr,
419862306a36Sopenharmony_ci				      (unsigned long)fi,
419962306a36Sopenharmony_ci				      BTRFS_FILE_EXTENT_INLINE_DATA_START);
420062306a36Sopenharmony_ci			}
420162306a36Sopenharmony_ci		}
420262306a36Sopenharmony_ci
420362306a36Sopenharmony_ci		memmove_leaf_data(leaf, data_end + size_diff, data_end,
420462306a36Sopenharmony_ci				  old_data_start - data_end);
420562306a36Sopenharmony_ci
420662306a36Sopenharmony_ci		offset = btrfs_disk_key_offset(&disk_key);
420762306a36Sopenharmony_ci		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
420862306a36Sopenharmony_ci		btrfs_set_item_key(leaf, &disk_key, slot);
420962306a36Sopenharmony_ci		if (slot == 0)
421062306a36Sopenharmony_ci			fixup_low_keys(trans, path, &disk_key, 1);
421162306a36Sopenharmony_ci	}
421262306a36Sopenharmony_ci
421362306a36Sopenharmony_ci	btrfs_set_item_size(leaf, slot, new_size);
421462306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, leaf);
421562306a36Sopenharmony_ci
421662306a36Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) < 0) {
421762306a36Sopenharmony_ci		btrfs_print_leaf(leaf);
421862306a36Sopenharmony_ci		BUG();
421962306a36Sopenharmony_ci	}
422062306a36Sopenharmony_ci}
422162306a36Sopenharmony_ci
422262306a36Sopenharmony_ci/*
422362306a36Sopenharmony_ci * make the item pointed to by the path bigger, data_size is the added size.
422462306a36Sopenharmony_ci */
422562306a36Sopenharmony_civoid btrfs_extend_item(struct btrfs_trans_handle *trans,
422662306a36Sopenharmony_ci		       struct btrfs_path *path, u32 data_size)
422762306a36Sopenharmony_ci{
422862306a36Sopenharmony_ci	int slot;
422962306a36Sopenharmony_ci	struct extent_buffer *leaf;
423062306a36Sopenharmony_ci	u32 nritems;
423162306a36Sopenharmony_ci	unsigned int data_end;
423262306a36Sopenharmony_ci	unsigned int old_data;
423362306a36Sopenharmony_ci	unsigned int old_size;
423462306a36Sopenharmony_ci	int i;
423562306a36Sopenharmony_ci	struct btrfs_map_token token;
423662306a36Sopenharmony_ci
423762306a36Sopenharmony_ci	leaf = path->nodes[0];
423862306a36Sopenharmony_ci
423962306a36Sopenharmony_ci	nritems = btrfs_header_nritems(leaf);
424062306a36Sopenharmony_ci	data_end = leaf_data_end(leaf);
424162306a36Sopenharmony_ci
424262306a36Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) < data_size) {
424362306a36Sopenharmony_ci		btrfs_print_leaf(leaf);
424462306a36Sopenharmony_ci		BUG();
424562306a36Sopenharmony_ci	}
424662306a36Sopenharmony_ci	slot = path->slots[0];
424762306a36Sopenharmony_ci	old_data = btrfs_item_data_end(leaf, slot);
424862306a36Sopenharmony_ci
424962306a36Sopenharmony_ci	BUG_ON(slot < 0);
425062306a36Sopenharmony_ci	if (slot >= nritems) {
425162306a36Sopenharmony_ci		btrfs_print_leaf(leaf);
425262306a36Sopenharmony_ci		btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
425362306a36Sopenharmony_ci			   slot, nritems);
425462306a36Sopenharmony_ci		BUG();
425562306a36Sopenharmony_ci	}
425662306a36Sopenharmony_ci
425762306a36Sopenharmony_ci	/*
425862306a36Sopenharmony_ci	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
425962306a36Sopenharmony_ci	 */
426062306a36Sopenharmony_ci	/* first correct the data pointers */
426162306a36Sopenharmony_ci	btrfs_init_map_token(&token, leaf);
426262306a36Sopenharmony_ci	for (i = slot; i < nritems; i++) {
426362306a36Sopenharmony_ci		u32 ioff;
426462306a36Sopenharmony_ci
426562306a36Sopenharmony_ci		ioff = btrfs_token_item_offset(&token, i);
426662306a36Sopenharmony_ci		btrfs_set_token_item_offset(&token, i, ioff - data_size);
426762306a36Sopenharmony_ci	}
426862306a36Sopenharmony_ci
426962306a36Sopenharmony_ci	/* shift the data */
427062306a36Sopenharmony_ci	memmove_leaf_data(leaf, data_end - data_size, data_end,
427162306a36Sopenharmony_ci			  old_data - data_end);
427262306a36Sopenharmony_ci
427362306a36Sopenharmony_ci	data_end = old_data;
427462306a36Sopenharmony_ci	old_size = btrfs_item_size(leaf, slot);
427562306a36Sopenharmony_ci	btrfs_set_item_size(leaf, slot, old_size + data_size);
427662306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, leaf);
427762306a36Sopenharmony_ci
427862306a36Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) < 0) {
427962306a36Sopenharmony_ci		btrfs_print_leaf(leaf);
428062306a36Sopenharmony_ci		BUG();
428162306a36Sopenharmony_ci	}
428262306a36Sopenharmony_ci}
428362306a36Sopenharmony_ci
428462306a36Sopenharmony_ci/*
428562306a36Sopenharmony_ci * Make space in the node before inserting one or more items.
428662306a36Sopenharmony_ci *
428762306a36Sopenharmony_ci * @trans:	transaction handle
428862306a36Sopenharmony_ci * @root:	root we are inserting items to
428962306a36Sopenharmony_ci * @path:	points to the leaf/slot where we are going to insert new items
429062306a36Sopenharmony_ci * @batch:      information about the batch of items to insert
429162306a36Sopenharmony_ci *
429262306a36Sopenharmony_ci * Main purpose is to save stack depth by doing the bulk of the work in a
429362306a36Sopenharmony_ci * function that doesn't call btrfs_search_slot
429462306a36Sopenharmony_ci */
429562306a36Sopenharmony_cistatic void setup_items_for_insert(struct btrfs_trans_handle *trans,
429662306a36Sopenharmony_ci				   struct btrfs_root *root, struct btrfs_path *path,
429762306a36Sopenharmony_ci				   const struct btrfs_item_batch *batch)
429862306a36Sopenharmony_ci{
429962306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
430062306a36Sopenharmony_ci	int i;
430162306a36Sopenharmony_ci	u32 nritems;
430262306a36Sopenharmony_ci	unsigned int data_end;
430362306a36Sopenharmony_ci	struct btrfs_disk_key disk_key;
430462306a36Sopenharmony_ci	struct extent_buffer *leaf;
430562306a36Sopenharmony_ci	int slot;
430662306a36Sopenharmony_ci	struct btrfs_map_token token;
430762306a36Sopenharmony_ci	u32 total_size;
430862306a36Sopenharmony_ci
430962306a36Sopenharmony_ci	/*
431062306a36Sopenharmony_ci	 * Before anything else, update keys in the parent and other ancestors
431162306a36Sopenharmony_ci	 * if needed, then release the write locks on them, so that other tasks
431262306a36Sopenharmony_ci	 * can use them while we modify the leaf.
431362306a36Sopenharmony_ci	 */
431462306a36Sopenharmony_ci	if (path->slots[0] == 0) {
431562306a36Sopenharmony_ci		btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]);
431662306a36Sopenharmony_ci		fixup_low_keys(trans, path, &disk_key, 1);
431762306a36Sopenharmony_ci	}
431862306a36Sopenharmony_ci	btrfs_unlock_up_safe(path, 1);
431962306a36Sopenharmony_ci
432062306a36Sopenharmony_ci	leaf = path->nodes[0];
432162306a36Sopenharmony_ci	slot = path->slots[0];
432262306a36Sopenharmony_ci
432362306a36Sopenharmony_ci	nritems = btrfs_header_nritems(leaf);
432462306a36Sopenharmony_ci	data_end = leaf_data_end(leaf);
432562306a36Sopenharmony_ci	total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
432662306a36Sopenharmony_ci
432762306a36Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) < total_size) {
432862306a36Sopenharmony_ci		btrfs_print_leaf(leaf);
432962306a36Sopenharmony_ci		btrfs_crit(fs_info, "not enough freespace need %u have %d",
433062306a36Sopenharmony_ci			   total_size, btrfs_leaf_free_space(leaf));
433162306a36Sopenharmony_ci		BUG();
433262306a36Sopenharmony_ci	}
433362306a36Sopenharmony_ci
433462306a36Sopenharmony_ci	btrfs_init_map_token(&token, leaf);
433562306a36Sopenharmony_ci	if (slot != nritems) {
433662306a36Sopenharmony_ci		unsigned int old_data = btrfs_item_data_end(leaf, slot);
433762306a36Sopenharmony_ci
433862306a36Sopenharmony_ci		if (old_data < data_end) {
433962306a36Sopenharmony_ci			btrfs_print_leaf(leaf);
434062306a36Sopenharmony_ci			btrfs_crit(fs_info,
434162306a36Sopenharmony_ci		"item at slot %d with data offset %u beyond data end of leaf %u",
434262306a36Sopenharmony_ci				   slot, old_data, data_end);
434362306a36Sopenharmony_ci			BUG();
434462306a36Sopenharmony_ci		}
434562306a36Sopenharmony_ci		/*
434662306a36Sopenharmony_ci		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
434762306a36Sopenharmony_ci		 */
434862306a36Sopenharmony_ci		/* first correct the data pointers */
434962306a36Sopenharmony_ci		for (i = slot; i < nritems; i++) {
435062306a36Sopenharmony_ci			u32 ioff;
435162306a36Sopenharmony_ci
435262306a36Sopenharmony_ci			ioff = btrfs_token_item_offset(&token, i);
435362306a36Sopenharmony_ci			btrfs_set_token_item_offset(&token, i,
435462306a36Sopenharmony_ci						       ioff - batch->total_data_size);
435562306a36Sopenharmony_ci		}
435662306a36Sopenharmony_ci		/* shift the items */
435762306a36Sopenharmony_ci		memmove_leaf_items(leaf, slot + batch->nr, slot, nritems - slot);
435862306a36Sopenharmony_ci
435962306a36Sopenharmony_ci		/* shift the data */
436062306a36Sopenharmony_ci		memmove_leaf_data(leaf, data_end - batch->total_data_size,
436162306a36Sopenharmony_ci				  data_end, old_data - data_end);
436262306a36Sopenharmony_ci		data_end = old_data;
436362306a36Sopenharmony_ci	}
436462306a36Sopenharmony_ci
436562306a36Sopenharmony_ci	/* setup the item for the new data */
436662306a36Sopenharmony_ci	for (i = 0; i < batch->nr; i++) {
436762306a36Sopenharmony_ci		btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]);
436862306a36Sopenharmony_ci		btrfs_set_item_key(leaf, &disk_key, slot + i);
436962306a36Sopenharmony_ci		data_end -= batch->data_sizes[i];
437062306a36Sopenharmony_ci		btrfs_set_token_item_offset(&token, slot + i, data_end);
437162306a36Sopenharmony_ci		btrfs_set_token_item_size(&token, slot + i, batch->data_sizes[i]);
437262306a36Sopenharmony_ci	}
437362306a36Sopenharmony_ci
437462306a36Sopenharmony_ci	btrfs_set_header_nritems(leaf, nritems + batch->nr);
437562306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, leaf);
437662306a36Sopenharmony_ci
437762306a36Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) < 0) {
437862306a36Sopenharmony_ci		btrfs_print_leaf(leaf);
437962306a36Sopenharmony_ci		BUG();
438062306a36Sopenharmony_ci	}
438162306a36Sopenharmony_ci}
438262306a36Sopenharmony_ci
438362306a36Sopenharmony_ci/*
438462306a36Sopenharmony_ci * Insert a new item into a leaf.
438562306a36Sopenharmony_ci *
438662306a36Sopenharmony_ci * @trans:     Transaction handle.
438762306a36Sopenharmony_ci * @root:      The root of the btree.
438862306a36Sopenharmony_ci * @path:      A path pointing to the target leaf and slot.
438962306a36Sopenharmony_ci * @key:       The key of the new item.
439062306a36Sopenharmony_ci * @data_size: The size of the data associated with the new key.
439162306a36Sopenharmony_ci */
439262306a36Sopenharmony_civoid btrfs_setup_item_for_insert(struct btrfs_trans_handle *trans,
439362306a36Sopenharmony_ci				 struct btrfs_root *root,
439462306a36Sopenharmony_ci				 struct btrfs_path *path,
439562306a36Sopenharmony_ci				 const struct btrfs_key *key,
439662306a36Sopenharmony_ci				 u32 data_size)
439762306a36Sopenharmony_ci{
439862306a36Sopenharmony_ci	struct btrfs_item_batch batch;
439962306a36Sopenharmony_ci
440062306a36Sopenharmony_ci	batch.keys = key;
440162306a36Sopenharmony_ci	batch.data_sizes = &data_size;
440262306a36Sopenharmony_ci	batch.total_data_size = data_size;
440362306a36Sopenharmony_ci	batch.nr = 1;
440462306a36Sopenharmony_ci
440562306a36Sopenharmony_ci	setup_items_for_insert(trans, root, path, &batch);
440662306a36Sopenharmony_ci}
440762306a36Sopenharmony_ci
440862306a36Sopenharmony_ci/*
440962306a36Sopenharmony_ci * Given a key and some data, insert items into the tree.
441062306a36Sopenharmony_ci * This does all the path init required, making room in the tree if needed.
441162306a36Sopenharmony_ci */
441262306a36Sopenharmony_ciint btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
441362306a36Sopenharmony_ci			    struct btrfs_root *root,
441462306a36Sopenharmony_ci			    struct btrfs_path *path,
441562306a36Sopenharmony_ci			    const struct btrfs_item_batch *batch)
441662306a36Sopenharmony_ci{
441762306a36Sopenharmony_ci	int ret = 0;
441862306a36Sopenharmony_ci	int slot;
441962306a36Sopenharmony_ci	u32 total_size;
442062306a36Sopenharmony_ci
442162306a36Sopenharmony_ci	total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item));
442262306a36Sopenharmony_ci	ret = btrfs_search_slot(trans, root, &batch->keys[0], path, total_size, 1);
442362306a36Sopenharmony_ci	if (ret == 0)
442462306a36Sopenharmony_ci		return -EEXIST;
442562306a36Sopenharmony_ci	if (ret < 0)
442662306a36Sopenharmony_ci		return ret;
442762306a36Sopenharmony_ci
442862306a36Sopenharmony_ci	slot = path->slots[0];
442962306a36Sopenharmony_ci	BUG_ON(slot < 0);
443062306a36Sopenharmony_ci
443162306a36Sopenharmony_ci	setup_items_for_insert(trans, root, path, batch);
443262306a36Sopenharmony_ci	return 0;
443362306a36Sopenharmony_ci}
443462306a36Sopenharmony_ci
443562306a36Sopenharmony_ci/*
443662306a36Sopenharmony_ci * Given a key and some data, insert an item into the tree.
443762306a36Sopenharmony_ci * This does all the path init required, making room in the tree if needed.
443862306a36Sopenharmony_ci */
443962306a36Sopenharmony_ciint btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
444062306a36Sopenharmony_ci		      const struct btrfs_key *cpu_key, void *data,
444162306a36Sopenharmony_ci		      u32 data_size)
444262306a36Sopenharmony_ci{
444362306a36Sopenharmony_ci	int ret = 0;
444462306a36Sopenharmony_ci	struct btrfs_path *path;
444562306a36Sopenharmony_ci	struct extent_buffer *leaf;
444662306a36Sopenharmony_ci	unsigned long ptr;
444762306a36Sopenharmony_ci
444862306a36Sopenharmony_ci	path = btrfs_alloc_path();
444962306a36Sopenharmony_ci	if (!path)
445062306a36Sopenharmony_ci		return -ENOMEM;
445162306a36Sopenharmony_ci	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
445262306a36Sopenharmony_ci	if (!ret) {
445362306a36Sopenharmony_ci		leaf = path->nodes[0];
445462306a36Sopenharmony_ci		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
445562306a36Sopenharmony_ci		write_extent_buffer(leaf, data, ptr, data_size);
445662306a36Sopenharmony_ci		btrfs_mark_buffer_dirty(trans, leaf);
445762306a36Sopenharmony_ci	}
445862306a36Sopenharmony_ci	btrfs_free_path(path);
445962306a36Sopenharmony_ci	return ret;
446062306a36Sopenharmony_ci}
446162306a36Sopenharmony_ci
446262306a36Sopenharmony_ci/*
446362306a36Sopenharmony_ci * This function duplicates an item, giving 'new_key' to the new item.
446462306a36Sopenharmony_ci * It guarantees both items live in the same tree leaf and the new item is
446562306a36Sopenharmony_ci * contiguous with the original item.
446662306a36Sopenharmony_ci *
446762306a36Sopenharmony_ci * This allows us to split a file extent in place, keeping a lock on the leaf
446862306a36Sopenharmony_ci * the entire time.
446962306a36Sopenharmony_ci */
447062306a36Sopenharmony_ciint btrfs_duplicate_item(struct btrfs_trans_handle *trans,
447162306a36Sopenharmony_ci			 struct btrfs_root *root,
447262306a36Sopenharmony_ci			 struct btrfs_path *path,
447362306a36Sopenharmony_ci			 const struct btrfs_key *new_key)
447462306a36Sopenharmony_ci{
447562306a36Sopenharmony_ci	struct extent_buffer *leaf;
447662306a36Sopenharmony_ci	int ret;
447762306a36Sopenharmony_ci	u32 item_size;
447862306a36Sopenharmony_ci
447962306a36Sopenharmony_ci	leaf = path->nodes[0];
448062306a36Sopenharmony_ci	item_size = btrfs_item_size(leaf, path->slots[0]);
448162306a36Sopenharmony_ci	ret = setup_leaf_for_split(trans, root, path,
448262306a36Sopenharmony_ci				   item_size + sizeof(struct btrfs_item));
448362306a36Sopenharmony_ci	if (ret)
448462306a36Sopenharmony_ci		return ret;
448562306a36Sopenharmony_ci
448662306a36Sopenharmony_ci	path->slots[0]++;
448762306a36Sopenharmony_ci	btrfs_setup_item_for_insert(trans, root, path, new_key, item_size);
448862306a36Sopenharmony_ci	leaf = path->nodes[0];
448962306a36Sopenharmony_ci	memcpy_extent_buffer(leaf,
449062306a36Sopenharmony_ci			     btrfs_item_ptr_offset(leaf, path->slots[0]),
449162306a36Sopenharmony_ci			     btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
449262306a36Sopenharmony_ci			     item_size);
449362306a36Sopenharmony_ci	return 0;
449462306a36Sopenharmony_ci}
449562306a36Sopenharmony_ci
449662306a36Sopenharmony_ci/*
449762306a36Sopenharmony_ci * delete the pointer from a given node.
449862306a36Sopenharmony_ci *
449962306a36Sopenharmony_ci * the tree should have been previously balanced so the deletion does not
450062306a36Sopenharmony_ci * empty a node.
450162306a36Sopenharmony_ci *
450262306a36Sopenharmony_ci * This is exported for use inside btrfs-progs, don't un-export it.
450362306a36Sopenharmony_ci */
450462306a36Sopenharmony_ciint btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
450562306a36Sopenharmony_ci		  struct btrfs_path *path, int level, int slot)
450662306a36Sopenharmony_ci{
450762306a36Sopenharmony_ci	struct extent_buffer *parent = path->nodes[level];
450862306a36Sopenharmony_ci	u32 nritems;
450962306a36Sopenharmony_ci	int ret;
451062306a36Sopenharmony_ci
451162306a36Sopenharmony_ci	nritems = btrfs_header_nritems(parent);
451262306a36Sopenharmony_ci	if (slot != nritems - 1) {
451362306a36Sopenharmony_ci		if (level) {
451462306a36Sopenharmony_ci			ret = btrfs_tree_mod_log_insert_move(parent, slot,
451562306a36Sopenharmony_ci					slot + 1, nritems - slot - 1);
451662306a36Sopenharmony_ci			if (ret < 0) {
451762306a36Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
451862306a36Sopenharmony_ci				return ret;
451962306a36Sopenharmony_ci			}
452062306a36Sopenharmony_ci		}
452162306a36Sopenharmony_ci		memmove_extent_buffer(parent,
452262306a36Sopenharmony_ci			      btrfs_node_key_ptr_offset(parent, slot),
452362306a36Sopenharmony_ci			      btrfs_node_key_ptr_offset(parent, slot + 1),
452462306a36Sopenharmony_ci			      sizeof(struct btrfs_key_ptr) *
452562306a36Sopenharmony_ci			      (nritems - slot - 1));
452662306a36Sopenharmony_ci	} else if (level) {
452762306a36Sopenharmony_ci		ret = btrfs_tree_mod_log_insert_key(parent, slot,
452862306a36Sopenharmony_ci						    BTRFS_MOD_LOG_KEY_REMOVE);
452962306a36Sopenharmony_ci		if (ret < 0) {
453062306a36Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
453162306a36Sopenharmony_ci			return ret;
453262306a36Sopenharmony_ci		}
453362306a36Sopenharmony_ci	}
453462306a36Sopenharmony_ci
453562306a36Sopenharmony_ci	nritems--;
453662306a36Sopenharmony_ci	btrfs_set_header_nritems(parent, nritems);
453762306a36Sopenharmony_ci	if (nritems == 0 && parent == root->node) {
453862306a36Sopenharmony_ci		BUG_ON(btrfs_header_level(root->node) != 1);
453962306a36Sopenharmony_ci		/* just turn the root into a leaf and break */
454062306a36Sopenharmony_ci		btrfs_set_header_level(root->node, 0);
454162306a36Sopenharmony_ci	} else if (slot == 0) {
454262306a36Sopenharmony_ci		struct btrfs_disk_key disk_key;
454362306a36Sopenharmony_ci
454462306a36Sopenharmony_ci		btrfs_node_key(parent, &disk_key, 0);
454562306a36Sopenharmony_ci		fixup_low_keys(trans, path, &disk_key, level + 1);
454662306a36Sopenharmony_ci	}
454762306a36Sopenharmony_ci	btrfs_mark_buffer_dirty(trans, parent);
454862306a36Sopenharmony_ci	return 0;
454962306a36Sopenharmony_ci}
455062306a36Sopenharmony_ci
455162306a36Sopenharmony_ci/*
455262306a36Sopenharmony_ci * a helper function to delete the leaf pointed to by path->slots[1] and
455362306a36Sopenharmony_ci * path->nodes[1].
455462306a36Sopenharmony_ci *
455562306a36Sopenharmony_ci * This deletes the pointer in path->nodes[1] and frees the leaf
455662306a36Sopenharmony_ci * block extent.  zero is returned if it all worked out, < 0 otherwise.
455762306a36Sopenharmony_ci *
455862306a36Sopenharmony_ci * The path must have already been setup for deleting the leaf, including
455962306a36Sopenharmony_ci * all the proper balancing.  path->nodes[1] must be locked.
456062306a36Sopenharmony_ci */
456162306a36Sopenharmony_cistatic noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
456262306a36Sopenharmony_ci				   struct btrfs_root *root,
456362306a36Sopenharmony_ci				   struct btrfs_path *path,
456462306a36Sopenharmony_ci				   struct extent_buffer *leaf)
456562306a36Sopenharmony_ci{
456662306a36Sopenharmony_ci	int ret;
456762306a36Sopenharmony_ci
456862306a36Sopenharmony_ci	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
456962306a36Sopenharmony_ci	ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
457062306a36Sopenharmony_ci	if (ret < 0)
457162306a36Sopenharmony_ci		return ret;
457262306a36Sopenharmony_ci
457362306a36Sopenharmony_ci	/*
457462306a36Sopenharmony_ci	 * btrfs_free_extent is expensive, we want to make sure we
457562306a36Sopenharmony_ci	 * aren't holding any locks when we call it
457662306a36Sopenharmony_ci	 */
457762306a36Sopenharmony_ci	btrfs_unlock_up_safe(path, 0);
457862306a36Sopenharmony_ci
457962306a36Sopenharmony_ci	root_sub_used(root, leaf->len);
458062306a36Sopenharmony_ci
458162306a36Sopenharmony_ci	atomic_inc(&leaf->refs);
458262306a36Sopenharmony_ci	btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
458362306a36Sopenharmony_ci	free_extent_buffer_stale(leaf);
458462306a36Sopenharmony_ci	return 0;
458562306a36Sopenharmony_ci}
458662306a36Sopenharmony_ci/*
458762306a36Sopenharmony_ci * delete the item at the leaf level in path.  If that empties
458862306a36Sopenharmony_ci * the leaf, remove it from the tree
458962306a36Sopenharmony_ci */
459062306a36Sopenharmony_ciint btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
459162306a36Sopenharmony_ci		    struct btrfs_path *path, int slot, int nr)
459262306a36Sopenharmony_ci{
459362306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
459462306a36Sopenharmony_ci	struct extent_buffer *leaf;
459562306a36Sopenharmony_ci	int ret = 0;
459662306a36Sopenharmony_ci	int wret;
459762306a36Sopenharmony_ci	u32 nritems;
459862306a36Sopenharmony_ci
459962306a36Sopenharmony_ci	leaf = path->nodes[0];
460062306a36Sopenharmony_ci	nritems = btrfs_header_nritems(leaf);
460162306a36Sopenharmony_ci
460262306a36Sopenharmony_ci	if (slot + nr != nritems) {
460362306a36Sopenharmony_ci		const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
460462306a36Sopenharmony_ci		const int data_end = leaf_data_end(leaf);
460562306a36Sopenharmony_ci		struct btrfs_map_token token;
460662306a36Sopenharmony_ci		u32 dsize = 0;
460762306a36Sopenharmony_ci		int i;
460862306a36Sopenharmony_ci
460962306a36Sopenharmony_ci		for (i = 0; i < nr; i++)
461062306a36Sopenharmony_ci			dsize += btrfs_item_size(leaf, slot + i);
461162306a36Sopenharmony_ci
461262306a36Sopenharmony_ci		memmove_leaf_data(leaf, data_end + dsize, data_end,
461362306a36Sopenharmony_ci				  last_off - data_end);
461462306a36Sopenharmony_ci
461562306a36Sopenharmony_ci		btrfs_init_map_token(&token, leaf);
461662306a36Sopenharmony_ci		for (i = slot + nr; i < nritems; i++) {
461762306a36Sopenharmony_ci			u32 ioff;
461862306a36Sopenharmony_ci
461962306a36Sopenharmony_ci			ioff = btrfs_token_item_offset(&token, i);
462062306a36Sopenharmony_ci			btrfs_set_token_item_offset(&token, i, ioff + dsize);
462162306a36Sopenharmony_ci		}
462262306a36Sopenharmony_ci
462362306a36Sopenharmony_ci		memmove_leaf_items(leaf, slot, slot + nr, nritems - slot - nr);
462462306a36Sopenharmony_ci	}
462562306a36Sopenharmony_ci	btrfs_set_header_nritems(leaf, nritems - nr);
462662306a36Sopenharmony_ci	nritems -= nr;
462762306a36Sopenharmony_ci
462862306a36Sopenharmony_ci	/* delete the leaf if we've emptied it */
462962306a36Sopenharmony_ci	if (nritems == 0) {
463062306a36Sopenharmony_ci		if (leaf == root->node) {
463162306a36Sopenharmony_ci			btrfs_set_header_level(leaf, 0);
463262306a36Sopenharmony_ci		} else {
463362306a36Sopenharmony_ci			btrfs_clear_buffer_dirty(trans, leaf);
463462306a36Sopenharmony_ci			ret = btrfs_del_leaf(trans, root, path, leaf);
463562306a36Sopenharmony_ci			if (ret < 0)
463662306a36Sopenharmony_ci				return ret;
463762306a36Sopenharmony_ci		}
463862306a36Sopenharmony_ci	} else {
463962306a36Sopenharmony_ci		int used = leaf_space_used(leaf, 0, nritems);
464062306a36Sopenharmony_ci		if (slot == 0) {
464162306a36Sopenharmony_ci			struct btrfs_disk_key disk_key;
464262306a36Sopenharmony_ci
464362306a36Sopenharmony_ci			btrfs_item_key(leaf, &disk_key, 0);
464462306a36Sopenharmony_ci			fixup_low_keys(trans, path, &disk_key, 1);
464562306a36Sopenharmony_ci		}
464662306a36Sopenharmony_ci
464762306a36Sopenharmony_ci		/*
464862306a36Sopenharmony_ci		 * Try to delete the leaf if it is mostly empty. We do this by
464962306a36Sopenharmony_ci		 * trying to move all its items into its left and right neighbours.
465062306a36Sopenharmony_ci		 * If we can't move all the items, then we don't delete it - it's
465162306a36Sopenharmony_ci		 * not ideal, but future insertions might fill the leaf with more
465262306a36Sopenharmony_ci		 * items, or items from other leaves might be moved later into our
465362306a36Sopenharmony_ci		 * leaf due to deletions on those leaves.
465462306a36Sopenharmony_ci		 */
465562306a36Sopenharmony_ci		if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
465662306a36Sopenharmony_ci			u32 min_push_space;
465762306a36Sopenharmony_ci
465862306a36Sopenharmony_ci			/* push_leaf_left fixes the path.
465962306a36Sopenharmony_ci			 * make sure the path still points to our leaf
466062306a36Sopenharmony_ci			 * for possible call to btrfs_del_ptr below
466162306a36Sopenharmony_ci			 */
466262306a36Sopenharmony_ci			slot = path->slots[1];
466362306a36Sopenharmony_ci			atomic_inc(&leaf->refs);
466462306a36Sopenharmony_ci			/*
466562306a36Sopenharmony_ci			 * We want to be able to at least push one item to the
466662306a36Sopenharmony_ci			 * left neighbour leaf, and that's the first item.
466762306a36Sopenharmony_ci			 */
466862306a36Sopenharmony_ci			min_push_space = sizeof(struct btrfs_item) +
466962306a36Sopenharmony_ci				btrfs_item_size(leaf, 0);
467062306a36Sopenharmony_ci			wret = push_leaf_left(trans, root, path, 0,
467162306a36Sopenharmony_ci					      min_push_space, 1, (u32)-1);
467262306a36Sopenharmony_ci			if (wret < 0 && wret != -ENOSPC)
467362306a36Sopenharmony_ci				ret = wret;
467462306a36Sopenharmony_ci
467562306a36Sopenharmony_ci			if (path->nodes[0] == leaf &&
467662306a36Sopenharmony_ci			    btrfs_header_nritems(leaf)) {
467762306a36Sopenharmony_ci				/*
467862306a36Sopenharmony_ci				 * If we were not able to push all items from our
467962306a36Sopenharmony_ci				 * leaf to its left neighbour, then attempt to
468062306a36Sopenharmony_ci				 * either push all the remaining items to the
468162306a36Sopenharmony_ci				 * right neighbour or none. There's no advantage
468262306a36Sopenharmony_ci				 * in pushing only some items, instead of all, as
468362306a36Sopenharmony_ci				 * it's pointless to end up with a leaf having
468462306a36Sopenharmony_ci				 * too few items while the neighbours can be full
468562306a36Sopenharmony_ci				 * or nearly full.
468662306a36Sopenharmony_ci				 */
468762306a36Sopenharmony_ci				nritems = btrfs_header_nritems(leaf);
468862306a36Sopenharmony_ci				min_push_space = leaf_space_used(leaf, 0, nritems);
468962306a36Sopenharmony_ci				wret = push_leaf_right(trans, root, path, 0,
469062306a36Sopenharmony_ci						       min_push_space, 1, 0);
469162306a36Sopenharmony_ci				if (wret < 0 && wret != -ENOSPC)
469262306a36Sopenharmony_ci					ret = wret;
469362306a36Sopenharmony_ci			}
469462306a36Sopenharmony_ci
469562306a36Sopenharmony_ci			if (btrfs_header_nritems(leaf) == 0) {
469662306a36Sopenharmony_ci				path->slots[1] = slot;
469762306a36Sopenharmony_ci				ret = btrfs_del_leaf(trans, root, path, leaf);
469862306a36Sopenharmony_ci				if (ret < 0)
469962306a36Sopenharmony_ci					return ret;
470062306a36Sopenharmony_ci				free_extent_buffer(leaf);
470162306a36Sopenharmony_ci				ret = 0;
470262306a36Sopenharmony_ci			} else {
470362306a36Sopenharmony_ci				/* if we're still in the path, make sure
470462306a36Sopenharmony_ci				 * we're dirty.  Otherwise, one of the
470562306a36Sopenharmony_ci				 * push_leaf functions must have already
470662306a36Sopenharmony_ci				 * dirtied this buffer
470762306a36Sopenharmony_ci				 */
470862306a36Sopenharmony_ci				if (path->nodes[0] == leaf)
470962306a36Sopenharmony_ci					btrfs_mark_buffer_dirty(trans, leaf);
471062306a36Sopenharmony_ci				free_extent_buffer(leaf);
471162306a36Sopenharmony_ci			}
471262306a36Sopenharmony_ci		} else {
471362306a36Sopenharmony_ci			btrfs_mark_buffer_dirty(trans, leaf);
471462306a36Sopenharmony_ci		}
471562306a36Sopenharmony_ci	}
471662306a36Sopenharmony_ci	return ret;
471762306a36Sopenharmony_ci}
471862306a36Sopenharmony_ci
471962306a36Sopenharmony_ci/*
472062306a36Sopenharmony_ci * A helper function to walk down the tree starting at min_key, and looking
 * for nodes or leaves that have a minimum transaction id.
472262306a36Sopenharmony_ci * This is used by the btree defrag code, and tree logging
472362306a36Sopenharmony_ci *
472462306a36Sopenharmony_ci * This does not cow, but it does stuff the starting key it finds back
472562306a36Sopenharmony_ci * into min_key, so you can call btrfs_search_slot with cow=1 on the
472662306a36Sopenharmony_ci * key and get a writable path.
472762306a36Sopenharmony_ci *
472862306a36Sopenharmony_ci * This honors path->lowest_level to prevent descent past a given level
472962306a36Sopenharmony_ci * of the tree.
473062306a36Sopenharmony_ci *
473162306a36Sopenharmony_ci * min_trans indicates the oldest transaction that you are interested
473262306a36Sopenharmony_ci * in walking through.  Any nodes or leaves older than min_trans are
473362306a36Sopenharmony_ci * skipped over (without reading them).
473462306a36Sopenharmony_ci *
473562306a36Sopenharmony_ci * returns zero if something useful was found, < 0 on error and 1 if there
473662306a36Sopenharmony_ci * was nothing in the tree that matched the search criteria.
473762306a36Sopenharmony_ci */
473862306a36Sopenharmony_ciint btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
473962306a36Sopenharmony_ci			 struct btrfs_path *path,
474062306a36Sopenharmony_ci			 u64 min_trans)
474162306a36Sopenharmony_ci{
474262306a36Sopenharmony_ci	struct extent_buffer *cur;
474362306a36Sopenharmony_ci	struct btrfs_key found_key;
474462306a36Sopenharmony_ci	int slot;
474562306a36Sopenharmony_ci	int sret;
474662306a36Sopenharmony_ci	u32 nritems;
474762306a36Sopenharmony_ci	int level;
474862306a36Sopenharmony_ci	int ret = 1;
474962306a36Sopenharmony_ci	int keep_locks = path->keep_locks;
475062306a36Sopenharmony_ci
475162306a36Sopenharmony_ci	ASSERT(!path->nowait);
475262306a36Sopenharmony_ci	path->keep_locks = 1;
475362306a36Sopenharmony_ciagain:
475462306a36Sopenharmony_ci	cur = btrfs_read_lock_root_node(root);
475562306a36Sopenharmony_ci	level = btrfs_header_level(cur);
475662306a36Sopenharmony_ci	WARN_ON(path->nodes[level]);
475762306a36Sopenharmony_ci	path->nodes[level] = cur;
475862306a36Sopenharmony_ci	path->locks[level] = BTRFS_READ_LOCK;
475962306a36Sopenharmony_ci
476062306a36Sopenharmony_ci	if (btrfs_header_generation(cur) < min_trans) {
476162306a36Sopenharmony_ci		ret = 1;
476262306a36Sopenharmony_ci		goto out;
476362306a36Sopenharmony_ci	}
476462306a36Sopenharmony_ci	while (1) {
476562306a36Sopenharmony_ci		nritems = btrfs_header_nritems(cur);
476662306a36Sopenharmony_ci		level = btrfs_header_level(cur);
476762306a36Sopenharmony_ci		sret = btrfs_bin_search(cur, 0, min_key, &slot);
476862306a36Sopenharmony_ci		if (sret < 0) {
476962306a36Sopenharmony_ci			ret = sret;
477062306a36Sopenharmony_ci			goto out;
477162306a36Sopenharmony_ci		}
477262306a36Sopenharmony_ci
477362306a36Sopenharmony_ci		/* at the lowest level, we're done, setup the path and exit */
477462306a36Sopenharmony_ci		if (level == path->lowest_level) {
477562306a36Sopenharmony_ci			if (slot >= nritems)
477662306a36Sopenharmony_ci				goto find_next_key;
477762306a36Sopenharmony_ci			ret = 0;
477862306a36Sopenharmony_ci			path->slots[level] = slot;
477962306a36Sopenharmony_ci			btrfs_item_key_to_cpu(cur, &found_key, slot);
478062306a36Sopenharmony_ci			goto out;
478162306a36Sopenharmony_ci		}
478262306a36Sopenharmony_ci		if (sret && slot > 0)
478362306a36Sopenharmony_ci			slot--;
478462306a36Sopenharmony_ci		/*
478562306a36Sopenharmony_ci		 * check this node pointer against the min_trans parameters.
478662306a36Sopenharmony_ci		 * If it is too old, skip to the next one.
478762306a36Sopenharmony_ci		 */
478862306a36Sopenharmony_ci		while (slot < nritems) {
478962306a36Sopenharmony_ci			u64 gen;
479062306a36Sopenharmony_ci
479162306a36Sopenharmony_ci			gen = btrfs_node_ptr_generation(cur, slot);
479262306a36Sopenharmony_ci			if (gen < min_trans) {
479362306a36Sopenharmony_ci				slot++;
479462306a36Sopenharmony_ci				continue;
479562306a36Sopenharmony_ci			}
479662306a36Sopenharmony_ci			break;
479762306a36Sopenharmony_ci		}
479862306a36Sopenharmony_cifind_next_key:
479962306a36Sopenharmony_ci		/*
480062306a36Sopenharmony_ci		 * we didn't find a candidate key in this node, walk forward
480162306a36Sopenharmony_ci		 * and find another one
480262306a36Sopenharmony_ci		 */
480362306a36Sopenharmony_ci		if (slot >= nritems) {
480462306a36Sopenharmony_ci			path->slots[level] = slot;
480562306a36Sopenharmony_ci			sret = btrfs_find_next_key(root, path, min_key, level,
480662306a36Sopenharmony_ci						  min_trans);
480762306a36Sopenharmony_ci			if (sret == 0) {
480862306a36Sopenharmony_ci				btrfs_release_path(path);
480962306a36Sopenharmony_ci				goto again;
481062306a36Sopenharmony_ci			} else {
481162306a36Sopenharmony_ci				goto out;
481262306a36Sopenharmony_ci			}
481362306a36Sopenharmony_ci		}
481462306a36Sopenharmony_ci		/* save our key for returning back */
481562306a36Sopenharmony_ci		btrfs_node_key_to_cpu(cur, &found_key, slot);
481662306a36Sopenharmony_ci		path->slots[level] = slot;
481762306a36Sopenharmony_ci		if (level == path->lowest_level) {
481862306a36Sopenharmony_ci			ret = 0;
481962306a36Sopenharmony_ci			goto out;
482062306a36Sopenharmony_ci		}
482162306a36Sopenharmony_ci		cur = btrfs_read_node_slot(cur, slot);
482262306a36Sopenharmony_ci		if (IS_ERR(cur)) {
482362306a36Sopenharmony_ci			ret = PTR_ERR(cur);
482462306a36Sopenharmony_ci			goto out;
482562306a36Sopenharmony_ci		}
482662306a36Sopenharmony_ci
482762306a36Sopenharmony_ci		btrfs_tree_read_lock(cur);
482862306a36Sopenharmony_ci
482962306a36Sopenharmony_ci		path->locks[level - 1] = BTRFS_READ_LOCK;
483062306a36Sopenharmony_ci		path->nodes[level - 1] = cur;
483162306a36Sopenharmony_ci		unlock_up(path, level, 1, 0, NULL);
483262306a36Sopenharmony_ci	}
483362306a36Sopenharmony_ciout:
483462306a36Sopenharmony_ci	path->keep_locks = keep_locks;
483562306a36Sopenharmony_ci	if (ret == 0) {
483662306a36Sopenharmony_ci		btrfs_unlock_up_safe(path, path->lowest_level + 1);
483762306a36Sopenharmony_ci		memcpy(min_key, &found_key, sizeof(found_key));
483862306a36Sopenharmony_ci	}
483962306a36Sopenharmony_ci	return ret;
484062306a36Sopenharmony_ci}
484162306a36Sopenharmony_ci
484262306a36Sopenharmony_ci/*
484362306a36Sopenharmony_ci * this is similar to btrfs_next_leaf, but does not try to preserve
484462306a36Sopenharmony_ci * and fixup the path.  It looks for and returns the next key in the
484562306a36Sopenharmony_ci * tree based on the current path and the min_trans parameters.
484662306a36Sopenharmony_ci *
484762306a36Sopenharmony_ci * 0 is returned if another key is found, < 0 if there are any errors
484862306a36Sopenharmony_ci * and 1 is returned if there are no higher keys in the tree
484962306a36Sopenharmony_ci *
485062306a36Sopenharmony_ci * path->keep_locks should be set to 1 on the search made before
485162306a36Sopenharmony_ci * calling this function.
485262306a36Sopenharmony_ci */
485362306a36Sopenharmony_ciint btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
485462306a36Sopenharmony_ci			struct btrfs_key *key, int level, u64 min_trans)
485562306a36Sopenharmony_ci{
485662306a36Sopenharmony_ci	int slot;
485762306a36Sopenharmony_ci	struct extent_buffer *c;
485862306a36Sopenharmony_ci
485962306a36Sopenharmony_ci	WARN_ON(!path->keep_locks && !path->skip_locking);
486062306a36Sopenharmony_ci	while (level < BTRFS_MAX_LEVEL) {
486162306a36Sopenharmony_ci		if (!path->nodes[level])
486262306a36Sopenharmony_ci			return 1;
486362306a36Sopenharmony_ci
486462306a36Sopenharmony_ci		slot = path->slots[level] + 1;
486562306a36Sopenharmony_ci		c = path->nodes[level];
486662306a36Sopenharmony_cinext:
486762306a36Sopenharmony_ci		if (slot >= btrfs_header_nritems(c)) {
486862306a36Sopenharmony_ci			int ret;
486962306a36Sopenharmony_ci			int orig_lowest;
487062306a36Sopenharmony_ci			struct btrfs_key cur_key;
487162306a36Sopenharmony_ci			if (level + 1 >= BTRFS_MAX_LEVEL ||
487262306a36Sopenharmony_ci			    !path->nodes[level + 1])
487362306a36Sopenharmony_ci				return 1;
487462306a36Sopenharmony_ci
487562306a36Sopenharmony_ci			if (path->locks[level + 1] || path->skip_locking) {
487662306a36Sopenharmony_ci				level++;
487762306a36Sopenharmony_ci				continue;
487862306a36Sopenharmony_ci			}
487962306a36Sopenharmony_ci
488062306a36Sopenharmony_ci			slot = btrfs_header_nritems(c) - 1;
488162306a36Sopenharmony_ci			if (level == 0)
488262306a36Sopenharmony_ci				btrfs_item_key_to_cpu(c, &cur_key, slot);
488362306a36Sopenharmony_ci			else
488462306a36Sopenharmony_ci				btrfs_node_key_to_cpu(c, &cur_key, slot);
488562306a36Sopenharmony_ci
488662306a36Sopenharmony_ci			orig_lowest = path->lowest_level;
488762306a36Sopenharmony_ci			btrfs_release_path(path);
488862306a36Sopenharmony_ci			path->lowest_level = level;
488962306a36Sopenharmony_ci			ret = btrfs_search_slot(NULL, root, &cur_key, path,
489062306a36Sopenharmony_ci						0, 0);
489162306a36Sopenharmony_ci			path->lowest_level = orig_lowest;
489262306a36Sopenharmony_ci			if (ret < 0)
489362306a36Sopenharmony_ci				return ret;
489462306a36Sopenharmony_ci
489562306a36Sopenharmony_ci			c = path->nodes[level];
489662306a36Sopenharmony_ci			slot = path->slots[level];
489762306a36Sopenharmony_ci			if (ret == 0)
489862306a36Sopenharmony_ci				slot++;
489962306a36Sopenharmony_ci			goto next;
490062306a36Sopenharmony_ci		}
490162306a36Sopenharmony_ci
490262306a36Sopenharmony_ci		if (level == 0)
490362306a36Sopenharmony_ci			btrfs_item_key_to_cpu(c, key, slot);
490462306a36Sopenharmony_ci		else {
490562306a36Sopenharmony_ci			u64 gen = btrfs_node_ptr_generation(c, slot);
490662306a36Sopenharmony_ci
490762306a36Sopenharmony_ci			if (gen < min_trans) {
490862306a36Sopenharmony_ci				slot++;
490962306a36Sopenharmony_ci				goto next;
491062306a36Sopenharmony_ci			}
491162306a36Sopenharmony_ci			btrfs_node_key_to_cpu(c, key, slot);
491262306a36Sopenharmony_ci		}
491362306a36Sopenharmony_ci		return 0;
491462306a36Sopenharmony_ci	}
491562306a36Sopenharmony_ci	return 1;
491662306a36Sopenharmony_ci}
491762306a36Sopenharmony_ci
491862306a36Sopenharmony_ciint btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
491962306a36Sopenharmony_ci			u64 time_seq)
492062306a36Sopenharmony_ci{
492162306a36Sopenharmony_ci	int slot;
492262306a36Sopenharmony_ci	int level;
492362306a36Sopenharmony_ci	struct extent_buffer *c;
492462306a36Sopenharmony_ci	struct extent_buffer *next;
492562306a36Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
492662306a36Sopenharmony_ci	struct btrfs_key key;
492762306a36Sopenharmony_ci	bool need_commit_sem = false;
492862306a36Sopenharmony_ci	u32 nritems;
492962306a36Sopenharmony_ci	int ret;
493062306a36Sopenharmony_ci	int i;
493162306a36Sopenharmony_ci
493262306a36Sopenharmony_ci	/*
493362306a36Sopenharmony_ci	 * The nowait semantics are used only for write paths, where we don't
493462306a36Sopenharmony_ci	 * use the tree mod log and sequence numbers.
493562306a36Sopenharmony_ci	 */
493662306a36Sopenharmony_ci	if (time_seq)
493762306a36Sopenharmony_ci		ASSERT(!path->nowait);
493862306a36Sopenharmony_ci
493962306a36Sopenharmony_ci	nritems = btrfs_header_nritems(path->nodes[0]);
494062306a36Sopenharmony_ci	if (nritems == 0)
494162306a36Sopenharmony_ci		return 1;
494262306a36Sopenharmony_ci
494362306a36Sopenharmony_ci	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
494462306a36Sopenharmony_ciagain:
494562306a36Sopenharmony_ci	level = 1;
494662306a36Sopenharmony_ci	next = NULL;
494762306a36Sopenharmony_ci	btrfs_release_path(path);
494862306a36Sopenharmony_ci
494962306a36Sopenharmony_ci	path->keep_locks = 1;
495062306a36Sopenharmony_ci
495162306a36Sopenharmony_ci	if (time_seq) {
495262306a36Sopenharmony_ci		ret = btrfs_search_old_slot(root, &key, path, time_seq);
495362306a36Sopenharmony_ci	} else {
495462306a36Sopenharmony_ci		if (path->need_commit_sem) {
495562306a36Sopenharmony_ci			path->need_commit_sem = 0;
495662306a36Sopenharmony_ci			need_commit_sem = true;
495762306a36Sopenharmony_ci			if (path->nowait) {
495862306a36Sopenharmony_ci				if (!down_read_trylock(&fs_info->commit_root_sem)) {
495962306a36Sopenharmony_ci					ret = -EAGAIN;
496062306a36Sopenharmony_ci					goto done;
496162306a36Sopenharmony_ci				}
496262306a36Sopenharmony_ci			} else {
496362306a36Sopenharmony_ci				down_read(&fs_info->commit_root_sem);
496462306a36Sopenharmony_ci			}
496562306a36Sopenharmony_ci		}
496662306a36Sopenharmony_ci		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
496762306a36Sopenharmony_ci	}
496862306a36Sopenharmony_ci	path->keep_locks = 0;
496962306a36Sopenharmony_ci
497062306a36Sopenharmony_ci	if (ret < 0)
497162306a36Sopenharmony_ci		goto done;
497262306a36Sopenharmony_ci
497362306a36Sopenharmony_ci	nritems = btrfs_header_nritems(path->nodes[0]);
497462306a36Sopenharmony_ci	/*
497562306a36Sopenharmony_ci	 * by releasing the path above we dropped all our locks.  A balance
497662306a36Sopenharmony_ci	 * could have added more items next to the key that used to be
497762306a36Sopenharmony_ci	 * at the very end of the block.  So, check again here and
497862306a36Sopenharmony_ci	 * advance the path if there are now more items available.
497962306a36Sopenharmony_ci	 */
498062306a36Sopenharmony_ci	if (nritems > 0 && path->slots[0] < nritems - 1) {
498162306a36Sopenharmony_ci		if (ret == 0)
498262306a36Sopenharmony_ci			path->slots[0]++;
498362306a36Sopenharmony_ci		ret = 0;
498462306a36Sopenharmony_ci		goto done;
498562306a36Sopenharmony_ci	}
498662306a36Sopenharmony_ci	/*
498762306a36Sopenharmony_ci	 * So the above check misses one case:
498862306a36Sopenharmony_ci	 * - after releasing the path above, someone has removed the item that
498962306a36Sopenharmony_ci	 *   used to be at the very end of the block, and balance between leafs
499062306a36Sopenharmony_ci	 *   gets another one with bigger key.offset to replace it.
499162306a36Sopenharmony_ci	 *
499262306a36Sopenharmony_ci	 * This one should be returned as well, or we can get leaf corruption
499362306a36Sopenharmony_ci	 * later(esp. in __btrfs_drop_extents()).
499462306a36Sopenharmony_ci	 *
499562306a36Sopenharmony_ci	 * And a bit more explanation about this check,
499662306a36Sopenharmony_ci	 * with ret > 0, the key isn't found, the path points to the slot
499762306a36Sopenharmony_ci	 * where it should be inserted, so the path->slots[0] item must be the
499862306a36Sopenharmony_ci	 * bigger one.
499962306a36Sopenharmony_ci	 */
500062306a36Sopenharmony_ci	if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
500162306a36Sopenharmony_ci		ret = 0;
500262306a36Sopenharmony_ci		goto done;
500362306a36Sopenharmony_ci	}
500462306a36Sopenharmony_ci
500562306a36Sopenharmony_ci	while (level < BTRFS_MAX_LEVEL) {
500662306a36Sopenharmony_ci		if (!path->nodes[level]) {
500762306a36Sopenharmony_ci			ret = 1;
500862306a36Sopenharmony_ci			goto done;
500962306a36Sopenharmony_ci		}
501062306a36Sopenharmony_ci
501162306a36Sopenharmony_ci		slot = path->slots[level] + 1;
501262306a36Sopenharmony_ci		c = path->nodes[level];
501362306a36Sopenharmony_ci		if (slot >= btrfs_header_nritems(c)) {
501462306a36Sopenharmony_ci			level++;
501562306a36Sopenharmony_ci			if (level == BTRFS_MAX_LEVEL) {
501662306a36Sopenharmony_ci				ret = 1;
501762306a36Sopenharmony_ci				goto done;
501862306a36Sopenharmony_ci			}
501962306a36Sopenharmony_ci			continue;
502062306a36Sopenharmony_ci		}
502162306a36Sopenharmony_ci
502262306a36Sopenharmony_ci
502362306a36Sopenharmony_ci		/*
502462306a36Sopenharmony_ci		 * Our current level is where we're going to start from, and to
502562306a36Sopenharmony_ci		 * make sure lockdep doesn't complain we need to drop our locks
502662306a36Sopenharmony_ci		 * and nodes from 0 to our current level.
502762306a36Sopenharmony_ci		 */
502862306a36Sopenharmony_ci		for (i = 0; i < level; i++) {
502962306a36Sopenharmony_ci			if (path->locks[level]) {
503062306a36Sopenharmony_ci				btrfs_tree_read_unlock(path->nodes[i]);
503162306a36Sopenharmony_ci				path->locks[i] = 0;
503262306a36Sopenharmony_ci			}
503362306a36Sopenharmony_ci			free_extent_buffer(path->nodes[i]);
503462306a36Sopenharmony_ci			path->nodes[i] = NULL;
503562306a36Sopenharmony_ci		}
503662306a36Sopenharmony_ci
503762306a36Sopenharmony_ci		next = c;
503862306a36Sopenharmony_ci		ret = read_block_for_search(root, path, &next, level,
503962306a36Sopenharmony_ci					    slot, &key);
504062306a36Sopenharmony_ci		if (ret == -EAGAIN && !path->nowait)
504162306a36Sopenharmony_ci			goto again;
504262306a36Sopenharmony_ci
504362306a36Sopenharmony_ci		if (ret < 0) {
504462306a36Sopenharmony_ci			btrfs_release_path(path);
504562306a36Sopenharmony_ci			goto done;
504662306a36Sopenharmony_ci		}
504762306a36Sopenharmony_ci
504862306a36Sopenharmony_ci		if (!path->skip_locking) {
504962306a36Sopenharmony_ci			ret = btrfs_try_tree_read_lock(next);
505062306a36Sopenharmony_ci			if (!ret && path->nowait) {
505162306a36Sopenharmony_ci				ret = -EAGAIN;
505262306a36Sopenharmony_ci				goto done;
505362306a36Sopenharmony_ci			}
505462306a36Sopenharmony_ci			if (!ret && time_seq) {
505562306a36Sopenharmony_ci				/*
505662306a36Sopenharmony_ci				 * If we don't get the lock, we may be racing
505762306a36Sopenharmony_ci				 * with push_leaf_left, holding that lock while
505862306a36Sopenharmony_ci				 * itself waiting for the leaf we've currently
505962306a36Sopenharmony_ci				 * locked. To solve this situation, we give up
506062306a36Sopenharmony_ci				 * on our lock and cycle.
506162306a36Sopenharmony_ci				 */
506262306a36Sopenharmony_ci				free_extent_buffer(next);
506362306a36Sopenharmony_ci				btrfs_release_path(path);
506462306a36Sopenharmony_ci				cond_resched();
506562306a36Sopenharmony_ci				goto again;
506662306a36Sopenharmony_ci			}
506762306a36Sopenharmony_ci			if (!ret)
506862306a36Sopenharmony_ci				btrfs_tree_read_lock(next);
506962306a36Sopenharmony_ci		}
507062306a36Sopenharmony_ci		break;
507162306a36Sopenharmony_ci	}
507262306a36Sopenharmony_ci	path->slots[level] = slot;
507362306a36Sopenharmony_ci	while (1) {
507462306a36Sopenharmony_ci		level--;
507562306a36Sopenharmony_ci		path->nodes[level] = next;
507662306a36Sopenharmony_ci		path->slots[level] = 0;
507762306a36Sopenharmony_ci		if (!path->skip_locking)
507862306a36Sopenharmony_ci			path->locks[level] = BTRFS_READ_LOCK;
507962306a36Sopenharmony_ci		if (!level)
508062306a36Sopenharmony_ci			break;
508162306a36Sopenharmony_ci
508262306a36Sopenharmony_ci		ret = read_block_for_search(root, path, &next, level,
508362306a36Sopenharmony_ci					    0, &key);
508462306a36Sopenharmony_ci		if (ret == -EAGAIN && !path->nowait)
508562306a36Sopenharmony_ci			goto again;
508662306a36Sopenharmony_ci
508762306a36Sopenharmony_ci		if (ret < 0) {
508862306a36Sopenharmony_ci			btrfs_release_path(path);
508962306a36Sopenharmony_ci			goto done;
509062306a36Sopenharmony_ci		}
509162306a36Sopenharmony_ci
509262306a36Sopenharmony_ci		if (!path->skip_locking) {
509362306a36Sopenharmony_ci			if (path->nowait) {
509462306a36Sopenharmony_ci				if (!btrfs_try_tree_read_lock(next)) {
509562306a36Sopenharmony_ci					ret = -EAGAIN;
509662306a36Sopenharmony_ci					goto done;
509762306a36Sopenharmony_ci				}
509862306a36Sopenharmony_ci			} else {
509962306a36Sopenharmony_ci				btrfs_tree_read_lock(next);
510062306a36Sopenharmony_ci			}
510162306a36Sopenharmony_ci		}
510262306a36Sopenharmony_ci	}
510362306a36Sopenharmony_ci	ret = 0;
510462306a36Sopenharmony_cidone:
510562306a36Sopenharmony_ci	unlock_up(path, 0, 1, 0, NULL);
510662306a36Sopenharmony_ci	if (need_commit_sem) {
510762306a36Sopenharmony_ci		int ret2;
510862306a36Sopenharmony_ci
510962306a36Sopenharmony_ci		path->need_commit_sem = 1;
511062306a36Sopenharmony_ci		ret2 = finish_need_commit_sem_search(path);
511162306a36Sopenharmony_ci		up_read(&fs_info->commit_root_sem);
511262306a36Sopenharmony_ci		if (ret2)
511362306a36Sopenharmony_ci			ret = ret2;
511462306a36Sopenharmony_ci	}
511562306a36Sopenharmony_ci
511662306a36Sopenharmony_ci	return ret;
511762306a36Sopenharmony_ci}
511862306a36Sopenharmony_ci
511962306a36Sopenharmony_ciint btrfs_next_old_item(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq)
512062306a36Sopenharmony_ci{
512162306a36Sopenharmony_ci	path->slots[0]++;
512262306a36Sopenharmony_ci	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0]))
512362306a36Sopenharmony_ci		return btrfs_next_old_leaf(root, path, time_seq);
512462306a36Sopenharmony_ci	return 0;
512562306a36Sopenharmony_ci}
512662306a36Sopenharmony_ci
512762306a36Sopenharmony_ci/*
512862306a36Sopenharmony_ci * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
512962306a36Sopenharmony_ci * searching until it gets past min_objectid or finds an item of 'type'
513062306a36Sopenharmony_ci *
513162306a36Sopenharmony_ci * returns 0 if something is found, 1 if nothing was found and < 0 on error
513262306a36Sopenharmony_ci */
513362306a36Sopenharmony_ciint btrfs_previous_item(struct btrfs_root *root,
513462306a36Sopenharmony_ci			struct btrfs_path *path, u64 min_objectid,
513562306a36Sopenharmony_ci			int type)
513662306a36Sopenharmony_ci{
513762306a36Sopenharmony_ci	struct btrfs_key found_key;
513862306a36Sopenharmony_ci	struct extent_buffer *leaf;
513962306a36Sopenharmony_ci	u32 nritems;
514062306a36Sopenharmony_ci	int ret;
514162306a36Sopenharmony_ci
514262306a36Sopenharmony_ci	while (1) {
514362306a36Sopenharmony_ci		if (path->slots[0] == 0) {
514462306a36Sopenharmony_ci			ret = btrfs_prev_leaf(root, path);
514562306a36Sopenharmony_ci			if (ret != 0)
514662306a36Sopenharmony_ci				return ret;
514762306a36Sopenharmony_ci		} else {
514862306a36Sopenharmony_ci			path->slots[0]--;
514962306a36Sopenharmony_ci		}
515062306a36Sopenharmony_ci		leaf = path->nodes[0];
515162306a36Sopenharmony_ci		nritems = btrfs_header_nritems(leaf);
515262306a36Sopenharmony_ci		if (nritems == 0)
515362306a36Sopenharmony_ci			return 1;
515462306a36Sopenharmony_ci		if (path->slots[0] == nritems)
515562306a36Sopenharmony_ci			path->slots[0]--;
515662306a36Sopenharmony_ci
515762306a36Sopenharmony_ci		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
515862306a36Sopenharmony_ci		if (found_key.objectid < min_objectid)
515962306a36Sopenharmony_ci			break;
516062306a36Sopenharmony_ci		if (found_key.type == type)
516162306a36Sopenharmony_ci			return 0;
516262306a36Sopenharmony_ci		if (found_key.objectid == min_objectid &&
516362306a36Sopenharmony_ci		    found_key.type < type)
516462306a36Sopenharmony_ci			break;
516562306a36Sopenharmony_ci	}
516662306a36Sopenharmony_ci	return 1;
516762306a36Sopenharmony_ci}
516862306a36Sopenharmony_ci
516962306a36Sopenharmony_ci/*
517062306a36Sopenharmony_ci * search in extent tree to find a previous Metadata/Data extent item with
517162306a36Sopenharmony_ci * min objecitd.
517262306a36Sopenharmony_ci *
517362306a36Sopenharmony_ci * returns 0 if something is found, 1 if nothing was found and < 0 on error
517462306a36Sopenharmony_ci */
517562306a36Sopenharmony_ciint btrfs_previous_extent_item(struct btrfs_root *root,
517662306a36Sopenharmony_ci			struct btrfs_path *path, u64 min_objectid)
517762306a36Sopenharmony_ci{
517862306a36Sopenharmony_ci	struct btrfs_key found_key;
517962306a36Sopenharmony_ci	struct extent_buffer *leaf;
518062306a36Sopenharmony_ci	u32 nritems;
518162306a36Sopenharmony_ci	int ret;
518262306a36Sopenharmony_ci
518362306a36Sopenharmony_ci	while (1) {
518462306a36Sopenharmony_ci		if (path->slots[0] == 0) {
518562306a36Sopenharmony_ci			ret = btrfs_prev_leaf(root, path);
518662306a36Sopenharmony_ci			if (ret != 0)
518762306a36Sopenharmony_ci				return ret;
518862306a36Sopenharmony_ci		} else {
518962306a36Sopenharmony_ci			path->slots[0]--;
519062306a36Sopenharmony_ci		}
519162306a36Sopenharmony_ci		leaf = path->nodes[0];
519262306a36Sopenharmony_ci		nritems = btrfs_header_nritems(leaf);
519362306a36Sopenharmony_ci		if (nritems == 0)
519462306a36Sopenharmony_ci			return 1;
519562306a36Sopenharmony_ci		if (path->slots[0] == nritems)
519662306a36Sopenharmony_ci			path->slots[0]--;
519762306a36Sopenharmony_ci
519862306a36Sopenharmony_ci		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
519962306a36Sopenharmony_ci		if (found_key.objectid < min_objectid)
520062306a36Sopenharmony_ci			break;
520162306a36Sopenharmony_ci		if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
520262306a36Sopenharmony_ci		    found_key.type == BTRFS_METADATA_ITEM_KEY)
520362306a36Sopenharmony_ci			return 0;
520462306a36Sopenharmony_ci		if (found_key.objectid == min_objectid &&
520562306a36Sopenharmony_ci		    found_key.type < BTRFS_EXTENT_ITEM_KEY)
520662306a36Sopenharmony_ci			break;
520762306a36Sopenharmony_ci	}
520862306a36Sopenharmony_ci	return 1;
520962306a36Sopenharmony_ci}
521062306a36Sopenharmony_ci
521162306a36Sopenharmony_ciint __init btrfs_ctree_init(void)
521262306a36Sopenharmony_ci{
521362306a36Sopenharmony_ci	btrfs_path_cachep = kmem_cache_create("btrfs_path",
521462306a36Sopenharmony_ci			sizeof(struct btrfs_path), 0,
521562306a36Sopenharmony_ci			SLAB_MEM_SPREAD, NULL);
521662306a36Sopenharmony_ci	if (!btrfs_path_cachep)
521762306a36Sopenharmony_ci		return -ENOMEM;
521862306a36Sopenharmony_ci	return 0;
521962306a36Sopenharmony_ci}
522062306a36Sopenharmony_ci
522162306a36Sopenharmony_civoid __cold btrfs_ctree_exit(void)
522262306a36Sopenharmony_ci{
522362306a36Sopenharmony_ci	kmem_cache_destroy(btrfs_path_cachep);
522462306a36Sopenharmony_ci}
5225