xref: /kernel/linux/linux-5.10/fs/btrfs/ctree.c (revision 8c2ecf20)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007,2008 Oracle.  All rights reserved.
 */
58c2ecf20Sopenharmony_ci
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/mm.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "locking.h"
#include "volumes.h"
#include "qgroup.h"
178c2ecf20Sopenharmony_ci
/* Prototypes for file-local (static) helpers. */
static int split_node(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      struct btrfs_path *path,
		      int level);
static int split_leaf(struct btrfs_trans_handle *trans,
		      struct btrfs_root *root,
		      const struct btrfs_key *ins_key,
		      struct btrfs_path *path,
		      int data_size,
		      int extend);
static int push_node_left(struct btrfs_trans_handle *trans,
			  struct extent_buffer *dst,
			  struct extent_buffer *src,
			  int empty);
static int balance_node_right(struct btrfs_trans_handle *trans,
			      struct extent_buffer *dst_buf,
			      struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root,
		    struct btrfs_path *path,
		    int level,
		    int slot);
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_cistatic const struct btrfs_csums {
338c2ecf20Sopenharmony_ci	u16		size;
348c2ecf20Sopenharmony_ci	const char	name[10];
358c2ecf20Sopenharmony_ci	const char	driver[12];
368c2ecf20Sopenharmony_ci} btrfs_csums[] = {
378c2ecf20Sopenharmony_ci	[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
388c2ecf20Sopenharmony_ci	[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
398c2ecf20Sopenharmony_ci	[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
408c2ecf20Sopenharmony_ci	[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
418c2ecf20Sopenharmony_ci				     .driver = "blake2b-256" },
428c2ecf20Sopenharmony_ci};
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ciint btrfs_super_csum_size(const struct btrfs_super_block *s)
458c2ecf20Sopenharmony_ci{
468c2ecf20Sopenharmony_ci	u16 t = btrfs_super_csum_type(s);
478c2ecf20Sopenharmony_ci	/*
488c2ecf20Sopenharmony_ci	 * csum type is validated at mount time
498c2ecf20Sopenharmony_ci	 */
508c2ecf20Sopenharmony_ci	return btrfs_csums[t].size;
518c2ecf20Sopenharmony_ci}
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ciconst char *btrfs_super_csum_name(u16 csum_type)
548c2ecf20Sopenharmony_ci{
558c2ecf20Sopenharmony_ci	/* csum type is validated at mount time */
568c2ecf20Sopenharmony_ci	return btrfs_csums[csum_type].name;
578c2ecf20Sopenharmony_ci}
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci/*
608c2ecf20Sopenharmony_ci * Return driver name if defined, otherwise the name that's also a valid driver
618c2ecf20Sopenharmony_ci * name
628c2ecf20Sopenharmony_ci */
638c2ecf20Sopenharmony_ciconst char *btrfs_super_csum_driver(u16 csum_type)
648c2ecf20Sopenharmony_ci{
658c2ecf20Sopenharmony_ci	/* csum type is validated at mount time */
668c2ecf20Sopenharmony_ci	return btrfs_csums[csum_type].driver[0] ?
678c2ecf20Sopenharmony_ci		btrfs_csums[csum_type].driver :
688c2ecf20Sopenharmony_ci		btrfs_csums[csum_type].name;
698c2ecf20Sopenharmony_ci}
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_cisize_t __attribute_const__ btrfs_get_num_csums(void)
728c2ecf20Sopenharmony_ci{
738c2ecf20Sopenharmony_ci	return ARRAY_SIZE(btrfs_csums);
748c2ecf20Sopenharmony_ci}
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_cistruct btrfs_path *btrfs_alloc_path(void)
778c2ecf20Sopenharmony_ci{
788c2ecf20Sopenharmony_ci	return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
798c2ecf20Sopenharmony_ci}
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_ci/* this also releases the path */
828c2ecf20Sopenharmony_civoid btrfs_free_path(struct btrfs_path *p)
838c2ecf20Sopenharmony_ci{
848c2ecf20Sopenharmony_ci	if (!p)
858c2ecf20Sopenharmony_ci		return;
868c2ecf20Sopenharmony_ci	btrfs_release_path(p);
878c2ecf20Sopenharmony_ci	kmem_cache_free(btrfs_path_cachep, p);
888c2ecf20Sopenharmony_ci}
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci/*
918c2ecf20Sopenharmony_ci * path release drops references on the extent buffers in the path
928c2ecf20Sopenharmony_ci * and it drops any locks held by this path
938c2ecf20Sopenharmony_ci *
948c2ecf20Sopenharmony_ci * It is safe to call this on paths that no locks or extent buffers held.
958c2ecf20Sopenharmony_ci */
968c2ecf20Sopenharmony_cinoinline void btrfs_release_path(struct btrfs_path *p)
978c2ecf20Sopenharmony_ci{
988c2ecf20Sopenharmony_ci	int i;
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
1018c2ecf20Sopenharmony_ci		p->slots[i] = 0;
1028c2ecf20Sopenharmony_ci		if (!p->nodes[i])
1038c2ecf20Sopenharmony_ci			continue;
1048c2ecf20Sopenharmony_ci		if (p->locks[i]) {
1058c2ecf20Sopenharmony_ci			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
1068c2ecf20Sopenharmony_ci			p->locks[i] = 0;
1078c2ecf20Sopenharmony_ci		}
1088c2ecf20Sopenharmony_ci		free_extent_buffer(p->nodes[i]);
1098c2ecf20Sopenharmony_ci		p->nodes[i] = NULL;
1108c2ecf20Sopenharmony_ci	}
1118c2ecf20Sopenharmony_ci}
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci/*
1148c2ecf20Sopenharmony_ci * safely gets a reference on the root node of a tree.  A lock
1158c2ecf20Sopenharmony_ci * is not taken, so a concurrent writer may put a different node
1168c2ecf20Sopenharmony_ci * at the root of the tree.  See btrfs_lock_root_node for the
1178c2ecf20Sopenharmony_ci * looping required.
1188c2ecf20Sopenharmony_ci *
1198c2ecf20Sopenharmony_ci * The extent buffer returned by this has a reference taken, so
1208c2ecf20Sopenharmony_ci * it won't disappear.  It may stop being the root of the tree
1218c2ecf20Sopenharmony_ci * at any time because there are no locks held.
1228c2ecf20Sopenharmony_ci */
1238c2ecf20Sopenharmony_cistruct extent_buffer *btrfs_root_node(struct btrfs_root *root)
1248c2ecf20Sopenharmony_ci{
1258c2ecf20Sopenharmony_ci	struct extent_buffer *eb;
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_ci	while (1) {
1288c2ecf20Sopenharmony_ci		rcu_read_lock();
1298c2ecf20Sopenharmony_ci		eb = rcu_dereference(root->node);
1308c2ecf20Sopenharmony_ci
1318c2ecf20Sopenharmony_ci		/*
1328c2ecf20Sopenharmony_ci		 * RCU really hurts here, we could free up the root node because
1338c2ecf20Sopenharmony_ci		 * it was COWed but we may not get the new root node yet so do
1348c2ecf20Sopenharmony_ci		 * the inc_not_zero dance and if it doesn't work then
1358c2ecf20Sopenharmony_ci		 * synchronize_rcu and try again.
1368c2ecf20Sopenharmony_ci		 */
1378c2ecf20Sopenharmony_ci		if (atomic_inc_not_zero(&eb->refs)) {
1388c2ecf20Sopenharmony_ci			rcu_read_unlock();
1398c2ecf20Sopenharmony_ci			break;
1408c2ecf20Sopenharmony_ci		}
1418c2ecf20Sopenharmony_ci		rcu_read_unlock();
1428c2ecf20Sopenharmony_ci		synchronize_rcu();
1438c2ecf20Sopenharmony_ci	}
1448c2ecf20Sopenharmony_ci	return eb;
1458c2ecf20Sopenharmony_ci}
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci/*
1488c2ecf20Sopenharmony_ci * Cowonly root (not-shareable trees, everything not subvolume or reloc roots),
1498c2ecf20Sopenharmony_ci * just get put onto a simple dirty list.  Transaction walks this list to make
1508c2ecf20Sopenharmony_ci * sure they get properly updated on disk.
1518c2ecf20Sopenharmony_ci */
1528c2ecf20Sopenharmony_cistatic void add_root_to_dirty_list(struct btrfs_root *root)
1538c2ecf20Sopenharmony_ci{
1548c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci	if (test_bit(BTRFS_ROOT_DIRTY, &root->state) ||
1578c2ecf20Sopenharmony_ci	    !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state))
1588c2ecf20Sopenharmony_ci		return;
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_ci	spin_lock(&fs_info->trans_lock);
1618c2ecf20Sopenharmony_ci	if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
1628c2ecf20Sopenharmony_ci		/* Want the extent tree to be the last on the list */
1638c2ecf20Sopenharmony_ci		if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
1648c2ecf20Sopenharmony_ci			list_move_tail(&root->dirty_list,
1658c2ecf20Sopenharmony_ci				       &fs_info->dirty_cowonly_roots);
1668c2ecf20Sopenharmony_ci		else
1678c2ecf20Sopenharmony_ci			list_move(&root->dirty_list,
1688c2ecf20Sopenharmony_ci				  &fs_info->dirty_cowonly_roots);
1698c2ecf20Sopenharmony_ci	}
1708c2ecf20Sopenharmony_ci	spin_unlock(&fs_info->trans_lock);
1718c2ecf20Sopenharmony_ci}
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_ci/*
1748c2ecf20Sopenharmony_ci * used by snapshot creation to make a copy of a root for a tree with
1758c2ecf20Sopenharmony_ci * a given objectid.  The buffer with the new root node is returned in
1768c2ecf20Sopenharmony_ci * cow_ret, and this func returns zero on success or a negative error code.
1778c2ecf20Sopenharmony_ci */
1788c2ecf20Sopenharmony_ciint btrfs_copy_root(struct btrfs_trans_handle *trans,
1798c2ecf20Sopenharmony_ci		      struct btrfs_root *root,
1808c2ecf20Sopenharmony_ci		      struct extent_buffer *buf,
1818c2ecf20Sopenharmony_ci		      struct extent_buffer **cow_ret, u64 new_root_objectid)
1828c2ecf20Sopenharmony_ci{
1838c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
1848c2ecf20Sopenharmony_ci	struct extent_buffer *cow;
1858c2ecf20Sopenharmony_ci	int ret = 0;
1868c2ecf20Sopenharmony_ci	int level;
1878c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
1908c2ecf20Sopenharmony_ci		trans->transid != fs_info->running_transaction->transid);
1918c2ecf20Sopenharmony_ci	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
1928c2ecf20Sopenharmony_ci		trans->transid != root->last_trans);
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	level = btrfs_header_level(buf);
1958c2ecf20Sopenharmony_ci	if (level == 0)
1968c2ecf20Sopenharmony_ci		btrfs_item_key(buf, &disk_key, 0);
1978c2ecf20Sopenharmony_ci	else
1988c2ecf20Sopenharmony_ci		btrfs_node_key(buf, &disk_key, 0);
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci	cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid,
2018c2ecf20Sopenharmony_ci				     &disk_key, level, buf->start, 0,
2028c2ecf20Sopenharmony_ci				     BTRFS_NESTING_NEW_ROOT);
2038c2ecf20Sopenharmony_ci	if (IS_ERR(cow))
2048c2ecf20Sopenharmony_ci		return PTR_ERR(cow);
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci	copy_extent_buffer_full(cow, buf);
2078c2ecf20Sopenharmony_ci	btrfs_set_header_bytenr(cow, cow->start);
2088c2ecf20Sopenharmony_ci	btrfs_set_header_generation(cow, trans->transid);
2098c2ecf20Sopenharmony_ci	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
2108c2ecf20Sopenharmony_ci	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
2118c2ecf20Sopenharmony_ci				     BTRFS_HEADER_FLAG_RELOC);
2128c2ecf20Sopenharmony_ci	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
2138c2ecf20Sopenharmony_ci		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
2148c2ecf20Sopenharmony_ci	else
2158c2ecf20Sopenharmony_ci		btrfs_set_header_owner(cow, new_root_objectid);
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci	write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci	WARN_ON(btrfs_header_generation(buf) > trans->transid);
2208c2ecf20Sopenharmony_ci	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
2218c2ecf20Sopenharmony_ci		ret = btrfs_inc_ref(trans, root, cow, 1);
2228c2ecf20Sopenharmony_ci	else
2238c2ecf20Sopenharmony_ci		ret = btrfs_inc_ref(trans, root, cow, 0);
2248c2ecf20Sopenharmony_ci	if (ret) {
2258c2ecf20Sopenharmony_ci		btrfs_tree_unlock(cow);
2268c2ecf20Sopenharmony_ci		free_extent_buffer(cow);
2278c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
2288c2ecf20Sopenharmony_ci		return ret;
2298c2ecf20Sopenharmony_ci	}
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(cow);
2328c2ecf20Sopenharmony_ci	*cow_ret = cow;
2338c2ecf20Sopenharmony_ci	return 0;
2348c2ecf20Sopenharmony_ci}
2358c2ecf20Sopenharmony_ci
/*
 * Kind of modification recorded by a tree mod log element
 * (stored in tree_mod_elem::op).
 */
enum mod_log_op {
	MOD_LOG_KEY_REPLACE,
	MOD_LOG_KEY_ADD,
	MOD_LOG_KEY_REMOVE,
	/* logged for every item of an old root that is being replaced */
	MOD_LOG_KEY_REMOVE_WHILE_FREEING,
	/* logged for slots that get overwritten by a move */
	MOD_LOG_KEY_REMOVE_WHILE_MOVING,
	MOD_LOG_MOVE_KEYS,
	MOD_LOG_ROOT_REPLACE,
};
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_cistruct tree_mod_root {
2478c2ecf20Sopenharmony_ci	u64 logical;
2488c2ecf20Sopenharmony_ci	u8 level;
2498c2ecf20Sopenharmony_ci};
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_cistruct tree_mod_elem {
2528c2ecf20Sopenharmony_ci	struct rb_node node;
2538c2ecf20Sopenharmony_ci	u64 logical;
2548c2ecf20Sopenharmony_ci	u64 seq;
2558c2ecf20Sopenharmony_ci	enum mod_log_op op;
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	/* this is used for MOD_LOG_KEY_* and MOD_LOG_MOVE_KEYS operations */
2588c2ecf20Sopenharmony_ci	int slot;
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_ci	/* this is used for MOD_LOG_KEY* and MOD_LOG_ROOT_REPLACE */
2618c2ecf20Sopenharmony_ci	u64 generation;
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci	/* those are used for op == MOD_LOG_KEY_{REPLACE,REMOVE} */
2648c2ecf20Sopenharmony_ci	struct btrfs_disk_key key;
2658c2ecf20Sopenharmony_ci	u64 blockptr;
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_ci	/* this is used for op == MOD_LOG_MOVE_KEYS */
2688c2ecf20Sopenharmony_ci	struct {
2698c2ecf20Sopenharmony_ci		int dst_slot;
2708c2ecf20Sopenharmony_ci		int nr_items;
2718c2ecf20Sopenharmony_ci	} move;
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	/* this is used for op == MOD_LOG_ROOT_REPLACE */
2748c2ecf20Sopenharmony_ci	struct tree_mod_root old_root;
2758c2ecf20Sopenharmony_ci};
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci/*
2788c2ecf20Sopenharmony_ci * Pull a new tree mod seq number for our operation.
2798c2ecf20Sopenharmony_ci */
2808c2ecf20Sopenharmony_cistatic inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
2818c2ecf20Sopenharmony_ci{
2828c2ecf20Sopenharmony_ci	return atomic64_inc_return(&fs_info->tree_mod_seq);
2838c2ecf20Sopenharmony_ci}
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci/*
2868c2ecf20Sopenharmony_ci * This adds a new blocker to the tree mod log's blocker list if the @elem
2878c2ecf20Sopenharmony_ci * passed does not already have a sequence number set. So when a caller expects
2888c2ecf20Sopenharmony_ci * to record tree modifications, it should ensure to set elem->seq to zero
2898c2ecf20Sopenharmony_ci * before calling btrfs_get_tree_mod_seq.
2908c2ecf20Sopenharmony_ci * Returns a fresh, unused tree log modification sequence number, even if no new
2918c2ecf20Sopenharmony_ci * blocker was added.
2928c2ecf20Sopenharmony_ci */
2938c2ecf20Sopenharmony_ciu64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
2948c2ecf20Sopenharmony_ci			   struct seq_list *elem)
2958c2ecf20Sopenharmony_ci{
2968c2ecf20Sopenharmony_ci	write_lock(&fs_info->tree_mod_log_lock);
2978c2ecf20Sopenharmony_ci	if (!elem->seq) {
2988c2ecf20Sopenharmony_ci		elem->seq = btrfs_inc_tree_mod_seq(fs_info);
2998c2ecf20Sopenharmony_ci		list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
3008c2ecf20Sopenharmony_ci	}
3018c2ecf20Sopenharmony_ci	write_unlock(&fs_info->tree_mod_log_lock);
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci	return elem->seq;
3048c2ecf20Sopenharmony_ci}
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_civoid btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
3078c2ecf20Sopenharmony_ci			    struct seq_list *elem)
3088c2ecf20Sopenharmony_ci{
3098c2ecf20Sopenharmony_ci	struct rb_root *tm_root;
3108c2ecf20Sopenharmony_ci	struct rb_node *node;
3118c2ecf20Sopenharmony_ci	struct rb_node *next;
3128c2ecf20Sopenharmony_ci	struct tree_mod_elem *tm;
3138c2ecf20Sopenharmony_ci	u64 min_seq = (u64)-1;
3148c2ecf20Sopenharmony_ci	u64 seq_putting = elem->seq;
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci	if (!seq_putting)
3178c2ecf20Sopenharmony_ci		return;
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci	write_lock(&fs_info->tree_mod_log_lock);
3208c2ecf20Sopenharmony_ci	list_del(&elem->list);
3218c2ecf20Sopenharmony_ci	elem->seq = 0;
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci	if (!list_empty(&fs_info->tree_mod_seq_list)) {
3248c2ecf20Sopenharmony_ci		struct seq_list *first;
3258c2ecf20Sopenharmony_ci
3268c2ecf20Sopenharmony_ci		first = list_first_entry(&fs_info->tree_mod_seq_list,
3278c2ecf20Sopenharmony_ci					 struct seq_list, list);
3288c2ecf20Sopenharmony_ci		if (seq_putting > first->seq) {
3298c2ecf20Sopenharmony_ci			/*
3308c2ecf20Sopenharmony_ci			 * Blocker with lower sequence number exists, we
3318c2ecf20Sopenharmony_ci			 * cannot remove anything from the log.
3328c2ecf20Sopenharmony_ci			 */
3338c2ecf20Sopenharmony_ci			write_unlock(&fs_info->tree_mod_log_lock);
3348c2ecf20Sopenharmony_ci			return;
3358c2ecf20Sopenharmony_ci		}
3368c2ecf20Sopenharmony_ci		min_seq = first->seq;
3378c2ecf20Sopenharmony_ci	}
3388c2ecf20Sopenharmony_ci
3398c2ecf20Sopenharmony_ci	/*
3408c2ecf20Sopenharmony_ci	 * anything that's lower than the lowest existing (read: blocked)
3418c2ecf20Sopenharmony_ci	 * sequence number can be removed from the tree.
3428c2ecf20Sopenharmony_ci	 */
3438c2ecf20Sopenharmony_ci	tm_root = &fs_info->tree_mod_log;
3448c2ecf20Sopenharmony_ci	for (node = rb_first(tm_root); node; node = next) {
3458c2ecf20Sopenharmony_ci		next = rb_next(node);
3468c2ecf20Sopenharmony_ci		tm = rb_entry(node, struct tree_mod_elem, node);
3478c2ecf20Sopenharmony_ci		if (tm->seq >= min_seq)
3488c2ecf20Sopenharmony_ci			continue;
3498c2ecf20Sopenharmony_ci		rb_erase(node, tm_root);
3508c2ecf20Sopenharmony_ci		kfree(tm);
3518c2ecf20Sopenharmony_ci	}
3528c2ecf20Sopenharmony_ci	write_unlock(&fs_info->tree_mod_log_lock);
3538c2ecf20Sopenharmony_ci}
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci/*
3568c2ecf20Sopenharmony_ci * key order of the log:
3578c2ecf20Sopenharmony_ci *       node/leaf start address -> sequence
3588c2ecf20Sopenharmony_ci *
3598c2ecf20Sopenharmony_ci * The 'start address' is the logical address of the *new* root node
3608c2ecf20Sopenharmony_ci * for root replace operations, or the logical address of the affected
3618c2ecf20Sopenharmony_ci * block for all other operations.
3628c2ecf20Sopenharmony_ci */
3638c2ecf20Sopenharmony_cistatic noinline int
3648c2ecf20Sopenharmony_ci__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
3658c2ecf20Sopenharmony_ci{
3668c2ecf20Sopenharmony_ci	struct rb_root *tm_root;
3678c2ecf20Sopenharmony_ci	struct rb_node **new;
3688c2ecf20Sopenharmony_ci	struct rb_node *parent = NULL;
3698c2ecf20Sopenharmony_ci	struct tree_mod_elem *cur;
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci	lockdep_assert_held_write(&fs_info->tree_mod_log_lock);
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci	tm->seq = btrfs_inc_tree_mod_seq(fs_info);
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci	tm_root = &fs_info->tree_mod_log;
3768c2ecf20Sopenharmony_ci	new = &tm_root->rb_node;
3778c2ecf20Sopenharmony_ci	while (*new) {
3788c2ecf20Sopenharmony_ci		cur = rb_entry(*new, struct tree_mod_elem, node);
3798c2ecf20Sopenharmony_ci		parent = *new;
3808c2ecf20Sopenharmony_ci		if (cur->logical < tm->logical)
3818c2ecf20Sopenharmony_ci			new = &((*new)->rb_left);
3828c2ecf20Sopenharmony_ci		else if (cur->logical > tm->logical)
3838c2ecf20Sopenharmony_ci			new = &((*new)->rb_right);
3848c2ecf20Sopenharmony_ci		else if (cur->seq < tm->seq)
3858c2ecf20Sopenharmony_ci			new = &((*new)->rb_left);
3868c2ecf20Sopenharmony_ci		else if (cur->seq > tm->seq)
3878c2ecf20Sopenharmony_ci			new = &((*new)->rb_right);
3888c2ecf20Sopenharmony_ci		else
3898c2ecf20Sopenharmony_ci			return -EEXIST;
3908c2ecf20Sopenharmony_ci	}
3918c2ecf20Sopenharmony_ci
3928c2ecf20Sopenharmony_ci	rb_link_node(&tm->node, parent, new);
3938c2ecf20Sopenharmony_ci	rb_insert_color(&tm->node, tm_root);
3948c2ecf20Sopenharmony_ci	return 0;
3958c2ecf20Sopenharmony_ci}
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci/*
3988c2ecf20Sopenharmony_ci * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it
3998c2ecf20Sopenharmony_ci * returns zero with the tree_mod_log_lock acquired. The caller must hold
4008c2ecf20Sopenharmony_ci * this until all tree mod log insertions are recorded in the rb tree and then
4018c2ecf20Sopenharmony_ci * write unlock fs_info::tree_mod_log_lock.
4028c2ecf20Sopenharmony_ci */
4038c2ecf20Sopenharmony_cistatic inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
4048c2ecf20Sopenharmony_ci				    struct extent_buffer *eb) {
4058c2ecf20Sopenharmony_ci	smp_mb();
4068c2ecf20Sopenharmony_ci	if (list_empty(&(fs_info)->tree_mod_seq_list))
4078c2ecf20Sopenharmony_ci		return 1;
4088c2ecf20Sopenharmony_ci	if (eb && btrfs_header_level(eb) == 0)
4098c2ecf20Sopenharmony_ci		return 1;
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ci	write_lock(&fs_info->tree_mod_log_lock);
4128c2ecf20Sopenharmony_ci	if (list_empty(&(fs_info)->tree_mod_seq_list)) {
4138c2ecf20Sopenharmony_ci		write_unlock(&fs_info->tree_mod_log_lock);
4148c2ecf20Sopenharmony_ci		return 1;
4158c2ecf20Sopenharmony_ci	}
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci	return 0;
4188c2ecf20Sopenharmony_ci}
4198c2ecf20Sopenharmony_ci
4208c2ecf20Sopenharmony_ci/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
4218c2ecf20Sopenharmony_cistatic inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
4228c2ecf20Sopenharmony_ci				    struct extent_buffer *eb)
4238c2ecf20Sopenharmony_ci{
4248c2ecf20Sopenharmony_ci	smp_mb();
4258c2ecf20Sopenharmony_ci	if (list_empty(&(fs_info)->tree_mod_seq_list))
4268c2ecf20Sopenharmony_ci		return 0;
4278c2ecf20Sopenharmony_ci	if (eb && btrfs_header_level(eb) == 0)
4288c2ecf20Sopenharmony_ci		return 0;
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	return 1;
4318c2ecf20Sopenharmony_ci}
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_cistatic struct tree_mod_elem *
4348c2ecf20Sopenharmony_cialloc_tree_mod_elem(struct extent_buffer *eb, int slot,
4358c2ecf20Sopenharmony_ci		    enum mod_log_op op, gfp_t flags)
4368c2ecf20Sopenharmony_ci{
4378c2ecf20Sopenharmony_ci	struct tree_mod_elem *tm;
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci	tm = kzalloc(sizeof(*tm), flags);
4408c2ecf20Sopenharmony_ci	if (!tm)
4418c2ecf20Sopenharmony_ci		return NULL;
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_ci	tm->logical = eb->start;
4448c2ecf20Sopenharmony_ci	if (op != MOD_LOG_KEY_ADD) {
4458c2ecf20Sopenharmony_ci		btrfs_node_key(eb, &tm->key, slot);
4468c2ecf20Sopenharmony_ci		tm->blockptr = btrfs_node_blockptr(eb, slot);
4478c2ecf20Sopenharmony_ci	}
4488c2ecf20Sopenharmony_ci	tm->op = op;
4498c2ecf20Sopenharmony_ci	tm->slot = slot;
4508c2ecf20Sopenharmony_ci	tm->generation = btrfs_node_ptr_generation(eb, slot);
4518c2ecf20Sopenharmony_ci	RB_CLEAR_NODE(&tm->node);
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci	return tm;
4548c2ecf20Sopenharmony_ci}
4558c2ecf20Sopenharmony_ci
4568c2ecf20Sopenharmony_cistatic noinline int tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
4578c2ecf20Sopenharmony_ci		enum mod_log_op op, gfp_t flags)
4588c2ecf20Sopenharmony_ci{
4598c2ecf20Sopenharmony_ci	struct tree_mod_elem *tm;
4608c2ecf20Sopenharmony_ci	int ret;
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci	if (!tree_mod_need_log(eb->fs_info, eb))
4638c2ecf20Sopenharmony_ci		return 0;
4648c2ecf20Sopenharmony_ci
4658c2ecf20Sopenharmony_ci	tm = alloc_tree_mod_elem(eb, slot, op, flags);
4668c2ecf20Sopenharmony_ci	if (!tm)
4678c2ecf20Sopenharmony_ci		return -ENOMEM;
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	if (tree_mod_dont_log(eb->fs_info, eb)) {
4708c2ecf20Sopenharmony_ci		kfree(tm);
4718c2ecf20Sopenharmony_ci		return 0;
4728c2ecf20Sopenharmony_ci	}
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	ret = __tree_mod_log_insert(eb->fs_info, tm);
4758c2ecf20Sopenharmony_ci	write_unlock(&eb->fs_info->tree_mod_log_lock);
4768c2ecf20Sopenharmony_ci	if (ret)
4778c2ecf20Sopenharmony_ci		kfree(tm);
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_ci	return ret;
4808c2ecf20Sopenharmony_ci}
4818c2ecf20Sopenharmony_ci
4828c2ecf20Sopenharmony_cistatic noinline int tree_mod_log_insert_move(struct extent_buffer *eb,
4838c2ecf20Sopenharmony_ci		int dst_slot, int src_slot, int nr_items)
4848c2ecf20Sopenharmony_ci{
4858c2ecf20Sopenharmony_ci	struct tree_mod_elem *tm = NULL;
4868c2ecf20Sopenharmony_ci	struct tree_mod_elem **tm_list = NULL;
4878c2ecf20Sopenharmony_ci	int ret = 0;
4888c2ecf20Sopenharmony_ci	int i;
4898c2ecf20Sopenharmony_ci	int locked = 0;
4908c2ecf20Sopenharmony_ci
4918c2ecf20Sopenharmony_ci	if (!tree_mod_need_log(eb->fs_info, eb))
4928c2ecf20Sopenharmony_ci		return 0;
4938c2ecf20Sopenharmony_ci
4948c2ecf20Sopenharmony_ci	tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
4958c2ecf20Sopenharmony_ci	if (!tm_list)
4968c2ecf20Sopenharmony_ci		return -ENOMEM;
4978c2ecf20Sopenharmony_ci
4988c2ecf20Sopenharmony_ci	tm = kzalloc(sizeof(*tm), GFP_NOFS);
4998c2ecf20Sopenharmony_ci	if (!tm) {
5008c2ecf20Sopenharmony_ci		ret = -ENOMEM;
5018c2ecf20Sopenharmony_ci		goto free_tms;
5028c2ecf20Sopenharmony_ci	}
5038c2ecf20Sopenharmony_ci
5048c2ecf20Sopenharmony_ci	tm->logical = eb->start;
5058c2ecf20Sopenharmony_ci	tm->slot = src_slot;
5068c2ecf20Sopenharmony_ci	tm->move.dst_slot = dst_slot;
5078c2ecf20Sopenharmony_ci	tm->move.nr_items = nr_items;
5088c2ecf20Sopenharmony_ci	tm->op = MOD_LOG_MOVE_KEYS;
5098c2ecf20Sopenharmony_ci
5108c2ecf20Sopenharmony_ci	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
5118c2ecf20Sopenharmony_ci		tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
5128c2ecf20Sopenharmony_ci		    MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
5138c2ecf20Sopenharmony_ci		if (!tm_list[i]) {
5148c2ecf20Sopenharmony_ci			ret = -ENOMEM;
5158c2ecf20Sopenharmony_ci			goto free_tms;
5168c2ecf20Sopenharmony_ci		}
5178c2ecf20Sopenharmony_ci	}
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_ci	if (tree_mod_dont_log(eb->fs_info, eb))
5208c2ecf20Sopenharmony_ci		goto free_tms;
5218c2ecf20Sopenharmony_ci	locked = 1;
5228c2ecf20Sopenharmony_ci
5238c2ecf20Sopenharmony_ci	/*
5248c2ecf20Sopenharmony_ci	 * When we override something during the move, we log these removals.
5258c2ecf20Sopenharmony_ci	 * This can only happen when we move towards the beginning of the
5268c2ecf20Sopenharmony_ci	 * buffer, i.e. dst_slot < src_slot.
5278c2ecf20Sopenharmony_ci	 */
5288c2ecf20Sopenharmony_ci	for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
5298c2ecf20Sopenharmony_ci		ret = __tree_mod_log_insert(eb->fs_info, tm_list[i]);
5308c2ecf20Sopenharmony_ci		if (ret)
5318c2ecf20Sopenharmony_ci			goto free_tms;
5328c2ecf20Sopenharmony_ci	}
5338c2ecf20Sopenharmony_ci
5348c2ecf20Sopenharmony_ci	ret = __tree_mod_log_insert(eb->fs_info, tm);
5358c2ecf20Sopenharmony_ci	if (ret)
5368c2ecf20Sopenharmony_ci		goto free_tms;
5378c2ecf20Sopenharmony_ci	write_unlock(&eb->fs_info->tree_mod_log_lock);
5388c2ecf20Sopenharmony_ci	kfree(tm_list);
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_ci	return 0;
5418c2ecf20Sopenharmony_cifree_tms:
5428c2ecf20Sopenharmony_ci	for (i = 0; i < nr_items; i++) {
5438c2ecf20Sopenharmony_ci		if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
5448c2ecf20Sopenharmony_ci			rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
5458c2ecf20Sopenharmony_ci		kfree(tm_list[i]);
5468c2ecf20Sopenharmony_ci	}
5478c2ecf20Sopenharmony_ci	if (locked)
5488c2ecf20Sopenharmony_ci		write_unlock(&eb->fs_info->tree_mod_log_lock);
5498c2ecf20Sopenharmony_ci	kfree(tm_list);
5508c2ecf20Sopenharmony_ci	kfree(tm);
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci	return ret;
5538c2ecf20Sopenharmony_ci}
5548c2ecf20Sopenharmony_ci
5558c2ecf20Sopenharmony_cistatic inline int
5568c2ecf20Sopenharmony_ci__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
5578c2ecf20Sopenharmony_ci		       struct tree_mod_elem **tm_list,
5588c2ecf20Sopenharmony_ci		       int nritems)
5598c2ecf20Sopenharmony_ci{
5608c2ecf20Sopenharmony_ci	int i, j;
5618c2ecf20Sopenharmony_ci	int ret;
5628c2ecf20Sopenharmony_ci
5638c2ecf20Sopenharmony_ci	for (i = nritems - 1; i >= 0; i--) {
5648c2ecf20Sopenharmony_ci		ret = __tree_mod_log_insert(fs_info, tm_list[i]);
5658c2ecf20Sopenharmony_ci		if (ret) {
5668c2ecf20Sopenharmony_ci			for (j = nritems - 1; j > i; j--)
5678c2ecf20Sopenharmony_ci				rb_erase(&tm_list[j]->node,
5688c2ecf20Sopenharmony_ci					 &fs_info->tree_mod_log);
5698c2ecf20Sopenharmony_ci			return ret;
5708c2ecf20Sopenharmony_ci		}
5718c2ecf20Sopenharmony_ci	}
5728c2ecf20Sopenharmony_ci
5738c2ecf20Sopenharmony_ci	return 0;
5748c2ecf20Sopenharmony_ci}
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_cistatic noinline int tree_mod_log_insert_root(struct extent_buffer *old_root,
5778c2ecf20Sopenharmony_ci			 struct extent_buffer *new_root, int log_removal)
5788c2ecf20Sopenharmony_ci{
5798c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = old_root->fs_info;
5808c2ecf20Sopenharmony_ci	struct tree_mod_elem *tm = NULL;
5818c2ecf20Sopenharmony_ci	struct tree_mod_elem **tm_list = NULL;
5828c2ecf20Sopenharmony_ci	int nritems = 0;
5838c2ecf20Sopenharmony_ci	int ret = 0;
5848c2ecf20Sopenharmony_ci	int i;
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_ci	if (!tree_mod_need_log(fs_info, NULL))
5878c2ecf20Sopenharmony_ci		return 0;
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	if (log_removal && btrfs_header_level(old_root) > 0) {
5908c2ecf20Sopenharmony_ci		nritems = btrfs_header_nritems(old_root);
5918c2ecf20Sopenharmony_ci		tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *),
5928c2ecf20Sopenharmony_ci				  GFP_NOFS);
5938c2ecf20Sopenharmony_ci		if (!tm_list) {
5948c2ecf20Sopenharmony_ci			ret = -ENOMEM;
5958c2ecf20Sopenharmony_ci			goto free_tms;
5968c2ecf20Sopenharmony_ci		}
5978c2ecf20Sopenharmony_ci		for (i = 0; i < nritems; i++) {
5988c2ecf20Sopenharmony_ci			tm_list[i] = alloc_tree_mod_elem(old_root, i,
5998c2ecf20Sopenharmony_ci			    MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
6008c2ecf20Sopenharmony_ci			if (!tm_list[i]) {
6018c2ecf20Sopenharmony_ci				ret = -ENOMEM;
6028c2ecf20Sopenharmony_ci				goto free_tms;
6038c2ecf20Sopenharmony_ci			}
6048c2ecf20Sopenharmony_ci		}
6058c2ecf20Sopenharmony_ci	}
6068c2ecf20Sopenharmony_ci
6078c2ecf20Sopenharmony_ci	tm = kzalloc(sizeof(*tm), GFP_NOFS);
6088c2ecf20Sopenharmony_ci	if (!tm) {
6098c2ecf20Sopenharmony_ci		ret = -ENOMEM;
6108c2ecf20Sopenharmony_ci		goto free_tms;
6118c2ecf20Sopenharmony_ci	}
6128c2ecf20Sopenharmony_ci
6138c2ecf20Sopenharmony_ci	tm->logical = new_root->start;
6148c2ecf20Sopenharmony_ci	tm->old_root.logical = old_root->start;
6158c2ecf20Sopenharmony_ci	tm->old_root.level = btrfs_header_level(old_root);
6168c2ecf20Sopenharmony_ci	tm->generation = btrfs_header_generation(old_root);
6178c2ecf20Sopenharmony_ci	tm->op = MOD_LOG_ROOT_REPLACE;
6188c2ecf20Sopenharmony_ci
6198c2ecf20Sopenharmony_ci	if (tree_mod_dont_log(fs_info, NULL))
6208c2ecf20Sopenharmony_ci		goto free_tms;
6218c2ecf20Sopenharmony_ci
6228c2ecf20Sopenharmony_ci	if (tm_list)
6238c2ecf20Sopenharmony_ci		ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
6248c2ecf20Sopenharmony_ci	if (!ret)
6258c2ecf20Sopenharmony_ci		ret = __tree_mod_log_insert(fs_info, tm);
6268c2ecf20Sopenharmony_ci
6278c2ecf20Sopenharmony_ci	write_unlock(&fs_info->tree_mod_log_lock);
6288c2ecf20Sopenharmony_ci	if (ret)
6298c2ecf20Sopenharmony_ci		goto free_tms;
6308c2ecf20Sopenharmony_ci	kfree(tm_list);
6318c2ecf20Sopenharmony_ci
6328c2ecf20Sopenharmony_ci	return ret;
6338c2ecf20Sopenharmony_ci
6348c2ecf20Sopenharmony_cifree_tms:
6358c2ecf20Sopenharmony_ci	if (tm_list) {
6368c2ecf20Sopenharmony_ci		for (i = 0; i < nritems; i++)
6378c2ecf20Sopenharmony_ci			kfree(tm_list[i]);
6388c2ecf20Sopenharmony_ci		kfree(tm_list);
6398c2ecf20Sopenharmony_ci	}
6408c2ecf20Sopenharmony_ci	kfree(tm);
6418c2ecf20Sopenharmony_ci
6428c2ecf20Sopenharmony_ci	return ret;
6438c2ecf20Sopenharmony_ci}
6448c2ecf20Sopenharmony_ci
6458c2ecf20Sopenharmony_cistatic struct tree_mod_elem *
6468c2ecf20Sopenharmony_ci__tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
6478c2ecf20Sopenharmony_ci		      int smallest)
6488c2ecf20Sopenharmony_ci{
6498c2ecf20Sopenharmony_ci	struct rb_root *tm_root;
6508c2ecf20Sopenharmony_ci	struct rb_node *node;
6518c2ecf20Sopenharmony_ci	struct tree_mod_elem *cur = NULL;
6528c2ecf20Sopenharmony_ci	struct tree_mod_elem *found = NULL;
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci	read_lock(&fs_info->tree_mod_log_lock);
6558c2ecf20Sopenharmony_ci	tm_root = &fs_info->tree_mod_log;
6568c2ecf20Sopenharmony_ci	node = tm_root->rb_node;
6578c2ecf20Sopenharmony_ci	while (node) {
6588c2ecf20Sopenharmony_ci		cur = rb_entry(node, struct tree_mod_elem, node);
6598c2ecf20Sopenharmony_ci		if (cur->logical < start) {
6608c2ecf20Sopenharmony_ci			node = node->rb_left;
6618c2ecf20Sopenharmony_ci		} else if (cur->logical > start) {
6628c2ecf20Sopenharmony_ci			node = node->rb_right;
6638c2ecf20Sopenharmony_ci		} else if (cur->seq < min_seq) {
6648c2ecf20Sopenharmony_ci			node = node->rb_left;
6658c2ecf20Sopenharmony_ci		} else if (!smallest) {
6668c2ecf20Sopenharmony_ci			/* we want the node with the highest seq */
6678c2ecf20Sopenharmony_ci			if (found)
6688c2ecf20Sopenharmony_ci				BUG_ON(found->seq > cur->seq);
6698c2ecf20Sopenharmony_ci			found = cur;
6708c2ecf20Sopenharmony_ci			node = node->rb_left;
6718c2ecf20Sopenharmony_ci		} else if (cur->seq > min_seq) {
6728c2ecf20Sopenharmony_ci			/* we want the node with the smallest seq */
6738c2ecf20Sopenharmony_ci			if (found)
6748c2ecf20Sopenharmony_ci				BUG_ON(found->seq < cur->seq);
6758c2ecf20Sopenharmony_ci			found = cur;
6768c2ecf20Sopenharmony_ci			node = node->rb_right;
6778c2ecf20Sopenharmony_ci		} else {
6788c2ecf20Sopenharmony_ci			found = cur;
6798c2ecf20Sopenharmony_ci			break;
6808c2ecf20Sopenharmony_ci		}
6818c2ecf20Sopenharmony_ci	}
6828c2ecf20Sopenharmony_ci	read_unlock(&fs_info->tree_mod_log_lock);
6838c2ecf20Sopenharmony_ci
6848c2ecf20Sopenharmony_ci	return found;
6858c2ecf20Sopenharmony_ci}
6868c2ecf20Sopenharmony_ci
6878c2ecf20Sopenharmony_ci/*
6888c2ecf20Sopenharmony_ci * this returns the element from the log with the smallest time sequence
6898c2ecf20Sopenharmony_ci * value that's in the log (the oldest log item). any element with a time
6908c2ecf20Sopenharmony_ci * sequence lower than min_seq will be ignored.
6918c2ecf20Sopenharmony_ci */
6928c2ecf20Sopenharmony_cistatic struct tree_mod_elem *
6938c2ecf20Sopenharmony_citree_mod_log_search_oldest(struct btrfs_fs_info *fs_info, u64 start,
6948c2ecf20Sopenharmony_ci			   u64 min_seq)
6958c2ecf20Sopenharmony_ci{
6968c2ecf20Sopenharmony_ci	return __tree_mod_log_search(fs_info, start, min_seq, 1);
6978c2ecf20Sopenharmony_ci}
6988c2ecf20Sopenharmony_ci
6998c2ecf20Sopenharmony_ci/*
7008c2ecf20Sopenharmony_ci * this returns the element from the log with the largest time sequence
7018c2ecf20Sopenharmony_ci * value that's in the log (the most recent log item). any element with
7028c2ecf20Sopenharmony_ci * a time sequence lower than min_seq will be ignored.
7038c2ecf20Sopenharmony_ci */
7048c2ecf20Sopenharmony_cistatic struct tree_mod_elem *
7058c2ecf20Sopenharmony_citree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
7068c2ecf20Sopenharmony_ci{
7078c2ecf20Sopenharmony_ci	return __tree_mod_log_search(fs_info, start, min_seq, 0);
7088c2ecf20Sopenharmony_ci}
7098c2ecf20Sopenharmony_ci
7108c2ecf20Sopenharmony_cistatic noinline int tree_mod_log_eb_copy(struct extent_buffer *dst,
7118c2ecf20Sopenharmony_ci		     struct extent_buffer *src, unsigned long dst_offset,
7128c2ecf20Sopenharmony_ci		     unsigned long src_offset, int nr_items)
7138c2ecf20Sopenharmony_ci{
7148c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = dst->fs_info;
7158c2ecf20Sopenharmony_ci	int ret = 0;
7168c2ecf20Sopenharmony_ci	struct tree_mod_elem **tm_list = NULL;
7178c2ecf20Sopenharmony_ci	struct tree_mod_elem **tm_list_add, **tm_list_rem;
7188c2ecf20Sopenharmony_ci	int i;
7198c2ecf20Sopenharmony_ci	int locked = 0;
7208c2ecf20Sopenharmony_ci
7218c2ecf20Sopenharmony_ci	if (!tree_mod_need_log(fs_info, NULL))
7228c2ecf20Sopenharmony_ci		return 0;
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci	if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
7258c2ecf20Sopenharmony_ci		return 0;
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ci	tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
7288c2ecf20Sopenharmony_ci			  GFP_NOFS);
7298c2ecf20Sopenharmony_ci	if (!tm_list)
7308c2ecf20Sopenharmony_ci		return -ENOMEM;
7318c2ecf20Sopenharmony_ci
7328c2ecf20Sopenharmony_ci	tm_list_add = tm_list;
7338c2ecf20Sopenharmony_ci	tm_list_rem = tm_list + nr_items;
7348c2ecf20Sopenharmony_ci	for (i = 0; i < nr_items; i++) {
7358c2ecf20Sopenharmony_ci		tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
7368c2ecf20Sopenharmony_ci		    MOD_LOG_KEY_REMOVE, GFP_NOFS);
7378c2ecf20Sopenharmony_ci		if (!tm_list_rem[i]) {
7388c2ecf20Sopenharmony_ci			ret = -ENOMEM;
7398c2ecf20Sopenharmony_ci			goto free_tms;
7408c2ecf20Sopenharmony_ci		}
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_ci		tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
7438c2ecf20Sopenharmony_ci		    MOD_LOG_KEY_ADD, GFP_NOFS);
7448c2ecf20Sopenharmony_ci		if (!tm_list_add[i]) {
7458c2ecf20Sopenharmony_ci			ret = -ENOMEM;
7468c2ecf20Sopenharmony_ci			goto free_tms;
7478c2ecf20Sopenharmony_ci		}
7488c2ecf20Sopenharmony_ci	}
7498c2ecf20Sopenharmony_ci
7508c2ecf20Sopenharmony_ci	if (tree_mod_dont_log(fs_info, NULL))
7518c2ecf20Sopenharmony_ci		goto free_tms;
7528c2ecf20Sopenharmony_ci	locked = 1;
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ci	for (i = 0; i < nr_items; i++) {
7558c2ecf20Sopenharmony_ci		ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
7568c2ecf20Sopenharmony_ci		if (ret)
7578c2ecf20Sopenharmony_ci			goto free_tms;
7588c2ecf20Sopenharmony_ci		ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
7598c2ecf20Sopenharmony_ci		if (ret)
7608c2ecf20Sopenharmony_ci			goto free_tms;
7618c2ecf20Sopenharmony_ci	}
7628c2ecf20Sopenharmony_ci
7638c2ecf20Sopenharmony_ci	write_unlock(&fs_info->tree_mod_log_lock);
7648c2ecf20Sopenharmony_ci	kfree(tm_list);
7658c2ecf20Sopenharmony_ci
7668c2ecf20Sopenharmony_ci	return 0;
7678c2ecf20Sopenharmony_ci
7688c2ecf20Sopenharmony_cifree_tms:
7698c2ecf20Sopenharmony_ci	for (i = 0; i < nr_items * 2; i++) {
7708c2ecf20Sopenharmony_ci		if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
7718c2ecf20Sopenharmony_ci			rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
7728c2ecf20Sopenharmony_ci		kfree(tm_list[i]);
7738c2ecf20Sopenharmony_ci	}
7748c2ecf20Sopenharmony_ci	if (locked)
7758c2ecf20Sopenharmony_ci		write_unlock(&fs_info->tree_mod_log_lock);
7768c2ecf20Sopenharmony_ci	kfree(tm_list);
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	return ret;
7798c2ecf20Sopenharmony_ci}
7808c2ecf20Sopenharmony_ci
7818c2ecf20Sopenharmony_cistatic noinline int tree_mod_log_free_eb(struct extent_buffer *eb)
7828c2ecf20Sopenharmony_ci{
7838c2ecf20Sopenharmony_ci	struct tree_mod_elem **tm_list = NULL;
7848c2ecf20Sopenharmony_ci	int nritems = 0;
7858c2ecf20Sopenharmony_ci	int i;
7868c2ecf20Sopenharmony_ci	int ret = 0;
7878c2ecf20Sopenharmony_ci
7888c2ecf20Sopenharmony_ci	if (btrfs_header_level(eb) == 0)
7898c2ecf20Sopenharmony_ci		return 0;
7908c2ecf20Sopenharmony_ci
7918c2ecf20Sopenharmony_ci	if (!tree_mod_need_log(eb->fs_info, NULL))
7928c2ecf20Sopenharmony_ci		return 0;
7938c2ecf20Sopenharmony_ci
7948c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(eb);
7958c2ecf20Sopenharmony_ci	tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
7968c2ecf20Sopenharmony_ci	if (!tm_list)
7978c2ecf20Sopenharmony_ci		return -ENOMEM;
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ci	for (i = 0; i < nritems; i++) {
8008c2ecf20Sopenharmony_ci		tm_list[i] = alloc_tree_mod_elem(eb, i,
8018c2ecf20Sopenharmony_ci		    MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
8028c2ecf20Sopenharmony_ci		if (!tm_list[i]) {
8038c2ecf20Sopenharmony_ci			ret = -ENOMEM;
8048c2ecf20Sopenharmony_ci			goto free_tms;
8058c2ecf20Sopenharmony_ci		}
8068c2ecf20Sopenharmony_ci	}
8078c2ecf20Sopenharmony_ci
8088c2ecf20Sopenharmony_ci	if (tree_mod_dont_log(eb->fs_info, eb))
8098c2ecf20Sopenharmony_ci		goto free_tms;
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci	ret = __tree_mod_log_free_eb(eb->fs_info, tm_list, nritems);
8128c2ecf20Sopenharmony_ci	write_unlock(&eb->fs_info->tree_mod_log_lock);
8138c2ecf20Sopenharmony_ci	if (ret)
8148c2ecf20Sopenharmony_ci		goto free_tms;
8158c2ecf20Sopenharmony_ci	kfree(tm_list);
8168c2ecf20Sopenharmony_ci
8178c2ecf20Sopenharmony_ci	return 0;
8188c2ecf20Sopenharmony_ci
8198c2ecf20Sopenharmony_cifree_tms:
8208c2ecf20Sopenharmony_ci	for (i = 0; i < nritems; i++)
8218c2ecf20Sopenharmony_ci		kfree(tm_list[i]);
8228c2ecf20Sopenharmony_ci	kfree(tm_list);
8238c2ecf20Sopenharmony_ci
8248c2ecf20Sopenharmony_ci	return ret;
8258c2ecf20Sopenharmony_ci}
8268c2ecf20Sopenharmony_ci
8278c2ecf20Sopenharmony_ci/*
8288c2ecf20Sopenharmony_ci * check if the tree block can be shared by multiple trees
8298c2ecf20Sopenharmony_ci */
8308c2ecf20Sopenharmony_ciint btrfs_block_can_be_shared(struct btrfs_root *root,
8318c2ecf20Sopenharmony_ci			      struct extent_buffer *buf)
8328c2ecf20Sopenharmony_ci{
8338c2ecf20Sopenharmony_ci	/*
8348c2ecf20Sopenharmony_ci	 * Tree blocks not in shareable trees and tree roots are never shared.
8358c2ecf20Sopenharmony_ci	 * If a block was allocated after the last snapshot and the block was
8368c2ecf20Sopenharmony_ci	 * not allocated by tree relocation, we know the block is not shared.
8378c2ecf20Sopenharmony_ci	 */
8388c2ecf20Sopenharmony_ci	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
8398c2ecf20Sopenharmony_ci	    buf != root->node && buf != root->commit_root &&
8408c2ecf20Sopenharmony_ci	    (btrfs_header_generation(buf) <=
8418c2ecf20Sopenharmony_ci	     btrfs_root_last_snapshot(&root->root_item) ||
8428c2ecf20Sopenharmony_ci	     btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
8438c2ecf20Sopenharmony_ci		return 1;
8448c2ecf20Sopenharmony_ci
8458c2ecf20Sopenharmony_ci	return 0;
8468c2ecf20Sopenharmony_ci}
8478c2ecf20Sopenharmony_ci
8488c2ecf20Sopenharmony_cistatic noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
8498c2ecf20Sopenharmony_ci				       struct btrfs_root *root,
8508c2ecf20Sopenharmony_ci				       struct extent_buffer *buf,
8518c2ecf20Sopenharmony_ci				       struct extent_buffer *cow,
8528c2ecf20Sopenharmony_ci				       int *last_ref)
8538c2ecf20Sopenharmony_ci{
8548c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
8558c2ecf20Sopenharmony_ci	u64 refs;
8568c2ecf20Sopenharmony_ci	u64 owner;
8578c2ecf20Sopenharmony_ci	u64 flags;
8588c2ecf20Sopenharmony_ci	u64 new_flags = 0;
8598c2ecf20Sopenharmony_ci	int ret;
8608c2ecf20Sopenharmony_ci
8618c2ecf20Sopenharmony_ci	/*
8628c2ecf20Sopenharmony_ci	 * Backrefs update rules:
8638c2ecf20Sopenharmony_ci	 *
8648c2ecf20Sopenharmony_ci	 * Always use full backrefs for extent pointers in tree block
8658c2ecf20Sopenharmony_ci	 * allocated by tree relocation.
8668c2ecf20Sopenharmony_ci	 *
8678c2ecf20Sopenharmony_ci	 * If a shared tree block is no longer referenced by its owner
8688c2ecf20Sopenharmony_ci	 * tree (btrfs_header_owner(buf) == root->root_key.objectid),
8698c2ecf20Sopenharmony_ci	 * use full backrefs for extent pointers in tree block.
8708c2ecf20Sopenharmony_ci	 *
8718c2ecf20Sopenharmony_ci	 * If a tree block is been relocating
8728c2ecf20Sopenharmony_ci	 * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
8738c2ecf20Sopenharmony_ci	 * use full backrefs for extent pointers in tree block.
8748c2ecf20Sopenharmony_ci	 * The reason for this is some operations (such as drop tree)
8758c2ecf20Sopenharmony_ci	 * are only allowed for blocks use full backrefs.
8768c2ecf20Sopenharmony_ci	 */
8778c2ecf20Sopenharmony_ci
8788c2ecf20Sopenharmony_ci	if (btrfs_block_can_be_shared(root, buf)) {
8798c2ecf20Sopenharmony_ci		ret = btrfs_lookup_extent_info(trans, fs_info, buf->start,
8808c2ecf20Sopenharmony_ci					       btrfs_header_level(buf), 1,
8818c2ecf20Sopenharmony_ci					       &refs, &flags);
8828c2ecf20Sopenharmony_ci		if (ret)
8838c2ecf20Sopenharmony_ci			return ret;
8848c2ecf20Sopenharmony_ci		if (refs == 0) {
8858c2ecf20Sopenharmony_ci			ret = -EROFS;
8868c2ecf20Sopenharmony_ci			btrfs_handle_fs_error(fs_info, ret, NULL);
8878c2ecf20Sopenharmony_ci			return ret;
8888c2ecf20Sopenharmony_ci		}
8898c2ecf20Sopenharmony_ci	} else {
8908c2ecf20Sopenharmony_ci		refs = 1;
8918c2ecf20Sopenharmony_ci		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
8928c2ecf20Sopenharmony_ci		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
8938c2ecf20Sopenharmony_ci			flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
8948c2ecf20Sopenharmony_ci		else
8958c2ecf20Sopenharmony_ci			flags = 0;
8968c2ecf20Sopenharmony_ci	}
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_ci	owner = btrfs_header_owner(buf);
8998c2ecf20Sopenharmony_ci	BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
9008c2ecf20Sopenharmony_ci	       !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci	if (refs > 1) {
9038c2ecf20Sopenharmony_ci		if ((owner == root->root_key.objectid ||
9048c2ecf20Sopenharmony_ci		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
9058c2ecf20Sopenharmony_ci		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
9068c2ecf20Sopenharmony_ci			ret = btrfs_inc_ref(trans, root, buf, 1);
9078c2ecf20Sopenharmony_ci			if (ret)
9088c2ecf20Sopenharmony_ci				return ret;
9098c2ecf20Sopenharmony_ci
9108c2ecf20Sopenharmony_ci			if (root->root_key.objectid ==
9118c2ecf20Sopenharmony_ci			    BTRFS_TREE_RELOC_OBJECTID) {
9128c2ecf20Sopenharmony_ci				ret = btrfs_dec_ref(trans, root, buf, 0);
9138c2ecf20Sopenharmony_ci				if (ret)
9148c2ecf20Sopenharmony_ci					return ret;
9158c2ecf20Sopenharmony_ci				ret = btrfs_inc_ref(trans, root, cow, 1);
9168c2ecf20Sopenharmony_ci				if (ret)
9178c2ecf20Sopenharmony_ci					return ret;
9188c2ecf20Sopenharmony_ci			}
9198c2ecf20Sopenharmony_ci			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
9208c2ecf20Sopenharmony_ci		} else {
9218c2ecf20Sopenharmony_ci
9228c2ecf20Sopenharmony_ci			if (root->root_key.objectid ==
9238c2ecf20Sopenharmony_ci			    BTRFS_TREE_RELOC_OBJECTID)
9248c2ecf20Sopenharmony_ci				ret = btrfs_inc_ref(trans, root, cow, 1);
9258c2ecf20Sopenharmony_ci			else
9268c2ecf20Sopenharmony_ci				ret = btrfs_inc_ref(trans, root, cow, 0);
9278c2ecf20Sopenharmony_ci			if (ret)
9288c2ecf20Sopenharmony_ci				return ret;
9298c2ecf20Sopenharmony_ci		}
9308c2ecf20Sopenharmony_ci		if (new_flags != 0) {
9318c2ecf20Sopenharmony_ci			int level = btrfs_header_level(buf);
9328c2ecf20Sopenharmony_ci
9338c2ecf20Sopenharmony_ci			ret = btrfs_set_disk_extent_flags(trans, buf,
9348c2ecf20Sopenharmony_ci							  new_flags, level, 0);
9358c2ecf20Sopenharmony_ci			if (ret)
9368c2ecf20Sopenharmony_ci				return ret;
9378c2ecf20Sopenharmony_ci		}
9388c2ecf20Sopenharmony_ci	} else {
9398c2ecf20Sopenharmony_ci		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
9408c2ecf20Sopenharmony_ci			if (root->root_key.objectid ==
9418c2ecf20Sopenharmony_ci			    BTRFS_TREE_RELOC_OBJECTID)
9428c2ecf20Sopenharmony_ci				ret = btrfs_inc_ref(trans, root, cow, 1);
9438c2ecf20Sopenharmony_ci			else
9448c2ecf20Sopenharmony_ci				ret = btrfs_inc_ref(trans, root, cow, 0);
9458c2ecf20Sopenharmony_ci			if (ret)
9468c2ecf20Sopenharmony_ci				return ret;
9478c2ecf20Sopenharmony_ci			ret = btrfs_dec_ref(trans, root, buf, 1);
9488c2ecf20Sopenharmony_ci			if (ret)
9498c2ecf20Sopenharmony_ci				return ret;
9508c2ecf20Sopenharmony_ci		}
9518c2ecf20Sopenharmony_ci		btrfs_clean_tree_block(buf);
9528c2ecf20Sopenharmony_ci		*last_ref = 1;
9538c2ecf20Sopenharmony_ci	}
9548c2ecf20Sopenharmony_ci	return 0;
9558c2ecf20Sopenharmony_ci}
9568c2ecf20Sopenharmony_ci
9578c2ecf20Sopenharmony_cistatic struct extent_buffer *alloc_tree_block_no_bg_flush(
9588c2ecf20Sopenharmony_ci					  struct btrfs_trans_handle *trans,
9598c2ecf20Sopenharmony_ci					  struct btrfs_root *root,
9608c2ecf20Sopenharmony_ci					  u64 parent_start,
9618c2ecf20Sopenharmony_ci					  const struct btrfs_disk_key *disk_key,
9628c2ecf20Sopenharmony_ci					  int level,
9638c2ecf20Sopenharmony_ci					  u64 hint,
9648c2ecf20Sopenharmony_ci					  u64 empty_size,
9658c2ecf20Sopenharmony_ci					  enum btrfs_lock_nesting nest)
9668c2ecf20Sopenharmony_ci{
9678c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
9688c2ecf20Sopenharmony_ci	struct extent_buffer *ret;
9698c2ecf20Sopenharmony_ci
9708c2ecf20Sopenharmony_ci	/*
9718c2ecf20Sopenharmony_ci	 * If we are COWing a node/leaf from the extent, chunk, device or free
9728c2ecf20Sopenharmony_ci	 * space trees, make sure that we do not finish block group creation of
9738c2ecf20Sopenharmony_ci	 * pending block groups. We do this to avoid a deadlock.
9748c2ecf20Sopenharmony_ci	 * COWing can result in allocation of a new chunk, and flushing pending
9758c2ecf20Sopenharmony_ci	 * block groups (btrfs_create_pending_block_groups()) can be triggered
9768c2ecf20Sopenharmony_ci	 * when finishing allocation of a new chunk. Creation of a pending block
9778c2ecf20Sopenharmony_ci	 * group modifies the extent, chunk, device and free space trees,
9788c2ecf20Sopenharmony_ci	 * therefore we could deadlock with ourselves since we are holding a
9798c2ecf20Sopenharmony_ci	 * lock on an extent buffer that btrfs_create_pending_block_groups() may
9808c2ecf20Sopenharmony_ci	 * try to COW later.
9818c2ecf20Sopenharmony_ci	 * For similar reasons, we also need to delay flushing pending block
9828c2ecf20Sopenharmony_ci	 * groups when splitting a leaf or node, from one of those trees, since
9838c2ecf20Sopenharmony_ci	 * we are holding a write lock on it and its parent or when inserting a
9848c2ecf20Sopenharmony_ci	 * new root node for one of those trees.
9858c2ecf20Sopenharmony_ci	 */
9868c2ecf20Sopenharmony_ci	if (root == fs_info->extent_root ||
9878c2ecf20Sopenharmony_ci	    root == fs_info->chunk_root ||
9888c2ecf20Sopenharmony_ci	    root == fs_info->dev_root ||
9898c2ecf20Sopenharmony_ci	    root == fs_info->free_space_root)
9908c2ecf20Sopenharmony_ci		trans->can_flush_pending_bgs = false;
9918c2ecf20Sopenharmony_ci
9928c2ecf20Sopenharmony_ci	ret = btrfs_alloc_tree_block(trans, root, parent_start,
9938c2ecf20Sopenharmony_ci				     root->root_key.objectid, disk_key, level,
9948c2ecf20Sopenharmony_ci				     hint, empty_size, nest);
9958c2ecf20Sopenharmony_ci	trans->can_flush_pending_bgs = true;
9968c2ecf20Sopenharmony_ci
9978c2ecf20Sopenharmony_ci	return ret;
9988c2ecf20Sopenharmony_ci}
9998c2ecf20Sopenharmony_ci
10008c2ecf20Sopenharmony_ci/*
10018c2ecf20Sopenharmony_ci * does the dirty work in cow of a single block.  The parent block (if
10028c2ecf20Sopenharmony_ci * supplied) is updated to point to the new cow copy.  The new buffer is marked
10038c2ecf20Sopenharmony_ci * dirty and returned locked.  If you modify the block it needs to be marked
10048c2ecf20Sopenharmony_ci * dirty again.
10058c2ecf20Sopenharmony_ci *
10068c2ecf20Sopenharmony_ci * search_start -- an allocation hint for the new block
10078c2ecf20Sopenharmony_ci *
10088c2ecf20Sopenharmony_ci * empty_size -- a hint that you plan on doing more cow.  This is the size in
10098c2ecf20Sopenharmony_ci * bytes the allocator should try to find free next to the block it returns.
10108c2ecf20Sopenharmony_ci * This is just a hint and may be ignored by the allocator.
10118c2ecf20Sopenharmony_ci */
10128c2ecf20Sopenharmony_cistatic noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
10138c2ecf20Sopenharmony_ci			     struct btrfs_root *root,
10148c2ecf20Sopenharmony_ci			     struct extent_buffer *buf,
10158c2ecf20Sopenharmony_ci			     struct extent_buffer *parent, int parent_slot,
10168c2ecf20Sopenharmony_ci			     struct extent_buffer **cow_ret,
10178c2ecf20Sopenharmony_ci			     u64 search_start, u64 empty_size,
10188c2ecf20Sopenharmony_ci			     enum btrfs_lock_nesting nest)
10198c2ecf20Sopenharmony_ci{
10208c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
10218c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
10228c2ecf20Sopenharmony_ci	struct extent_buffer *cow;
10238c2ecf20Sopenharmony_ci	int level, ret;
10248c2ecf20Sopenharmony_ci	int last_ref = 0;
10258c2ecf20Sopenharmony_ci	int unlock_orig = 0;
10268c2ecf20Sopenharmony_ci	u64 parent_start = 0;
10278c2ecf20Sopenharmony_ci
10288c2ecf20Sopenharmony_ci	if (*cow_ret == buf)
10298c2ecf20Sopenharmony_ci		unlock_orig = 1;
10308c2ecf20Sopenharmony_ci
10318c2ecf20Sopenharmony_ci	btrfs_assert_tree_locked(buf);
10328c2ecf20Sopenharmony_ci
10338c2ecf20Sopenharmony_ci	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
10348c2ecf20Sopenharmony_ci		trans->transid != fs_info->running_transaction->transid);
10358c2ecf20Sopenharmony_ci	WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
10368c2ecf20Sopenharmony_ci		trans->transid != root->last_trans);
10378c2ecf20Sopenharmony_ci
10388c2ecf20Sopenharmony_ci	level = btrfs_header_level(buf);
10398c2ecf20Sopenharmony_ci
10408c2ecf20Sopenharmony_ci	if (level == 0)
10418c2ecf20Sopenharmony_ci		btrfs_item_key(buf, &disk_key, 0);
10428c2ecf20Sopenharmony_ci	else
10438c2ecf20Sopenharmony_ci		btrfs_node_key(buf, &disk_key, 0);
10448c2ecf20Sopenharmony_ci
10458c2ecf20Sopenharmony_ci	if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
10468c2ecf20Sopenharmony_ci		parent_start = parent->start;
10478c2ecf20Sopenharmony_ci
10488c2ecf20Sopenharmony_ci	cow = alloc_tree_block_no_bg_flush(trans, root, parent_start, &disk_key,
10498c2ecf20Sopenharmony_ci					   level, search_start, empty_size, nest);
10508c2ecf20Sopenharmony_ci	if (IS_ERR(cow))
10518c2ecf20Sopenharmony_ci		return PTR_ERR(cow);
10528c2ecf20Sopenharmony_ci
10538c2ecf20Sopenharmony_ci	/* cow is set to blocking by btrfs_init_new_buffer */
10548c2ecf20Sopenharmony_ci
10558c2ecf20Sopenharmony_ci	copy_extent_buffer_full(cow, buf);
10568c2ecf20Sopenharmony_ci	btrfs_set_header_bytenr(cow, cow->start);
10578c2ecf20Sopenharmony_ci	btrfs_set_header_generation(cow, trans->transid);
10588c2ecf20Sopenharmony_ci	btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
10598c2ecf20Sopenharmony_ci	btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
10608c2ecf20Sopenharmony_ci				     BTRFS_HEADER_FLAG_RELOC);
10618c2ecf20Sopenharmony_ci	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
10628c2ecf20Sopenharmony_ci		btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
10638c2ecf20Sopenharmony_ci	else
10648c2ecf20Sopenharmony_ci		btrfs_set_header_owner(cow, root->root_key.objectid);
10658c2ecf20Sopenharmony_ci
10668c2ecf20Sopenharmony_ci	write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
10678c2ecf20Sopenharmony_ci
10688c2ecf20Sopenharmony_ci	ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
10698c2ecf20Sopenharmony_ci	if (ret) {
10708c2ecf20Sopenharmony_ci		btrfs_tree_unlock(cow);
10718c2ecf20Sopenharmony_ci		free_extent_buffer(cow);
10728c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
10738c2ecf20Sopenharmony_ci		return ret;
10748c2ecf20Sopenharmony_ci	}
10758c2ecf20Sopenharmony_ci
10768c2ecf20Sopenharmony_ci	if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
10778c2ecf20Sopenharmony_ci		ret = btrfs_reloc_cow_block(trans, root, buf, cow);
10788c2ecf20Sopenharmony_ci		if (ret) {
10798c2ecf20Sopenharmony_ci			btrfs_tree_unlock(cow);
10808c2ecf20Sopenharmony_ci			free_extent_buffer(cow);
10818c2ecf20Sopenharmony_ci			btrfs_abort_transaction(trans, ret);
10828c2ecf20Sopenharmony_ci			return ret;
10838c2ecf20Sopenharmony_ci		}
10848c2ecf20Sopenharmony_ci	}
10858c2ecf20Sopenharmony_ci
10868c2ecf20Sopenharmony_ci	if (buf == root->node) {
10878c2ecf20Sopenharmony_ci		WARN_ON(parent && parent != buf);
10888c2ecf20Sopenharmony_ci		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
10898c2ecf20Sopenharmony_ci		    btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
10908c2ecf20Sopenharmony_ci			parent_start = buf->start;
10918c2ecf20Sopenharmony_ci
10928c2ecf20Sopenharmony_ci		atomic_inc(&cow->refs);
10938c2ecf20Sopenharmony_ci		ret = tree_mod_log_insert_root(root->node, cow, 1);
10948c2ecf20Sopenharmony_ci		BUG_ON(ret < 0);
10958c2ecf20Sopenharmony_ci		rcu_assign_pointer(root->node, cow);
10968c2ecf20Sopenharmony_ci
10978c2ecf20Sopenharmony_ci		btrfs_free_tree_block(trans, root, buf, parent_start,
10988c2ecf20Sopenharmony_ci				      last_ref);
10998c2ecf20Sopenharmony_ci		free_extent_buffer(buf);
11008c2ecf20Sopenharmony_ci		add_root_to_dirty_list(root);
11018c2ecf20Sopenharmony_ci	} else {
11028c2ecf20Sopenharmony_ci		WARN_ON(trans->transid != btrfs_header_generation(parent));
11038c2ecf20Sopenharmony_ci		tree_mod_log_insert_key(parent, parent_slot,
11048c2ecf20Sopenharmony_ci					MOD_LOG_KEY_REPLACE, GFP_NOFS);
11058c2ecf20Sopenharmony_ci		btrfs_set_node_blockptr(parent, parent_slot,
11068c2ecf20Sopenharmony_ci					cow->start);
11078c2ecf20Sopenharmony_ci		btrfs_set_node_ptr_generation(parent, parent_slot,
11088c2ecf20Sopenharmony_ci					      trans->transid);
11098c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(parent);
11108c2ecf20Sopenharmony_ci		if (last_ref) {
11118c2ecf20Sopenharmony_ci			ret = tree_mod_log_free_eb(buf);
11128c2ecf20Sopenharmony_ci			if (ret) {
11138c2ecf20Sopenharmony_ci				btrfs_tree_unlock(cow);
11148c2ecf20Sopenharmony_ci				free_extent_buffer(cow);
11158c2ecf20Sopenharmony_ci				btrfs_abort_transaction(trans, ret);
11168c2ecf20Sopenharmony_ci				return ret;
11178c2ecf20Sopenharmony_ci			}
11188c2ecf20Sopenharmony_ci		}
11198c2ecf20Sopenharmony_ci		btrfs_free_tree_block(trans, root, buf, parent_start,
11208c2ecf20Sopenharmony_ci				      last_ref);
11218c2ecf20Sopenharmony_ci	}
11228c2ecf20Sopenharmony_ci	if (unlock_orig)
11238c2ecf20Sopenharmony_ci		btrfs_tree_unlock(buf);
11248c2ecf20Sopenharmony_ci	free_extent_buffer_stale(buf);
11258c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(cow);
11268c2ecf20Sopenharmony_ci	*cow_ret = cow;
11278c2ecf20Sopenharmony_ci	return 0;
11288c2ecf20Sopenharmony_ci}
11298c2ecf20Sopenharmony_ci
11308c2ecf20Sopenharmony_ci/*
11318c2ecf20Sopenharmony_ci * returns the logical address of the oldest predecessor of the given root.
11328c2ecf20Sopenharmony_ci * entries older than time_seq are ignored.
11338c2ecf20Sopenharmony_ci */
11348c2ecf20Sopenharmony_cistatic struct tree_mod_elem *__tree_mod_log_oldest_root(
11358c2ecf20Sopenharmony_ci		struct extent_buffer *eb_root, u64 time_seq)
11368c2ecf20Sopenharmony_ci{
11378c2ecf20Sopenharmony_ci	struct tree_mod_elem *tm;
11388c2ecf20Sopenharmony_ci	struct tree_mod_elem *found = NULL;
11398c2ecf20Sopenharmony_ci	u64 root_logical = eb_root->start;
11408c2ecf20Sopenharmony_ci	int looped = 0;
11418c2ecf20Sopenharmony_ci
11428c2ecf20Sopenharmony_ci	if (!time_seq)
11438c2ecf20Sopenharmony_ci		return NULL;
11448c2ecf20Sopenharmony_ci
11458c2ecf20Sopenharmony_ci	/*
11468c2ecf20Sopenharmony_ci	 * the very last operation that's logged for a root is the
11478c2ecf20Sopenharmony_ci	 * replacement operation (if it is replaced at all). this has
11488c2ecf20Sopenharmony_ci	 * the logical address of the *new* root, making it the very
11498c2ecf20Sopenharmony_ci	 * first operation that's logged for this root.
11508c2ecf20Sopenharmony_ci	 */
11518c2ecf20Sopenharmony_ci	while (1) {
11528c2ecf20Sopenharmony_ci		tm = tree_mod_log_search_oldest(eb_root->fs_info, root_logical,
11538c2ecf20Sopenharmony_ci						time_seq);
11548c2ecf20Sopenharmony_ci		if (!looped && !tm)
11558c2ecf20Sopenharmony_ci			return NULL;
11568c2ecf20Sopenharmony_ci		/*
11578c2ecf20Sopenharmony_ci		 * if there are no tree operation for the oldest root, we simply
11588c2ecf20Sopenharmony_ci		 * return it. this should only happen if that (old) root is at
11598c2ecf20Sopenharmony_ci		 * level 0.
11608c2ecf20Sopenharmony_ci		 */
11618c2ecf20Sopenharmony_ci		if (!tm)
11628c2ecf20Sopenharmony_ci			break;
11638c2ecf20Sopenharmony_ci
11648c2ecf20Sopenharmony_ci		/*
11658c2ecf20Sopenharmony_ci		 * if there's an operation that's not a root replacement, we
11668c2ecf20Sopenharmony_ci		 * found the oldest version of our root. normally, we'll find a
11678c2ecf20Sopenharmony_ci		 * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here.
11688c2ecf20Sopenharmony_ci		 */
11698c2ecf20Sopenharmony_ci		if (tm->op != MOD_LOG_ROOT_REPLACE)
11708c2ecf20Sopenharmony_ci			break;
11718c2ecf20Sopenharmony_ci
11728c2ecf20Sopenharmony_ci		found = tm;
11738c2ecf20Sopenharmony_ci		root_logical = tm->old_root.logical;
11748c2ecf20Sopenharmony_ci		looped = 1;
11758c2ecf20Sopenharmony_ci	}
11768c2ecf20Sopenharmony_ci
11778c2ecf20Sopenharmony_ci	/* if there's no old root to return, return what we found instead */
11788c2ecf20Sopenharmony_ci	if (!found)
11798c2ecf20Sopenharmony_ci		found = tm;
11808c2ecf20Sopenharmony_ci
11818c2ecf20Sopenharmony_ci	return found;
11828c2ecf20Sopenharmony_ci}
11838c2ecf20Sopenharmony_ci
11848c2ecf20Sopenharmony_ci/*
11858c2ecf20Sopenharmony_ci * tm is a pointer to the first operation to rewind within eb. then, all
11868c2ecf20Sopenharmony_ci * previous operations will be rewound (until we reach something older than
11878c2ecf20Sopenharmony_ci * time_seq).
11888c2ecf20Sopenharmony_ci */
11898c2ecf20Sopenharmony_cistatic void
11908c2ecf20Sopenharmony_ci__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
11918c2ecf20Sopenharmony_ci		      u64 time_seq, struct tree_mod_elem *first_tm)
11928c2ecf20Sopenharmony_ci{
11938c2ecf20Sopenharmony_ci	u32 n;
11948c2ecf20Sopenharmony_ci	struct rb_node *next;
11958c2ecf20Sopenharmony_ci	struct tree_mod_elem *tm = first_tm;
11968c2ecf20Sopenharmony_ci	unsigned long o_dst;
11978c2ecf20Sopenharmony_ci	unsigned long o_src;
11988c2ecf20Sopenharmony_ci	unsigned long p_size = sizeof(struct btrfs_key_ptr);
11998c2ecf20Sopenharmony_ci
12008c2ecf20Sopenharmony_ci	n = btrfs_header_nritems(eb);
12018c2ecf20Sopenharmony_ci	read_lock(&fs_info->tree_mod_log_lock);
12028c2ecf20Sopenharmony_ci	while (tm && tm->seq >= time_seq) {
12038c2ecf20Sopenharmony_ci		/*
12048c2ecf20Sopenharmony_ci		 * all the operations are recorded with the operator used for
12058c2ecf20Sopenharmony_ci		 * the modification. as we're going backwards, we do the
12068c2ecf20Sopenharmony_ci		 * opposite of each operation here.
12078c2ecf20Sopenharmony_ci		 */
12088c2ecf20Sopenharmony_ci		switch (tm->op) {
12098c2ecf20Sopenharmony_ci		case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
12108c2ecf20Sopenharmony_ci			BUG_ON(tm->slot < n);
12118c2ecf20Sopenharmony_ci			fallthrough;
12128c2ecf20Sopenharmony_ci		case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
12138c2ecf20Sopenharmony_ci		case MOD_LOG_KEY_REMOVE:
12148c2ecf20Sopenharmony_ci			btrfs_set_node_key(eb, &tm->key, tm->slot);
12158c2ecf20Sopenharmony_ci			btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
12168c2ecf20Sopenharmony_ci			btrfs_set_node_ptr_generation(eb, tm->slot,
12178c2ecf20Sopenharmony_ci						      tm->generation);
12188c2ecf20Sopenharmony_ci			n++;
12198c2ecf20Sopenharmony_ci			break;
12208c2ecf20Sopenharmony_ci		case MOD_LOG_KEY_REPLACE:
12218c2ecf20Sopenharmony_ci			BUG_ON(tm->slot >= n);
12228c2ecf20Sopenharmony_ci			btrfs_set_node_key(eb, &tm->key, tm->slot);
12238c2ecf20Sopenharmony_ci			btrfs_set_node_blockptr(eb, tm->slot, tm->blockptr);
12248c2ecf20Sopenharmony_ci			btrfs_set_node_ptr_generation(eb, tm->slot,
12258c2ecf20Sopenharmony_ci						      tm->generation);
12268c2ecf20Sopenharmony_ci			break;
12278c2ecf20Sopenharmony_ci		case MOD_LOG_KEY_ADD:
12288c2ecf20Sopenharmony_ci			/* if a move operation is needed it's in the log */
12298c2ecf20Sopenharmony_ci			n--;
12308c2ecf20Sopenharmony_ci			break;
12318c2ecf20Sopenharmony_ci		case MOD_LOG_MOVE_KEYS:
12328c2ecf20Sopenharmony_ci			o_dst = btrfs_node_key_ptr_offset(tm->slot);
12338c2ecf20Sopenharmony_ci			o_src = btrfs_node_key_ptr_offset(tm->move.dst_slot);
12348c2ecf20Sopenharmony_ci			memmove_extent_buffer(eb, o_dst, o_src,
12358c2ecf20Sopenharmony_ci					      tm->move.nr_items * p_size);
12368c2ecf20Sopenharmony_ci			break;
12378c2ecf20Sopenharmony_ci		case MOD_LOG_ROOT_REPLACE:
12388c2ecf20Sopenharmony_ci			/*
12398c2ecf20Sopenharmony_ci			 * this operation is special. for roots, this must be
12408c2ecf20Sopenharmony_ci			 * handled explicitly before rewinding.
12418c2ecf20Sopenharmony_ci			 * for non-roots, this operation may exist if the node
12428c2ecf20Sopenharmony_ci			 * was a root: root A -> child B; then A gets empty and
12438c2ecf20Sopenharmony_ci			 * B is promoted to the new root. in the mod log, we'll
12448c2ecf20Sopenharmony_ci			 * have a root-replace operation for B, a tree block
12458c2ecf20Sopenharmony_ci			 * that is no root. we simply ignore that operation.
12468c2ecf20Sopenharmony_ci			 */
12478c2ecf20Sopenharmony_ci			break;
12488c2ecf20Sopenharmony_ci		}
12498c2ecf20Sopenharmony_ci		next = rb_next(&tm->node);
12508c2ecf20Sopenharmony_ci		if (!next)
12518c2ecf20Sopenharmony_ci			break;
12528c2ecf20Sopenharmony_ci		tm = rb_entry(next, struct tree_mod_elem, node);
12538c2ecf20Sopenharmony_ci		if (tm->logical != first_tm->logical)
12548c2ecf20Sopenharmony_ci			break;
12558c2ecf20Sopenharmony_ci	}
12568c2ecf20Sopenharmony_ci	read_unlock(&fs_info->tree_mod_log_lock);
12578c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(eb, n);
12588c2ecf20Sopenharmony_ci}
12598c2ecf20Sopenharmony_ci
12608c2ecf20Sopenharmony_ci/*
12618c2ecf20Sopenharmony_ci * Called with eb read locked. If the buffer cannot be rewound, the same buffer
12628c2ecf20Sopenharmony_ci * is returned. If rewind operations happen, a fresh buffer is returned. The
12638c2ecf20Sopenharmony_ci * returned buffer is always read-locked. If the returned buffer is not the
12648c2ecf20Sopenharmony_ci * input buffer, the lock on the input buffer is released and the input buffer
12658c2ecf20Sopenharmony_ci * is freed (its refcount is decremented).
12668c2ecf20Sopenharmony_ci */
12678c2ecf20Sopenharmony_cistatic struct extent_buffer *
12688c2ecf20Sopenharmony_citree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
12698c2ecf20Sopenharmony_ci		    struct extent_buffer *eb, u64 time_seq)
12708c2ecf20Sopenharmony_ci{
12718c2ecf20Sopenharmony_ci	struct extent_buffer *eb_rewin;
12728c2ecf20Sopenharmony_ci	struct tree_mod_elem *tm;
12738c2ecf20Sopenharmony_ci
12748c2ecf20Sopenharmony_ci	if (!time_seq)
12758c2ecf20Sopenharmony_ci		return eb;
12768c2ecf20Sopenharmony_ci
12778c2ecf20Sopenharmony_ci	if (btrfs_header_level(eb) == 0)
12788c2ecf20Sopenharmony_ci		return eb;
12798c2ecf20Sopenharmony_ci
12808c2ecf20Sopenharmony_ci	tm = tree_mod_log_search(fs_info, eb->start, time_seq);
12818c2ecf20Sopenharmony_ci	if (!tm)
12828c2ecf20Sopenharmony_ci		return eb;
12838c2ecf20Sopenharmony_ci
12848c2ecf20Sopenharmony_ci	btrfs_set_path_blocking(path);
12858c2ecf20Sopenharmony_ci	btrfs_set_lock_blocking_read(eb);
12868c2ecf20Sopenharmony_ci
12878c2ecf20Sopenharmony_ci	if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
12888c2ecf20Sopenharmony_ci		BUG_ON(tm->slot != 0);
12898c2ecf20Sopenharmony_ci		eb_rewin = alloc_dummy_extent_buffer(fs_info, eb->start);
12908c2ecf20Sopenharmony_ci		if (!eb_rewin) {
12918c2ecf20Sopenharmony_ci			btrfs_tree_read_unlock_blocking(eb);
12928c2ecf20Sopenharmony_ci			free_extent_buffer(eb);
12938c2ecf20Sopenharmony_ci			return NULL;
12948c2ecf20Sopenharmony_ci		}
12958c2ecf20Sopenharmony_ci		btrfs_set_header_bytenr(eb_rewin, eb->start);
12968c2ecf20Sopenharmony_ci		btrfs_set_header_backref_rev(eb_rewin,
12978c2ecf20Sopenharmony_ci					     btrfs_header_backref_rev(eb));
12988c2ecf20Sopenharmony_ci		btrfs_set_header_owner(eb_rewin, btrfs_header_owner(eb));
12998c2ecf20Sopenharmony_ci		btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
13008c2ecf20Sopenharmony_ci	} else {
13018c2ecf20Sopenharmony_ci		eb_rewin = btrfs_clone_extent_buffer(eb);
13028c2ecf20Sopenharmony_ci		if (!eb_rewin) {
13038c2ecf20Sopenharmony_ci			btrfs_tree_read_unlock_blocking(eb);
13048c2ecf20Sopenharmony_ci			free_extent_buffer(eb);
13058c2ecf20Sopenharmony_ci			return NULL;
13068c2ecf20Sopenharmony_ci		}
13078c2ecf20Sopenharmony_ci	}
13088c2ecf20Sopenharmony_ci
13098c2ecf20Sopenharmony_ci	btrfs_tree_read_unlock_blocking(eb);
13108c2ecf20Sopenharmony_ci	free_extent_buffer(eb);
13118c2ecf20Sopenharmony_ci
13128c2ecf20Sopenharmony_ci	btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb_rewin),
13138c2ecf20Sopenharmony_ci				       eb_rewin, btrfs_header_level(eb_rewin));
13148c2ecf20Sopenharmony_ci	btrfs_tree_read_lock(eb_rewin);
13158c2ecf20Sopenharmony_ci	__tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
13168c2ecf20Sopenharmony_ci	WARN_ON(btrfs_header_nritems(eb_rewin) >
13178c2ecf20Sopenharmony_ci		BTRFS_NODEPTRS_PER_BLOCK(fs_info));
13188c2ecf20Sopenharmony_ci
13198c2ecf20Sopenharmony_ci	return eb_rewin;
13208c2ecf20Sopenharmony_ci}
13218c2ecf20Sopenharmony_ci
13228c2ecf20Sopenharmony_ci/*
13238c2ecf20Sopenharmony_ci * get_old_root() rewinds the state of @root's root node to the given @time_seq
13248c2ecf20Sopenharmony_ci * value. If there are no changes, the current root->root_node is returned. If
13258c2ecf20Sopenharmony_ci * anything changed in between, there's a fresh buffer allocated on which the
13268c2ecf20Sopenharmony_ci * rewind operations are done. In any case, the returned buffer is read locked.
13278c2ecf20Sopenharmony_ci * Returns NULL on error (with no locks held).
13288c2ecf20Sopenharmony_ci */
13298c2ecf20Sopenharmony_cistatic inline struct extent_buffer *
13308c2ecf20Sopenharmony_ciget_old_root(struct btrfs_root *root, u64 time_seq)
13318c2ecf20Sopenharmony_ci{
13328c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
13338c2ecf20Sopenharmony_ci	struct tree_mod_elem *tm;
13348c2ecf20Sopenharmony_ci	struct extent_buffer *eb = NULL;
13358c2ecf20Sopenharmony_ci	struct extent_buffer *eb_root;
13368c2ecf20Sopenharmony_ci	u64 eb_root_owner = 0;
13378c2ecf20Sopenharmony_ci	struct extent_buffer *old;
13388c2ecf20Sopenharmony_ci	struct tree_mod_root *old_root = NULL;
13398c2ecf20Sopenharmony_ci	u64 old_generation = 0;
13408c2ecf20Sopenharmony_ci	u64 logical;
13418c2ecf20Sopenharmony_ci	int level;
13428c2ecf20Sopenharmony_ci
13438c2ecf20Sopenharmony_ci	eb_root = btrfs_read_lock_root_node(root);
13448c2ecf20Sopenharmony_ci	tm = __tree_mod_log_oldest_root(eb_root, time_seq);
13458c2ecf20Sopenharmony_ci	if (!tm)
13468c2ecf20Sopenharmony_ci		return eb_root;
13478c2ecf20Sopenharmony_ci
13488c2ecf20Sopenharmony_ci	if (tm->op == MOD_LOG_ROOT_REPLACE) {
13498c2ecf20Sopenharmony_ci		old_root = &tm->old_root;
13508c2ecf20Sopenharmony_ci		old_generation = tm->generation;
13518c2ecf20Sopenharmony_ci		logical = old_root->logical;
13528c2ecf20Sopenharmony_ci		level = old_root->level;
13538c2ecf20Sopenharmony_ci	} else {
13548c2ecf20Sopenharmony_ci		logical = eb_root->start;
13558c2ecf20Sopenharmony_ci		level = btrfs_header_level(eb_root);
13568c2ecf20Sopenharmony_ci	}
13578c2ecf20Sopenharmony_ci
13588c2ecf20Sopenharmony_ci	tm = tree_mod_log_search(fs_info, logical, time_seq);
13598c2ecf20Sopenharmony_ci	if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
13608c2ecf20Sopenharmony_ci		btrfs_tree_read_unlock(eb_root);
13618c2ecf20Sopenharmony_ci		free_extent_buffer(eb_root);
13628c2ecf20Sopenharmony_ci		old = read_tree_block(fs_info, logical, 0, level, NULL);
13638c2ecf20Sopenharmony_ci		if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) {
13648c2ecf20Sopenharmony_ci			if (!IS_ERR(old))
13658c2ecf20Sopenharmony_ci				free_extent_buffer(old);
13668c2ecf20Sopenharmony_ci			btrfs_warn(fs_info,
13678c2ecf20Sopenharmony_ci				   "failed to read tree block %llu from get_old_root",
13688c2ecf20Sopenharmony_ci				   logical);
13698c2ecf20Sopenharmony_ci		} else {
13708c2ecf20Sopenharmony_ci			struct tree_mod_elem *tm2;
13718c2ecf20Sopenharmony_ci
13728c2ecf20Sopenharmony_ci			btrfs_tree_read_lock(old);
13738c2ecf20Sopenharmony_ci			eb = btrfs_clone_extent_buffer(old);
13748c2ecf20Sopenharmony_ci			/*
13758c2ecf20Sopenharmony_ci			 * After the lookup for the most recent tree mod operation
13768c2ecf20Sopenharmony_ci			 * above and before we locked and cloned the extent buffer
13778c2ecf20Sopenharmony_ci			 * 'old', a new tree mod log operation may have been added.
13788c2ecf20Sopenharmony_ci			 * So lookup for a more recent one to make sure the number
13798c2ecf20Sopenharmony_ci			 * of mod log operations we replay is consistent with the
13808c2ecf20Sopenharmony_ci			 * number of items we have in the cloned extent buffer,
13818c2ecf20Sopenharmony_ci			 * otherwise we can hit a BUG_ON when rewinding the extent
13828c2ecf20Sopenharmony_ci			 * buffer.
13838c2ecf20Sopenharmony_ci			 */
13848c2ecf20Sopenharmony_ci			tm2 = tree_mod_log_search(fs_info, logical, time_seq);
13858c2ecf20Sopenharmony_ci			btrfs_tree_read_unlock(old);
13868c2ecf20Sopenharmony_ci			free_extent_buffer(old);
13878c2ecf20Sopenharmony_ci			ASSERT(tm2);
13888c2ecf20Sopenharmony_ci			ASSERT(tm2 == tm || tm2->seq > tm->seq);
13898c2ecf20Sopenharmony_ci			if (!tm2 || tm2->seq < tm->seq) {
13908c2ecf20Sopenharmony_ci				free_extent_buffer(eb);
13918c2ecf20Sopenharmony_ci				return NULL;
13928c2ecf20Sopenharmony_ci			}
13938c2ecf20Sopenharmony_ci			tm = tm2;
13948c2ecf20Sopenharmony_ci		}
13958c2ecf20Sopenharmony_ci	} else if (old_root) {
13968c2ecf20Sopenharmony_ci		eb_root_owner = btrfs_header_owner(eb_root);
13978c2ecf20Sopenharmony_ci		btrfs_tree_read_unlock(eb_root);
13988c2ecf20Sopenharmony_ci		free_extent_buffer(eb_root);
13998c2ecf20Sopenharmony_ci		eb = alloc_dummy_extent_buffer(fs_info, logical);
14008c2ecf20Sopenharmony_ci	} else {
14018c2ecf20Sopenharmony_ci		btrfs_set_lock_blocking_read(eb_root);
14028c2ecf20Sopenharmony_ci		eb = btrfs_clone_extent_buffer(eb_root);
14038c2ecf20Sopenharmony_ci		btrfs_tree_read_unlock_blocking(eb_root);
14048c2ecf20Sopenharmony_ci		free_extent_buffer(eb_root);
14058c2ecf20Sopenharmony_ci	}
14068c2ecf20Sopenharmony_ci
14078c2ecf20Sopenharmony_ci	if (!eb)
14088c2ecf20Sopenharmony_ci		return NULL;
14098c2ecf20Sopenharmony_ci	if (old_root) {
14108c2ecf20Sopenharmony_ci		btrfs_set_header_bytenr(eb, eb->start);
14118c2ecf20Sopenharmony_ci		btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
14128c2ecf20Sopenharmony_ci		btrfs_set_header_owner(eb, eb_root_owner);
14138c2ecf20Sopenharmony_ci		btrfs_set_header_level(eb, old_root->level);
14148c2ecf20Sopenharmony_ci		btrfs_set_header_generation(eb, old_generation);
14158c2ecf20Sopenharmony_ci	}
14168c2ecf20Sopenharmony_ci	btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), eb,
14178c2ecf20Sopenharmony_ci				       btrfs_header_level(eb));
14188c2ecf20Sopenharmony_ci	btrfs_tree_read_lock(eb);
14198c2ecf20Sopenharmony_ci	if (tm)
14208c2ecf20Sopenharmony_ci		__tree_mod_log_rewind(fs_info, eb, time_seq, tm);
14218c2ecf20Sopenharmony_ci	else
14228c2ecf20Sopenharmony_ci		WARN_ON(btrfs_header_level(eb) != 0);
14238c2ecf20Sopenharmony_ci	WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(fs_info));
14248c2ecf20Sopenharmony_ci
14258c2ecf20Sopenharmony_ci	return eb;
14268c2ecf20Sopenharmony_ci}
14278c2ecf20Sopenharmony_ci
14288c2ecf20Sopenharmony_ciint btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
14298c2ecf20Sopenharmony_ci{
14308c2ecf20Sopenharmony_ci	struct tree_mod_elem *tm;
14318c2ecf20Sopenharmony_ci	int level;
14328c2ecf20Sopenharmony_ci	struct extent_buffer *eb_root = btrfs_root_node(root);
14338c2ecf20Sopenharmony_ci
14348c2ecf20Sopenharmony_ci	tm = __tree_mod_log_oldest_root(eb_root, time_seq);
14358c2ecf20Sopenharmony_ci	if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
14368c2ecf20Sopenharmony_ci		level = tm->old_root.level;
14378c2ecf20Sopenharmony_ci	} else {
14388c2ecf20Sopenharmony_ci		level = btrfs_header_level(eb_root);
14398c2ecf20Sopenharmony_ci	}
14408c2ecf20Sopenharmony_ci	free_extent_buffer(eb_root);
14418c2ecf20Sopenharmony_ci
14428c2ecf20Sopenharmony_ci	return level;
14438c2ecf20Sopenharmony_ci}
14448c2ecf20Sopenharmony_ci
14458c2ecf20Sopenharmony_cistatic inline int should_cow_block(struct btrfs_trans_handle *trans,
14468c2ecf20Sopenharmony_ci				   struct btrfs_root *root,
14478c2ecf20Sopenharmony_ci				   struct extent_buffer *buf)
14488c2ecf20Sopenharmony_ci{
14498c2ecf20Sopenharmony_ci	if (btrfs_is_testing(root->fs_info))
14508c2ecf20Sopenharmony_ci		return 0;
14518c2ecf20Sopenharmony_ci
14528c2ecf20Sopenharmony_ci	/* Ensure we can see the FORCE_COW bit */
14538c2ecf20Sopenharmony_ci	smp_mb__before_atomic();
14548c2ecf20Sopenharmony_ci
14558c2ecf20Sopenharmony_ci	/*
14568c2ecf20Sopenharmony_ci	 * We do not need to cow a block if
14578c2ecf20Sopenharmony_ci	 * 1) this block is not created or changed in this transaction;
14588c2ecf20Sopenharmony_ci	 * 2) this block does not belong to TREE_RELOC tree;
14598c2ecf20Sopenharmony_ci	 * 3) the root is not forced COW.
14608c2ecf20Sopenharmony_ci	 *
14618c2ecf20Sopenharmony_ci	 * What is forced COW:
14628c2ecf20Sopenharmony_ci	 *    when we create snapshot during committing the transaction,
14638c2ecf20Sopenharmony_ci	 *    after we've finished copying src root, we must COW the shared
14648c2ecf20Sopenharmony_ci	 *    block to ensure the metadata consistency.
14658c2ecf20Sopenharmony_ci	 */
14668c2ecf20Sopenharmony_ci	if (btrfs_header_generation(buf) == trans->transid &&
14678c2ecf20Sopenharmony_ci	    !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
14688c2ecf20Sopenharmony_ci	    !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
14698c2ecf20Sopenharmony_ci	      btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
14708c2ecf20Sopenharmony_ci	    !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
14718c2ecf20Sopenharmony_ci		return 0;
14728c2ecf20Sopenharmony_ci	return 1;
14738c2ecf20Sopenharmony_ci}
14748c2ecf20Sopenharmony_ci
14758c2ecf20Sopenharmony_ci/*
14768c2ecf20Sopenharmony_ci * cows a single block, see __btrfs_cow_block for the real work.
14778c2ecf20Sopenharmony_ci * This version of it has extra checks so that a block isn't COWed more than
14788c2ecf20Sopenharmony_ci * once per transaction, as long as it hasn't been written yet
14798c2ecf20Sopenharmony_ci */
14808c2ecf20Sopenharmony_cinoinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
14818c2ecf20Sopenharmony_ci		    struct btrfs_root *root, struct extent_buffer *buf,
14828c2ecf20Sopenharmony_ci		    struct extent_buffer *parent, int parent_slot,
14838c2ecf20Sopenharmony_ci		    struct extent_buffer **cow_ret,
14848c2ecf20Sopenharmony_ci		    enum btrfs_lock_nesting nest)
14858c2ecf20Sopenharmony_ci{
14868c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
14878c2ecf20Sopenharmony_ci	u64 search_start;
14888c2ecf20Sopenharmony_ci	int ret;
14898c2ecf20Sopenharmony_ci
14908c2ecf20Sopenharmony_ci	if (test_bit(BTRFS_ROOT_DELETING, &root->state))
14918c2ecf20Sopenharmony_ci		btrfs_err(fs_info,
14928c2ecf20Sopenharmony_ci			"COW'ing blocks on a fs root that's being dropped");
14938c2ecf20Sopenharmony_ci
14948c2ecf20Sopenharmony_ci	if (trans->transaction != fs_info->running_transaction)
14958c2ecf20Sopenharmony_ci		WARN(1, KERN_CRIT "trans %llu running %llu\n",
14968c2ecf20Sopenharmony_ci		       trans->transid,
14978c2ecf20Sopenharmony_ci		       fs_info->running_transaction->transid);
14988c2ecf20Sopenharmony_ci
14998c2ecf20Sopenharmony_ci	if (trans->transid != fs_info->generation)
15008c2ecf20Sopenharmony_ci		WARN(1, KERN_CRIT "trans %llu running %llu\n",
15018c2ecf20Sopenharmony_ci		       trans->transid, fs_info->generation);
15028c2ecf20Sopenharmony_ci
15038c2ecf20Sopenharmony_ci	if (!should_cow_block(trans, root, buf)) {
15048c2ecf20Sopenharmony_ci		trans->dirty = true;
15058c2ecf20Sopenharmony_ci		*cow_ret = buf;
15068c2ecf20Sopenharmony_ci		return 0;
15078c2ecf20Sopenharmony_ci	}
15088c2ecf20Sopenharmony_ci
15098c2ecf20Sopenharmony_ci	search_start = buf->start & ~((u64)SZ_1G - 1);
15108c2ecf20Sopenharmony_ci
15118c2ecf20Sopenharmony_ci	if (parent)
15128c2ecf20Sopenharmony_ci		btrfs_set_lock_blocking_write(parent);
15138c2ecf20Sopenharmony_ci	btrfs_set_lock_blocking_write(buf);
15148c2ecf20Sopenharmony_ci
15158c2ecf20Sopenharmony_ci	/*
15168c2ecf20Sopenharmony_ci	 * Before CoWing this block for later modification, check if it's
15178c2ecf20Sopenharmony_ci	 * the subtree root and do the delayed subtree trace if needed.
15188c2ecf20Sopenharmony_ci	 *
15198c2ecf20Sopenharmony_ci	 * Also We don't care about the error, as it's handled internally.
15208c2ecf20Sopenharmony_ci	 */
15218c2ecf20Sopenharmony_ci	btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
15228c2ecf20Sopenharmony_ci	ret = __btrfs_cow_block(trans, root, buf, parent,
15238c2ecf20Sopenharmony_ci				 parent_slot, cow_ret, search_start, 0, nest);
15248c2ecf20Sopenharmony_ci
15258c2ecf20Sopenharmony_ci	trace_btrfs_cow_block(root, buf, *cow_ret);
15268c2ecf20Sopenharmony_ci
15278c2ecf20Sopenharmony_ci	return ret;
15288c2ecf20Sopenharmony_ci}
15298c2ecf20Sopenharmony_ci
15308c2ecf20Sopenharmony_ci/*
15318c2ecf20Sopenharmony_ci * helper function for defrag to decide if two blocks pointed to by a
15328c2ecf20Sopenharmony_ci * node are actually close by
15338c2ecf20Sopenharmony_ci */
15348c2ecf20Sopenharmony_cistatic int close_blocks(u64 blocknr, u64 other, u32 blocksize)
15358c2ecf20Sopenharmony_ci{
15368c2ecf20Sopenharmony_ci	if (blocknr < other && other - (blocknr + blocksize) < 32768)
15378c2ecf20Sopenharmony_ci		return 1;
15388c2ecf20Sopenharmony_ci	if (blocknr > other && blocknr - (other + blocksize) < 32768)
15398c2ecf20Sopenharmony_ci		return 1;
15408c2ecf20Sopenharmony_ci	return 0;
15418c2ecf20Sopenharmony_ci}
15428c2ecf20Sopenharmony_ci
15438c2ecf20Sopenharmony_ci#ifdef __LITTLE_ENDIAN
15448c2ecf20Sopenharmony_ci
15458c2ecf20Sopenharmony_ci/*
15468c2ecf20Sopenharmony_ci * Compare two keys, on little-endian the disk order is same as CPU order and
15478c2ecf20Sopenharmony_ci * we can avoid the conversion.
15488c2ecf20Sopenharmony_ci */
15498c2ecf20Sopenharmony_cistatic int comp_keys(const struct btrfs_disk_key *disk_key,
15508c2ecf20Sopenharmony_ci		     const struct btrfs_key *k2)
15518c2ecf20Sopenharmony_ci{
15528c2ecf20Sopenharmony_ci	const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key;
15538c2ecf20Sopenharmony_ci
15548c2ecf20Sopenharmony_ci	return btrfs_comp_cpu_keys(k1, k2);
15558c2ecf20Sopenharmony_ci}
15568c2ecf20Sopenharmony_ci
15578c2ecf20Sopenharmony_ci#else
15588c2ecf20Sopenharmony_ci
15598c2ecf20Sopenharmony_ci/*
15608c2ecf20Sopenharmony_ci * compare two keys in a memcmp fashion
15618c2ecf20Sopenharmony_ci */
15628c2ecf20Sopenharmony_cistatic int comp_keys(const struct btrfs_disk_key *disk,
15638c2ecf20Sopenharmony_ci		     const struct btrfs_key *k2)
15648c2ecf20Sopenharmony_ci{
15658c2ecf20Sopenharmony_ci	struct btrfs_key k1;
15668c2ecf20Sopenharmony_ci
15678c2ecf20Sopenharmony_ci	btrfs_disk_key_to_cpu(&k1, disk);
15688c2ecf20Sopenharmony_ci
15698c2ecf20Sopenharmony_ci	return btrfs_comp_cpu_keys(&k1, k2);
15708c2ecf20Sopenharmony_ci}
15718c2ecf20Sopenharmony_ci#endif
15728c2ecf20Sopenharmony_ci
15738c2ecf20Sopenharmony_ci/*
15748c2ecf20Sopenharmony_ci * same as comp_keys only with two btrfs_key's
15758c2ecf20Sopenharmony_ci */
15768c2ecf20Sopenharmony_ciint __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
15778c2ecf20Sopenharmony_ci{
15788c2ecf20Sopenharmony_ci	if (k1->objectid > k2->objectid)
15798c2ecf20Sopenharmony_ci		return 1;
15808c2ecf20Sopenharmony_ci	if (k1->objectid < k2->objectid)
15818c2ecf20Sopenharmony_ci		return -1;
15828c2ecf20Sopenharmony_ci	if (k1->type > k2->type)
15838c2ecf20Sopenharmony_ci		return 1;
15848c2ecf20Sopenharmony_ci	if (k1->type < k2->type)
15858c2ecf20Sopenharmony_ci		return -1;
15868c2ecf20Sopenharmony_ci	if (k1->offset > k2->offset)
15878c2ecf20Sopenharmony_ci		return 1;
15888c2ecf20Sopenharmony_ci	if (k1->offset < k2->offset)
15898c2ecf20Sopenharmony_ci		return -1;
15908c2ecf20Sopenharmony_ci	return 0;
15918c2ecf20Sopenharmony_ci}
15928c2ecf20Sopenharmony_ci
15938c2ecf20Sopenharmony_ci/*
15948c2ecf20Sopenharmony_ci * this is used by the defrag code to go through all the
15958c2ecf20Sopenharmony_ci * leaves pointed to by a node and reallocate them so that
15968c2ecf20Sopenharmony_ci * disk order is close to key order
15978c2ecf20Sopenharmony_ci */
15988c2ecf20Sopenharmony_ciint btrfs_realloc_node(struct btrfs_trans_handle *trans,
15998c2ecf20Sopenharmony_ci		       struct btrfs_root *root, struct extent_buffer *parent,
16008c2ecf20Sopenharmony_ci		       int start_slot, u64 *last_ret,
16018c2ecf20Sopenharmony_ci		       struct btrfs_key *progress)
16028c2ecf20Sopenharmony_ci{
16038c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
16048c2ecf20Sopenharmony_ci	struct extent_buffer *cur;
16058c2ecf20Sopenharmony_ci	u64 blocknr;
16068c2ecf20Sopenharmony_ci	u64 gen;
16078c2ecf20Sopenharmony_ci	u64 search_start = *last_ret;
16088c2ecf20Sopenharmony_ci	u64 last_block = 0;
16098c2ecf20Sopenharmony_ci	u64 other;
16108c2ecf20Sopenharmony_ci	u32 parent_nritems;
16118c2ecf20Sopenharmony_ci	int end_slot;
16128c2ecf20Sopenharmony_ci	int i;
16138c2ecf20Sopenharmony_ci	int err = 0;
16148c2ecf20Sopenharmony_ci	int parent_level;
16158c2ecf20Sopenharmony_ci	int uptodate;
16168c2ecf20Sopenharmony_ci	u32 blocksize;
16178c2ecf20Sopenharmony_ci	int progress_passed = 0;
16188c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
16198c2ecf20Sopenharmony_ci
16208c2ecf20Sopenharmony_ci	parent_level = btrfs_header_level(parent);
16218c2ecf20Sopenharmony_ci
16228c2ecf20Sopenharmony_ci	WARN_ON(trans->transaction != fs_info->running_transaction);
16238c2ecf20Sopenharmony_ci	WARN_ON(trans->transid != fs_info->generation);
16248c2ecf20Sopenharmony_ci
16258c2ecf20Sopenharmony_ci	parent_nritems = btrfs_header_nritems(parent);
16268c2ecf20Sopenharmony_ci	blocksize = fs_info->nodesize;
16278c2ecf20Sopenharmony_ci	end_slot = parent_nritems - 1;
16288c2ecf20Sopenharmony_ci
16298c2ecf20Sopenharmony_ci	if (parent_nritems <= 1)
16308c2ecf20Sopenharmony_ci		return 0;
16318c2ecf20Sopenharmony_ci
16328c2ecf20Sopenharmony_ci	btrfs_set_lock_blocking_write(parent);
16338c2ecf20Sopenharmony_ci
16348c2ecf20Sopenharmony_ci	for (i = start_slot; i <= end_slot; i++) {
16358c2ecf20Sopenharmony_ci		struct btrfs_key first_key;
16368c2ecf20Sopenharmony_ci		int close = 1;
16378c2ecf20Sopenharmony_ci
16388c2ecf20Sopenharmony_ci		btrfs_node_key(parent, &disk_key, i);
16398c2ecf20Sopenharmony_ci		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
16408c2ecf20Sopenharmony_ci			continue;
16418c2ecf20Sopenharmony_ci
16428c2ecf20Sopenharmony_ci		progress_passed = 1;
16438c2ecf20Sopenharmony_ci		blocknr = btrfs_node_blockptr(parent, i);
16448c2ecf20Sopenharmony_ci		gen = btrfs_node_ptr_generation(parent, i);
16458c2ecf20Sopenharmony_ci		btrfs_node_key_to_cpu(parent, &first_key, i);
16468c2ecf20Sopenharmony_ci		if (last_block == 0)
16478c2ecf20Sopenharmony_ci			last_block = blocknr;
16488c2ecf20Sopenharmony_ci
16498c2ecf20Sopenharmony_ci		if (i > 0) {
16508c2ecf20Sopenharmony_ci			other = btrfs_node_blockptr(parent, i - 1);
16518c2ecf20Sopenharmony_ci			close = close_blocks(blocknr, other, blocksize);
16528c2ecf20Sopenharmony_ci		}
16538c2ecf20Sopenharmony_ci		if (!close && i < end_slot) {
16548c2ecf20Sopenharmony_ci			other = btrfs_node_blockptr(parent, i + 1);
16558c2ecf20Sopenharmony_ci			close = close_blocks(blocknr, other, blocksize);
16568c2ecf20Sopenharmony_ci		}
16578c2ecf20Sopenharmony_ci		if (close) {
16588c2ecf20Sopenharmony_ci			last_block = blocknr;
16598c2ecf20Sopenharmony_ci			continue;
16608c2ecf20Sopenharmony_ci		}
16618c2ecf20Sopenharmony_ci
16628c2ecf20Sopenharmony_ci		cur = find_extent_buffer(fs_info, blocknr);
16638c2ecf20Sopenharmony_ci		if (cur)
16648c2ecf20Sopenharmony_ci			uptodate = btrfs_buffer_uptodate(cur, gen, 0);
16658c2ecf20Sopenharmony_ci		else
16668c2ecf20Sopenharmony_ci			uptodate = 0;
16678c2ecf20Sopenharmony_ci		if (!cur || !uptodate) {
16688c2ecf20Sopenharmony_ci			if (!cur) {
16698c2ecf20Sopenharmony_ci				cur = read_tree_block(fs_info, blocknr, gen,
16708c2ecf20Sopenharmony_ci						      parent_level - 1,
16718c2ecf20Sopenharmony_ci						      &first_key);
16728c2ecf20Sopenharmony_ci				if (IS_ERR(cur)) {
16738c2ecf20Sopenharmony_ci					return PTR_ERR(cur);
16748c2ecf20Sopenharmony_ci				} else if (!extent_buffer_uptodate(cur)) {
16758c2ecf20Sopenharmony_ci					free_extent_buffer(cur);
16768c2ecf20Sopenharmony_ci					return -EIO;
16778c2ecf20Sopenharmony_ci				}
16788c2ecf20Sopenharmony_ci			} else if (!uptodate) {
16798c2ecf20Sopenharmony_ci				err = btrfs_read_buffer(cur, gen,
16808c2ecf20Sopenharmony_ci						parent_level - 1,&first_key);
16818c2ecf20Sopenharmony_ci				if (err) {
16828c2ecf20Sopenharmony_ci					free_extent_buffer(cur);
16838c2ecf20Sopenharmony_ci					return err;
16848c2ecf20Sopenharmony_ci				}
16858c2ecf20Sopenharmony_ci			}
16868c2ecf20Sopenharmony_ci		}
16878c2ecf20Sopenharmony_ci		if (search_start == 0)
16888c2ecf20Sopenharmony_ci			search_start = last_block;
16898c2ecf20Sopenharmony_ci
16908c2ecf20Sopenharmony_ci		btrfs_tree_lock(cur);
16918c2ecf20Sopenharmony_ci		btrfs_set_lock_blocking_write(cur);
16928c2ecf20Sopenharmony_ci		err = __btrfs_cow_block(trans, root, cur, parent, i,
16938c2ecf20Sopenharmony_ci					&cur, search_start,
16948c2ecf20Sopenharmony_ci					min(16 * blocksize,
16958c2ecf20Sopenharmony_ci					    (end_slot - i) * blocksize),
16968c2ecf20Sopenharmony_ci					BTRFS_NESTING_COW);
16978c2ecf20Sopenharmony_ci		if (err) {
16988c2ecf20Sopenharmony_ci			btrfs_tree_unlock(cur);
16998c2ecf20Sopenharmony_ci			free_extent_buffer(cur);
17008c2ecf20Sopenharmony_ci			break;
17018c2ecf20Sopenharmony_ci		}
17028c2ecf20Sopenharmony_ci		search_start = cur->start;
17038c2ecf20Sopenharmony_ci		last_block = cur->start;
17048c2ecf20Sopenharmony_ci		*last_ret = search_start;
17058c2ecf20Sopenharmony_ci		btrfs_tree_unlock(cur);
17068c2ecf20Sopenharmony_ci		free_extent_buffer(cur);
17078c2ecf20Sopenharmony_ci	}
17088c2ecf20Sopenharmony_ci	return err;
17098c2ecf20Sopenharmony_ci}
17108c2ecf20Sopenharmony_ci
17118c2ecf20Sopenharmony_ci/*
17128c2ecf20Sopenharmony_ci * search for key in the extent_buffer.  The items start at offset p,
17138c2ecf20Sopenharmony_ci * and they are item_size apart.  There are 'max' items in p.
17148c2ecf20Sopenharmony_ci *
17158c2ecf20Sopenharmony_ci * the slot in the array is returned via slot, and it points to
17168c2ecf20Sopenharmony_ci * the place where you would insert key if it is not found in
17178c2ecf20Sopenharmony_ci * the array.
17188c2ecf20Sopenharmony_ci *
17198c2ecf20Sopenharmony_ci * slot may point to max if the key is bigger than all of the keys
17208c2ecf20Sopenharmony_ci */
17218c2ecf20Sopenharmony_cistatic noinline int generic_bin_search(struct extent_buffer *eb,
17228c2ecf20Sopenharmony_ci				       unsigned long p, int item_size,
17238c2ecf20Sopenharmony_ci				       const struct btrfs_key *key,
17248c2ecf20Sopenharmony_ci				       int max, int *slot)
17258c2ecf20Sopenharmony_ci{
17268c2ecf20Sopenharmony_ci	int low = 0;
17278c2ecf20Sopenharmony_ci	int high = max;
17288c2ecf20Sopenharmony_ci	int ret;
17298c2ecf20Sopenharmony_ci	const int key_size = sizeof(struct btrfs_disk_key);
17308c2ecf20Sopenharmony_ci
17318c2ecf20Sopenharmony_ci	if (low > high) {
17328c2ecf20Sopenharmony_ci		btrfs_err(eb->fs_info,
17338c2ecf20Sopenharmony_ci		 "%s: low (%d) > high (%d) eb %llu owner %llu level %d",
17348c2ecf20Sopenharmony_ci			  __func__, low, high, eb->start,
17358c2ecf20Sopenharmony_ci			  btrfs_header_owner(eb), btrfs_header_level(eb));
17368c2ecf20Sopenharmony_ci		return -EINVAL;
17378c2ecf20Sopenharmony_ci	}
17388c2ecf20Sopenharmony_ci
17398c2ecf20Sopenharmony_ci	while (low < high) {
17408c2ecf20Sopenharmony_ci		unsigned long oip;
17418c2ecf20Sopenharmony_ci		unsigned long offset;
17428c2ecf20Sopenharmony_ci		struct btrfs_disk_key *tmp;
17438c2ecf20Sopenharmony_ci		struct btrfs_disk_key unaligned;
17448c2ecf20Sopenharmony_ci		int mid;
17458c2ecf20Sopenharmony_ci
17468c2ecf20Sopenharmony_ci		mid = (low + high) / 2;
17478c2ecf20Sopenharmony_ci		offset = p + mid * item_size;
17488c2ecf20Sopenharmony_ci		oip = offset_in_page(offset);
17498c2ecf20Sopenharmony_ci
17508c2ecf20Sopenharmony_ci		if (oip + key_size <= PAGE_SIZE) {
17518c2ecf20Sopenharmony_ci			const unsigned long idx = offset >> PAGE_SHIFT;
17528c2ecf20Sopenharmony_ci			char *kaddr = page_address(eb->pages[idx]);
17538c2ecf20Sopenharmony_ci
17548c2ecf20Sopenharmony_ci			tmp = (struct btrfs_disk_key *)(kaddr + oip);
17558c2ecf20Sopenharmony_ci		} else {
17568c2ecf20Sopenharmony_ci			read_extent_buffer(eb, &unaligned, offset, key_size);
17578c2ecf20Sopenharmony_ci			tmp = &unaligned;
17588c2ecf20Sopenharmony_ci		}
17598c2ecf20Sopenharmony_ci
17608c2ecf20Sopenharmony_ci		ret = comp_keys(tmp, key);
17618c2ecf20Sopenharmony_ci
17628c2ecf20Sopenharmony_ci		if (ret < 0)
17638c2ecf20Sopenharmony_ci			low = mid + 1;
17648c2ecf20Sopenharmony_ci		else if (ret > 0)
17658c2ecf20Sopenharmony_ci			high = mid;
17668c2ecf20Sopenharmony_ci		else {
17678c2ecf20Sopenharmony_ci			*slot = mid;
17688c2ecf20Sopenharmony_ci			return 0;
17698c2ecf20Sopenharmony_ci		}
17708c2ecf20Sopenharmony_ci	}
17718c2ecf20Sopenharmony_ci	*slot = low;
17728c2ecf20Sopenharmony_ci	return 1;
17738c2ecf20Sopenharmony_ci}
17748c2ecf20Sopenharmony_ci
17758c2ecf20Sopenharmony_ci/*
17768c2ecf20Sopenharmony_ci * simple bin_search frontend that does the right thing for
17778c2ecf20Sopenharmony_ci * leaves vs nodes
17788c2ecf20Sopenharmony_ci */
17798c2ecf20Sopenharmony_ciint btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
17808c2ecf20Sopenharmony_ci		     int *slot)
17818c2ecf20Sopenharmony_ci{
17828c2ecf20Sopenharmony_ci	if (btrfs_header_level(eb) == 0)
17838c2ecf20Sopenharmony_ci		return generic_bin_search(eb,
17848c2ecf20Sopenharmony_ci					  offsetof(struct btrfs_leaf, items),
17858c2ecf20Sopenharmony_ci					  sizeof(struct btrfs_item),
17868c2ecf20Sopenharmony_ci					  key, btrfs_header_nritems(eb),
17878c2ecf20Sopenharmony_ci					  slot);
17888c2ecf20Sopenharmony_ci	else
17898c2ecf20Sopenharmony_ci		return generic_bin_search(eb,
17908c2ecf20Sopenharmony_ci					  offsetof(struct btrfs_node, ptrs),
17918c2ecf20Sopenharmony_ci					  sizeof(struct btrfs_key_ptr),
17928c2ecf20Sopenharmony_ci					  key, btrfs_header_nritems(eb),
17938c2ecf20Sopenharmony_ci					  slot);
17948c2ecf20Sopenharmony_ci}
17958c2ecf20Sopenharmony_ci
17968c2ecf20Sopenharmony_cistatic void root_add_used(struct btrfs_root *root, u32 size)
17978c2ecf20Sopenharmony_ci{
17988c2ecf20Sopenharmony_ci	spin_lock(&root->accounting_lock);
17998c2ecf20Sopenharmony_ci	btrfs_set_root_used(&root->root_item,
18008c2ecf20Sopenharmony_ci			    btrfs_root_used(&root->root_item) + size);
18018c2ecf20Sopenharmony_ci	spin_unlock(&root->accounting_lock);
18028c2ecf20Sopenharmony_ci}
18038c2ecf20Sopenharmony_ci
18048c2ecf20Sopenharmony_cistatic void root_sub_used(struct btrfs_root *root, u32 size)
18058c2ecf20Sopenharmony_ci{
18068c2ecf20Sopenharmony_ci	spin_lock(&root->accounting_lock);
18078c2ecf20Sopenharmony_ci	btrfs_set_root_used(&root->root_item,
18088c2ecf20Sopenharmony_ci			    btrfs_root_used(&root->root_item) - size);
18098c2ecf20Sopenharmony_ci	spin_unlock(&root->accounting_lock);
18108c2ecf20Sopenharmony_ci}
18118c2ecf20Sopenharmony_ci
18128c2ecf20Sopenharmony_ci/* given a node and slot number, this reads the blocks it points to.  The
18138c2ecf20Sopenharmony_ci * extent buffer is returned with a reference taken (but unlocked).
18148c2ecf20Sopenharmony_ci */
18158c2ecf20Sopenharmony_cistruct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
18168c2ecf20Sopenharmony_ci					   int slot)
18178c2ecf20Sopenharmony_ci{
18188c2ecf20Sopenharmony_ci	int level = btrfs_header_level(parent);
18198c2ecf20Sopenharmony_ci	struct extent_buffer *eb;
18208c2ecf20Sopenharmony_ci	struct btrfs_key first_key;
18218c2ecf20Sopenharmony_ci
18228c2ecf20Sopenharmony_ci	if (slot < 0 || slot >= btrfs_header_nritems(parent))
18238c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOENT);
18248c2ecf20Sopenharmony_ci
18258c2ecf20Sopenharmony_ci	BUG_ON(level == 0);
18268c2ecf20Sopenharmony_ci
18278c2ecf20Sopenharmony_ci	btrfs_node_key_to_cpu(parent, &first_key, slot);
18288c2ecf20Sopenharmony_ci	eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot),
18298c2ecf20Sopenharmony_ci			     btrfs_node_ptr_generation(parent, slot),
18308c2ecf20Sopenharmony_ci			     level - 1, &first_key);
18318c2ecf20Sopenharmony_ci	if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
18328c2ecf20Sopenharmony_ci		free_extent_buffer(eb);
18338c2ecf20Sopenharmony_ci		eb = ERR_PTR(-EIO);
18348c2ecf20Sopenharmony_ci	}
18358c2ecf20Sopenharmony_ci
18368c2ecf20Sopenharmony_ci	return eb;
18378c2ecf20Sopenharmony_ci}
18388c2ecf20Sopenharmony_ci
18398c2ecf20Sopenharmony_ci/*
 * Node level balancing, used to make sure nodes are in proper order for
 * item deletion.  We balance from the top down, so we have to make sure
 * that a deletion won't leave a node completely empty later on.
 *
 * @trans: transaction handle; the node at @level is expected to carry this
 *         transaction's generation (a mismatch only triggers a WARN_ON)
 * @root:  tree being modified
 * @path:  path whose nodes[level] / slots[level] identify the node to balance
 *         and the pointer the caller is positioned on; a write lock is
 *         expected at @level (WARN_ON otherwise)
 * @level: level of the node to balance, must be > 0
 *
 * Two cases are handled:
 *  - The path has no node above @level: if the node holds exactly one
 *    pointer, the child it points to is COWed, installed as root->node and
 *    the old block is freed.
 *  - Otherwise, if the node is at most a quarter full, pointers are moved
 *    between it and its left/right siblings, blocks that end up empty are
 *    removed from the parent and freed, and the keys in the parent are
 *    refreshed.  The path is fixed up so that it still refers to the block
 *    pointer it referred to on entry (verified with BUG() on that path).
 *
 * Returns 0 if nothing had to be done or the balance went through, otherwise
 * the error reported by the failing helper, or -EROFS when a node with a
 * single pointer has no left sibling to pull from.
 */
18448c2ecf20Sopenharmony_cistatic noinline int balance_level(struct btrfs_trans_handle *trans,
18458c2ecf20Sopenharmony_ci			 struct btrfs_root *root,
18468c2ecf20Sopenharmony_ci			 struct btrfs_path *path, int level)
18478c2ecf20Sopenharmony_ci{
18488c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
18498c2ecf20Sopenharmony_ci	struct extent_buffer *right = NULL;
18508c2ecf20Sopenharmony_ci	struct extent_buffer *mid;
18518c2ecf20Sopenharmony_ci	struct extent_buffer *left = NULL;
18528c2ecf20Sopenharmony_ci	struct extent_buffer *parent = NULL;
18538c2ecf20Sopenharmony_ci	int ret = 0;
18548c2ecf20Sopenharmony_ci	int wret;
18558c2ecf20Sopenharmony_ci	int pslot;
18568c2ecf20Sopenharmony_ci	int orig_slot = path->slots[level];
18578c2ecf20Sopenharmony_ci	u64 orig_ptr;
18588c2ecf20Sopenharmony_ci
18598c2ecf20Sopenharmony_ci	ASSERT(level > 0);
18608c2ecf20Sopenharmony_ci
18618c2ecf20Sopenharmony_ci	mid = path->nodes[level];
18628c2ecf20Sopenharmony_ci
18638c2ecf20Sopenharmony_ci	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
18648c2ecf20Sopenharmony_ci		path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
18658c2ecf20Sopenharmony_ci	WARN_ON(btrfs_header_generation(mid) != trans->transid);
18668c2ecf20Sopenharmony_ci
	/* remember the block the path points at; re-checked before the exit label */
18678c2ecf20Sopenharmony_ci	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
18688c2ecf20Sopenharmony_ci
	/* parent stays NULL (and pslot unset) if the path has no node above */
18698c2ecf20Sopenharmony_ci	if (level < BTRFS_MAX_LEVEL - 1) {
18708c2ecf20Sopenharmony_ci		parent = path->nodes[level + 1];
18718c2ecf20Sopenharmony_ci		pslot = path->slots[level + 1];
18728c2ecf20Sopenharmony_ci	}
18738c2ecf20Sopenharmony_ci
18748c2ecf20Sopenharmony_ci	/*
18758c2ecf20Sopenharmony_ci	 * deal with the case where there is only one pointer in the root
18768c2ecf20Sopenharmony_ci	 * by promoting the node below to a root
18778c2ecf20Sopenharmony_ci	 */
18788c2ecf20Sopenharmony_ci	if (!parent) {
18798c2ecf20Sopenharmony_ci		struct extent_buffer *child;
18808c2ecf20Sopenharmony_ci
18818c2ecf20Sopenharmony_ci		if (btrfs_header_nritems(mid) != 1)
18828c2ecf20Sopenharmony_ci			return 0;
18838c2ecf20Sopenharmony_ci
18848c2ecf20Sopenharmony_ci		/* promote the child to a root */
18858c2ecf20Sopenharmony_ci		child = btrfs_read_node_slot(mid, 0);
18868c2ecf20Sopenharmony_ci		if (IS_ERR(child)) {
18878c2ecf20Sopenharmony_ci			ret = PTR_ERR(child);
18888c2ecf20Sopenharmony_ci			btrfs_handle_fs_error(fs_info, ret, NULL);
18898c2ecf20Sopenharmony_ci			goto enospc;
18908c2ecf20Sopenharmony_ci		}
18918c2ecf20Sopenharmony_ci
18928c2ecf20Sopenharmony_ci		btrfs_tree_lock(child);
18938c2ecf20Sopenharmony_ci		btrfs_set_lock_blocking_write(child);
18948c2ecf20Sopenharmony_ci		ret = btrfs_cow_block(trans, root, child, mid, 0, &child,
18958c2ecf20Sopenharmony_ci				      BTRFS_NESTING_COW);
18968c2ecf20Sopenharmony_ci		if (ret) {
18978c2ecf20Sopenharmony_ci			btrfs_tree_unlock(child);
18988c2ecf20Sopenharmony_ci			free_extent_buffer(child);
18998c2ecf20Sopenharmony_ci			goto enospc;
19008c2ecf20Sopenharmony_ci		}
19018c2ecf20Sopenharmony_ci
		/* log the root replacement, then publish the child as the root */
19028c2ecf20Sopenharmony_ci		ret = tree_mod_log_insert_root(root->node, child, 1);
19038c2ecf20Sopenharmony_ci		BUG_ON(ret < 0);
19048c2ecf20Sopenharmony_ci		rcu_assign_pointer(root->node, child);
19058c2ecf20Sopenharmony_ci
19068c2ecf20Sopenharmony_ci		add_root_to_dirty_list(root);
19078c2ecf20Sopenharmony_ci		btrfs_tree_unlock(child);
19088c2ecf20Sopenharmony_ci
		/* detach the old root from the path before releasing it */
19098c2ecf20Sopenharmony_ci		path->locks[level] = 0;
19108c2ecf20Sopenharmony_ci		path->nodes[level] = NULL;
19118c2ecf20Sopenharmony_ci		btrfs_clean_tree_block(mid);
19128c2ecf20Sopenharmony_ci		btrfs_tree_unlock(mid);
19138c2ecf20Sopenharmony_ci		/* once for the path */
19148c2ecf20Sopenharmony_ci		free_extent_buffer(mid);
19158c2ecf20Sopenharmony_ci
19168c2ecf20Sopenharmony_ci		root_sub_used(root, mid->len);
19178c2ecf20Sopenharmony_ci		btrfs_free_tree_block(trans, root, mid, 0, 1);
19188c2ecf20Sopenharmony_ci		/* once for the root ptr */
19198c2ecf20Sopenharmony_ci		free_extent_buffer_stale(mid);
19208c2ecf20Sopenharmony_ci		return 0;
19218c2ecf20Sopenharmony_ci	}
	/* more than a quarter full: nothing to balance */
19228c2ecf20Sopenharmony_ci	if (btrfs_header_nritems(mid) >
19238c2ecf20Sopenharmony_ci	    BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4)
19248c2ecf20Sopenharmony_ci		return 0;
19258c2ecf20Sopenharmony_ci
	/*
	 * Any failure to get a sibling (including a slot outside the parent,
	 * which btrfs_read_node_slot() rejects) just means we go on without it.
	 */
19268c2ecf20Sopenharmony_ci	left = btrfs_read_node_slot(parent, pslot - 1);
19278c2ecf20Sopenharmony_ci	if (IS_ERR(left))
19288c2ecf20Sopenharmony_ci		left = NULL;
19298c2ecf20Sopenharmony_ci
19308c2ecf20Sopenharmony_ci	if (left) {
19318c2ecf20Sopenharmony_ci		__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
19328c2ecf20Sopenharmony_ci		btrfs_set_lock_blocking_write(left);
19338c2ecf20Sopenharmony_ci		wret = btrfs_cow_block(trans, root, left,
19348c2ecf20Sopenharmony_ci				       parent, pslot - 1, &left,
19358c2ecf20Sopenharmony_ci				       BTRFS_NESTING_LEFT_COW);
19368c2ecf20Sopenharmony_ci		if (wret) {
19378c2ecf20Sopenharmony_ci			ret = wret;
19388c2ecf20Sopenharmony_ci			goto enospc;
19398c2ecf20Sopenharmony_ci		}
19408c2ecf20Sopenharmony_ci	}
19418c2ecf20Sopenharmony_ci
19428c2ecf20Sopenharmony_ci	right = btrfs_read_node_slot(parent, pslot + 1);
19438c2ecf20Sopenharmony_ci	if (IS_ERR(right))
19448c2ecf20Sopenharmony_ci		right = NULL;
19458c2ecf20Sopenharmony_ci
19468c2ecf20Sopenharmony_ci	if (right) {
19478c2ecf20Sopenharmony_ci		__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
19488c2ecf20Sopenharmony_ci		btrfs_set_lock_blocking_write(right);
19498c2ecf20Sopenharmony_ci		wret = btrfs_cow_block(trans, root, right,
19508c2ecf20Sopenharmony_ci				       parent, pslot + 1, &right,
19518c2ecf20Sopenharmony_ci				       BTRFS_NESTING_RIGHT_COW);
19528c2ecf20Sopenharmony_ci		if (wret) {
19538c2ecf20Sopenharmony_ci			ret = wret;
19548c2ecf20Sopenharmony_ci			goto enospc;
19558c2ecf20Sopenharmony_ci		}
19568c2ecf20Sopenharmony_ci	}
19578c2ecf20Sopenharmony_ci
19588c2ecf20Sopenharmony_ci	/* first, try to make some room in the middle buffer */
19598c2ecf20Sopenharmony_ci	if (left) {
		/* from here on orig_slot is counted from the start of left */
19608c2ecf20Sopenharmony_ci		orig_slot += btrfs_header_nritems(left);
19618c2ecf20Sopenharmony_ci		wret = push_node_left(trans, left, mid, 1);
19628c2ecf20Sopenharmony_ci		if (wret < 0)
19638c2ecf20Sopenharmony_ci			ret = wret;
19648c2ecf20Sopenharmony_ci	}
19658c2ecf20Sopenharmony_ci
19668c2ecf20Sopenharmony_ci	/*
19678c2ecf20Sopenharmony_ci	 * then try to empty the right most buffer into the middle
19688c2ecf20Sopenharmony_ci	 */
19698c2ecf20Sopenharmony_ci	if (right) {
19708c2ecf20Sopenharmony_ci		wret = push_node_left(trans, mid, right, 1);
		/* -ENOSPC from this push is deliberately not recorded as an error */
19718c2ecf20Sopenharmony_ci		if (wret < 0 && wret != -ENOSPC)
19728c2ecf20Sopenharmony_ci			ret = wret;
19738c2ecf20Sopenharmony_ci		if (btrfs_header_nritems(right) == 0) {
			/* right is empty now: unhook it from the parent and free it */
19748c2ecf20Sopenharmony_ci			btrfs_clean_tree_block(right);
19758c2ecf20Sopenharmony_ci			btrfs_tree_unlock(right);
19768c2ecf20Sopenharmony_ci			del_ptr(root, path, level + 1, pslot + 1);
19778c2ecf20Sopenharmony_ci			root_sub_used(root, right->len);
19788c2ecf20Sopenharmony_ci			btrfs_free_tree_block(trans, root, right, 0, 1);
19798c2ecf20Sopenharmony_ci			free_extent_buffer_stale(right);
19808c2ecf20Sopenharmony_ci			right = NULL;
19818c2ecf20Sopenharmony_ci		} else {
			/* copy right's current first key into its parent slot */
19828c2ecf20Sopenharmony_ci			struct btrfs_disk_key right_key;
19838c2ecf20Sopenharmony_ci			btrfs_node_key(right, &right_key, 0);
			/* NOTE(review): this overwrites any error saved in ret above */
19848c2ecf20Sopenharmony_ci			ret = tree_mod_log_insert_key(parent, pslot + 1,
19858c2ecf20Sopenharmony_ci					MOD_LOG_KEY_REPLACE, GFP_NOFS);
19868c2ecf20Sopenharmony_ci			BUG_ON(ret < 0);
19878c2ecf20Sopenharmony_ci			btrfs_set_node_key(parent, &right_key, pslot + 1);
19888c2ecf20Sopenharmony_ci			btrfs_mark_buffer_dirty(parent);
19898c2ecf20Sopenharmony_ci		}
19908c2ecf20Sopenharmony_ci	}
19918c2ecf20Sopenharmony_ci	if (btrfs_header_nritems(mid) == 1) {
19928c2ecf20Sopenharmony_ci		/*
19938c2ecf20Sopenharmony_ci		 * we're not allowed to leave a node with one item in the
19948c2ecf20Sopenharmony_ci		 * tree during a delete.  A deletion from lower in the tree
19958c2ecf20Sopenharmony_ci		 * could try to delete the only pointer in this node.
19968c2ecf20Sopenharmony_ci		 * So, pull some keys from the left.
19978c2ecf20Sopenharmony_ci		 * There has to be a left pointer at this point because
19988c2ecf20Sopenharmony_ci		 * otherwise we would have pulled some pointers from the
19998c2ecf20Sopenharmony_ci		 * right
20008c2ecf20Sopenharmony_ci		 */
20018c2ecf20Sopenharmony_ci		if (!left) {
20028c2ecf20Sopenharmony_ci			ret = -EROFS;
20038c2ecf20Sopenharmony_ci			btrfs_handle_fs_error(fs_info, ret, NULL);
20048c2ecf20Sopenharmony_ci			goto enospc;
20058c2ecf20Sopenharmony_ci		}
20068c2ecf20Sopenharmony_ci		wret = balance_node_right(trans, mid, left);
20078c2ecf20Sopenharmony_ci		if (wret < 0) {
20088c2ecf20Sopenharmony_ci			ret = wret;
20098c2ecf20Sopenharmony_ci			goto enospc;
20108c2ecf20Sopenharmony_ci		}
		/* balance_node_right() moved nothing: push mid into left instead */
20118c2ecf20Sopenharmony_ci		if (wret == 1) {
20128c2ecf20Sopenharmony_ci			wret = push_node_left(trans, left, mid, 1);
20138c2ecf20Sopenharmony_ci			if (wret < 0)
20148c2ecf20Sopenharmony_ci				ret = wret;
20158c2ecf20Sopenharmony_ci		}
20168c2ecf20Sopenharmony_ci		BUG_ON(wret == 1);
20178c2ecf20Sopenharmony_ci	}
20188c2ecf20Sopenharmony_ci	if (btrfs_header_nritems(mid) == 0) {
		/* everything left mid: unhook it from the parent and free it */
20198c2ecf20Sopenharmony_ci		btrfs_clean_tree_block(mid);
20208c2ecf20Sopenharmony_ci		btrfs_tree_unlock(mid);
20218c2ecf20Sopenharmony_ci		del_ptr(root, path, level + 1, pslot);
20228c2ecf20Sopenharmony_ci		root_sub_used(root, mid->len);
20238c2ecf20Sopenharmony_ci		btrfs_free_tree_block(trans, root, mid, 0, 1);
20248c2ecf20Sopenharmony_ci		free_extent_buffer_stale(mid);
20258c2ecf20Sopenharmony_ci		mid = NULL;
20268c2ecf20Sopenharmony_ci	} else {
20278c2ecf20Sopenharmony_ci		/* update the parent key to reflect our changes */
20288c2ecf20Sopenharmony_ci		struct btrfs_disk_key mid_key;
20298c2ecf20Sopenharmony_ci		btrfs_node_key(mid, &mid_key, 0);
		/* NOTE(review): this overwrites any error saved in ret above */
20308c2ecf20Sopenharmony_ci		ret = tree_mod_log_insert_key(parent, pslot,
20318c2ecf20Sopenharmony_ci				MOD_LOG_KEY_REPLACE, GFP_NOFS);
20328c2ecf20Sopenharmony_ci		BUG_ON(ret < 0);
20338c2ecf20Sopenharmony_ci		btrfs_set_node_key(parent, &mid_key, pslot);
20348c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(parent);
20358c2ecf20Sopenharmony_ci	}
20368c2ecf20Sopenharmony_ci
20378c2ecf20Sopenharmony_ci	/* update the path */
20388c2ecf20Sopenharmony_ci	if (left) {
20398c2ecf20Sopenharmony_ci		if (btrfs_header_nritems(left) > orig_slot) {
			/* our pointer now lives in left: the path takes its own ref */
20408c2ecf20Sopenharmony_ci			atomic_inc(&left->refs);
20418c2ecf20Sopenharmony_ci			/* left was locked after cow */
20428c2ecf20Sopenharmony_ci			path->nodes[level] = left;
20438c2ecf20Sopenharmony_ci			path->slots[level + 1] -= 1;
20448c2ecf20Sopenharmony_ci			path->slots[level] = orig_slot;
20458c2ecf20Sopenharmony_ci			if (mid) {
20468c2ecf20Sopenharmony_ci				btrfs_tree_unlock(mid);
20478c2ecf20Sopenharmony_ci				free_extent_buffer(mid);
20488c2ecf20Sopenharmony_ci			}
20498c2ecf20Sopenharmony_ci		} else {
			/* still in mid: convert orig_slot back to an index into mid */
20508c2ecf20Sopenharmony_ci			orig_slot -= btrfs_header_nritems(left);
20518c2ecf20Sopenharmony_ci			path->slots[level] = orig_slot;
20528c2ecf20Sopenharmony_ci		}
20538c2ecf20Sopenharmony_ci	}
20548c2ecf20Sopenharmony_ci	/* double check we haven't messed things up */
20558c2ecf20Sopenharmony_ci	if (orig_ptr !=
20568c2ecf20Sopenharmony_ci	    btrfs_node_blockptr(path->nodes[level], path->slots[level]))
20578c2ecf20Sopenharmony_ci		BUG();
	/* common exit, also reached on success: release the sibling buffers */
20588c2ecf20Sopenharmony_cienospc:
20598c2ecf20Sopenharmony_ci	if (right) {
20608c2ecf20Sopenharmony_ci		btrfs_tree_unlock(right);
20618c2ecf20Sopenharmony_ci		free_extent_buffer(right);
20628c2ecf20Sopenharmony_ci	}
20638c2ecf20Sopenharmony_ci	if (left) {
		/* left stays locked if it was installed in the path above */
20648c2ecf20Sopenharmony_ci		if (path->nodes[level] != left)
20658c2ecf20Sopenharmony_ci			btrfs_tree_unlock(left);
20668c2ecf20Sopenharmony_ci		free_extent_buffer(left);
20678c2ecf20Sopenharmony_ci	}
20688c2ecf20Sopenharmony_ci	return ret;
20698c2ecf20Sopenharmony_ci}
20708c2ecf20Sopenharmony_ci
20718c2ecf20Sopenharmony_ci/* Node balancing for insertion.  Here we only split or push nodes around
 * when they are completely full.  This is also done top down, so we
 * have to be pessimistic.
 *
 * @trans: transaction handle; the node at @level is expected to carry this
 *         transaction's generation (a mismatch only triggers a WARN_ON)
 * @root:  tree being modified
 * @path:  path whose nodes[level] / slots[level] identify the node and slot
 * @level: level of the node to make room in
 *
 * The left sibling is tried first (push_node_left()), then the right one
 * (balance_node_right()).  A sibling is skipped when it cannot be read, when
 * it already holds BTRFS_NODEPTRS_PER_BLOCK - 1 or more pointers, or when
 * COWing it fails.  After a successful push the parent key is refreshed and
 * path->nodes[level], path->slots[level] and path->slots[level + 1] are
 * adjusted to follow the original slot.
 *
 * Returns 0 if pointers were moved into a sibling, 1 otherwise (including
 * @level == 0 and a node without a parent in the path).  Errors from the
 * helpers are stored in the local ret but are not returned by this function.
 */
20758c2ecf20Sopenharmony_cistatic noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
20768c2ecf20Sopenharmony_ci					  struct btrfs_root *root,
20778c2ecf20Sopenharmony_ci					  struct btrfs_path *path, int level)
20788c2ecf20Sopenharmony_ci{
20798c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
20808c2ecf20Sopenharmony_ci	struct extent_buffer *right = NULL;
20818c2ecf20Sopenharmony_ci	struct extent_buffer *mid;
20828c2ecf20Sopenharmony_ci	struct extent_buffer *left = NULL;
20838c2ecf20Sopenharmony_ci	struct extent_buffer *parent = NULL;
20848c2ecf20Sopenharmony_ci	int ret = 0;
20858c2ecf20Sopenharmony_ci	int wret;
20868c2ecf20Sopenharmony_ci	int pslot;
20878c2ecf20Sopenharmony_ci	int orig_slot = path->slots[level];
20888c2ecf20Sopenharmony_ci
	/* leaves are not handled here */
20898c2ecf20Sopenharmony_ci	if (level == 0)
20908c2ecf20Sopenharmony_ci		return 1;
20918c2ecf20Sopenharmony_ci
20928c2ecf20Sopenharmony_ci	mid = path->nodes[level];
20938c2ecf20Sopenharmony_ci	WARN_ON(btrfs_header_generation(mid) != trans->transid);
20948c2ecf20Sopenharmony_ci
	/* parent stays NULL (and pslot unset) if the path has no node above */
20958c2ecf20Sopenharmony_ci	if (level < BTRFS_MAX_LEVEL - 1) {
20968c2ecf20Sopenharmony_ci		parent = path->nodes[level + 1];
20978c2ecf20Sopenharmony_ci		pslot = path->slots[level + 1];
20988c2ecf20Sopenharmony_ci	}
20998c2ecf20Sopenharmony_ci
	/* without a parent there are no siblings to push into */
21008c2ecf20Sopenharmony_ci	if (!parent)
21018c2ecf20Sopenharmony_ci		return 1;
21028c2ecf20Sopenharmony_ci
	/* a read failure (or a slot outside the parent) means no left sibling */
21038c2ecf20Sopenharmony_ci	left = btrfs_read_node_slot(parent, pslot - 1);
21048c2ecf20Sopenharmony_ci	if (IS_ERR(left))
21058c2ecf20Sopenharmony_ci		left = NULL;
21068c2ecf20Sopenharmony_ci
21078c2ecf20Sopenharmony_ci	/* first, try to make some room in the middle buffer */
21088c2ecf20Sopenharmony_ci	if (left) {
21098c2ecf20Sopenharmony_ci		u32 left_nr;
21108c2ecf20Sopenharmony_ci
21118c2ecf20Sopenharmony_ci		__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
21128c2ecf20Sopenharmony_ci		btrfs_set_lock_blocking_write(left);
21138c2ecf20Sopenharmony_ci
21148c2ecf20Sopenharmony_ci		left_nr = btrfs_header_nritems(left);
		/* wret == 1 means "could not use this sibling" */
21158c2ecf20Sopenharmony_ci		if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
21168c2ecf20Sopenharmony_ci			wret = 1;
21178c2ecf20Sopenharmony_ci		} else {
21188c2ecf20Sopenharmony_ci			ret = btrfs_cow_block(trans, root, left, parent,
21198c2ecf20Sopenharmony_ci					      pslot - 1, &left,
21208c2ecf20Sopenharmony_ci					      BTRFS_NESTING_LEFT_COW);
21218c2ecf20Sopenharmony_ci			if (ret)
21228c2ecf20Sopenharmony_ci				wret = 1;
21238c2ecf20Sopenharmony_ci			else {
21248c2ecf20Sopenharmony_ci				wret = push_node_left(trans, left, mid, 0);
21258c2ecf20Sopenharmony_ci			}
21268c2ecf20Sopenharmony_ci		}
21278c2ecf20Sopenharmony_ci		if (wret < 0)
21288c2ecf20Sopenharmony_ci			ret = wret;
21298c2ecf20Sopenharmony_ci		if (wret == 0) {
21308c2ecf20Sopenharmony_ci			struct btrfs_disk_key disk_key;
			/* count orig_slot from the start of left's old contents */
21318c2ecf20Sopenharmony_ci			orig_slot += left_nr;
			/* mid lost its first pointers: refresh its key in the parent */
21328c2ecf20Sopenharmony_ci			btrfs_node_key(mid, &disk_key, 0);
21338c2ecf20Sopenharmony_ci			ret = tree_mod_log_insert_key(parent, pslot,
21348c2ecf20Sopenharmony_ci					MOD_LOG_KEY_REPLACE, GFP_NOFS);
21358c2ecf20Sopenharmony_ci			BUG_ON(ret < 0);
21368c2ecf20Sopenharmony_ci			btrfs_set_node_key(parent, &disk_key, pslot);
21378c2ecf20Sopenharmony_ci			btrfs_mark_buffer_dirty(parent);
21388c2ecf20Sopenharmony_ci			if (btrfs_header_nritems(left) > orig_slot) {
				/* our slot moved into left: path follows it, mid is dropped */
21398c2ecf20Sopenharmony_ci				path->nodes[level] = left;
21408c2ecf20Sopenharmony_ci				path->slots[level + 1] -= 1;
21418c2ecf20Sopenharmony_ci				path->slots[level] = orig_slot;
21428c2ecf20Sopenharmony_ci				btrfs_tree_unlock(mid);
21438c2ecf20Sopenharmony_ci				free_extent_buffer(mid);
21448c2ecf20Sopenharmony_ci			} else {
				/* our slot stayed in mid: re-index it and drop left */
21458c2ecf20Sopenharmony_ci				orig_slot -=
21468c2ecf20Sopenharmony_ci					btrfs_header_nritems(left);
21478c2ecf20Sopenharmony_ci				path->slots[level] = orig_slot;
21488c2ecf20Sopenharmony_ci				btrfs_tree_unlock(left);
21498c2ecf20Sopenharmony_ci				free_extent_buffer(left);
21508c2ecf20Sopenharmony_ci			}
21518c2ecf20Sopenharmony_ci			return 0;
21528c2ecf20Sopenharmony_ci		}
		/* left was of no help: release it and try the right sibling */
21538c2ecf20Sopenharmony_ci		btrfs_tree_unlock(left);
21548c2ecf20Sopenharmony_ci		free_extent_buffer(left);
21558c2ecf20Sopenharmony_ci	}
21568c2ecf20Sopenharmony_ci	right = btrfs_read_node_slot(parent, pslot + 1);
21578c2ecf20Sopenharmony_ci	if (IS_ERR(right))
21588c2ecf20Sopenharmony_ci		right = NULL;
21598c2ecf20Sopenharmony_ci
21608c2ecf20Sopenharmony_ci	/*
21618c2ecf20Sopenharmony_ci	 * then try to empty the right most buffer into the middle
21628c2ecf20Sopenharmony_ci	 */
21638c2ecf20Sopenharmony_ci	if (right) {
21648c2ecf20Sopenharmony_ci		u32 right_nr;
21658c2ecf20Sopenharmony_ci
21668c2ecf20Sopenharmony_ci		__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
21678c2ecf20Sopenharmony_ci		btrfs_set_lock_blocking_write(right);
21688c2ecf20Sopenharmony_ci
21698c2ecf20Sopenharmony_ci		right_nr = btrfs_header_nritems(right);
21708c2ecf20Sopenharmony_ci		if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
21718c2ecf20Sopenharmony_ci			wret = 1;
21728c2ecf20Sopenharmony_ci		} else {
21738c2ecf20Sopenharmony_ci			ret = btrfs_cow_block(trans, root, right,
21748c2ecf20Sopenharmony_ci					      parent, pslot + 1,
21758c2ecf20Sopenharmony_ci					      &right, BTRFS_NESTING_RIGHT_COW);
21768c2ecf20Sopenharmony_ci			if (ret)
21778c2ecf20Sopenharmony_ci				wret = 1;
21788c2ecf20Sopenharmony_ci			else {
21798c2ecf20Sopenharmony_ci				wret = balance_node_right(trans, right, mid);
21808c2ecf20Sopenharmony_ci			}
21818c2ecf20Sopenharmony_ci		}
21828c2ecf20Sopenharmony_ci		if (wret < 0)
21838c2ecf20Sopenharmony_ci			ret = wret;
21848c2ecf20Sopenharmony_ci		if (wret == 0) {
21858c2ecf20Sopenharmony_ci			struct btrfs_disk_key disk_key;
21868c2ecf20Sopenharmony_ci
			/* right has a new first pointer: refresh its key in the parent */
21878c2ecf20Sopenharmony_ci			btrfs_node_key(right, &disk_key, 0);
21888c2ecf20Sopenharmony_ci			ret = tree_mod_log_insert_key(parent, pslot + 1,
21898c2ecf20Sopenharmony_ci					MOD_LOG_KEY_REPLACE, GFP_NOFS);
21908c2ecf20Sopenharmony_ci			BUG_ON(ret < 0);
21918c2ecf20Sopenharmony_ci			btrfs_set_node_key(parent, &disk_key, pslot + 1);
21928c2ecf20Sopenharmony_ci			btrfs_mark_buffer_dirty(parent);
21938c2ecf20Sopenharmony_ci
21948c2ecf20Sopenharmony_ci			if (btrfs_header_nritems(mid) <= orig_slot) {
				/* our slot moved into right: path follows it, mid is dropped */
21958c2ecf20Sopenharmony_ci				path->nodes[level] = right;
21968c2ecf20Sopenharmony_ci				path->slots[level + 1] += 1;
21978c2ecf20Sopenharmony_ci				path->slots[level] = orig_slot -
21988c2ecf20Sopenharmony_ci					btrfs_header_nritems(mid);
21998c2ecf20Sopenharmony_ci				btrfs_tree_unlock(mid);
22008c2ecf20Sopenharmony_ci				free_extent_buffer(mid);
22018c2ecf20Sopenharmony_ci			} else {
				/* our slot stayed in mid: only right needs releasing */
22028c2ecf20Sopenharmony_ci				btrfs_tree_unlock(right);
22038c2ecf20Sopenharmony_ci				free_extent_buffer(right);
22048c2ecf20Sopenharmony_ci			}
22058c2ecf20Sopenharmony_ci			return 0;
22068c2ecf20Sopenharmony_ci		}
22078c2ecf20Sopenharmony_ci		btrfs_tree_unlock(right);
22088c2ecf20Sopenharmony_ci		free_extent_buffer(right);
22098c2ecf20Sopenharmony_ci	}
	/* NOTE(review): any error stored in ret above is dropped here */
22108c2ecf20Sopenharmony_ci	return 1;
22118c2ecf20Sopenharmony_ci}
22128c2ecf20Sopenharmony_ci
22138c2ecf20Sopenharmony_ci/*
22148c2ecf20Sopenharmony_ci * readahead one full node of leaves, finding things that are close
22158c2ecf20Sopenharmony_ci * to the block in 'slot', and triggering ra on them.
22168c2ecf20Sopenharmony_ci */
22178c2ecf20Sopenharmony_cistatic void reada_for_search(struct btrfs_fs_info *fs_info,
22188c2ecf20Sopenharmony_ci			     struct btrfs_path *path,
22198c2ecf20Sopenharmony_ci			     int level, int slot, u64 objectid)
22208c2ecf20Sopenharmony_ci{
22218c2ecf20Sopenharmony_ci	struct extent_buffer *node;
22228c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
22238c2ecf20Sopenharmony_ci	u32 nritems;
22248c2ecf20Sopenharmony_ci	u64 search;
22258c2ecf20Sopenharmony_ci	u64 target;
22268c2ecf20Sopenharmony_ci	u64 nread = 0;
22278c2ecf20Sopenharmony_ci	struct extent_buffer *eb;
22288c2ecf20Sopenharmony_ci	u32 nr;
22298c2ecf20Sopenharmony_ci	u32 blocksize;
22308c2ecf20Sopenharmony_ci	u32 nscan = 0;
22318c2ecf20Sopenharmony_ci
22328c2ecf20Sopenharmony_ci	if (level != 1)
22338c2ecf20Sopenharmony_ci		return;
22348c2ecf20Sopenharmony_ci
22358c2ecf20Sopenharmony_ci	if (!path->nodes[level])
22368c2ecf20Sopenharmony_ci		return;
22378c2ecf20Sopenharmony_ci
22388c2ecf20Sopenharmony_ci	node = path->nodes[level];
22398c2ecf20Sopenharmony_ci
22408c2ecf20Sopenharmony_ci	search = btrfs_node_blockptr(node, slot);
22418c2ecf20Sopenharmony_ci	blocksize = fs_info->nodesize;
22428c2ecf20Sopenharmony_ci	eb = find_extent_buffer(fs_info, search);
22438c2ecf20Sopenharmony_ci	if (eb) {
22448c2ecf20Sopenharmony_ci		free_extent_buffer(eb);
22458c2ecf20Sopenharmony_ci		return;
22468c2ecf20Sopenharmony_ci	}
22478c2ecf20Sopenharmony_ci
22488c2ecf20Sopenharmony_ci	target = search;
22498c2ecf20Sopenharmony_ci
22508c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(node);
22518c2ecf20Sopenharmony_ci	nr = slot;
22528c2ecf20Sopenharmony_ci
22538c2ecf20Sopenharmony_ci	while (1) {
22548c2ecf20Sopenharmony_ci		if (path->reada == READA_BACK) {
22558c2ecf20Sopenharmony_ci			if (nr == 0)
22568c2ecf20Sopenharmony_ci				break;
22578c2ecf20Sopenharmony_ci			nr--;
22588c2ecf20Sopenharmony_ci		} else if (path->reada == READA_FORWARD) {
22598c2ecf20Sopenharmony_ci			nr++;
22608c2ecf20Sopenharmony_ci			if (nr >= nritems)
22618c2ecf20Sopenharmony_ci				break;
22628c2ecf20Sopenharmony_ci		}
22638c2ecf20Sopenharmony_ci		if (path->reada == READA_BACK && objectid) {
22648c2ecf20Sopenharmony_ci			btrfs_node_key(node, &disk_key, nr);
22658c2ecf20Sopenharmony_ci			if (btrfs_disk_key_objectid(&disk_key) != objectid)
22668c2ecf20Sopenharmony_ci				break;
22678c2ecf20Sopenharmony_ci		}
22688c2ecf20Sopenharmony_ci		search = btrfs_node_blockptr(node, nr);
22698c2ecf20Sopenharmony_ci		if ((search <= target && target - search <= 65536) ||
22708c2ecf20Sopenharmony_ci		    (search > target && search - target <= 65536)) {
22718c2ecf20Sopenharmony_ci			readahead_tree_block(fs_info, search);
22728c2ecf20Sopenharmony_ci			nread += blocksize;
22738c2ecf20Sopenharmony_ci		}
22748c2ecf20Sopenharmony_ci		nscan++;
22758c2ecf20Sopenharmony_ci		if ((nread > 65536 || nscan > 32))
22768c2ecf20Sopenharmony_ci			break;
22778c2ecf20Sopenharmony_ci	}
22788c2ecf20Sopenharmony_ci}
22798c2ecf20Sopenharmony_ci
22808c2ecf20Sopenharmony_cistatic noinline void reada_for_balance(struct btrfs_fs_info *fs_info,
22818c2ecf20Sopenharmony_ci				       struct btrfs_path *path, int level)
22828c2ecf20Sopenharmony_ci{
22838c2ecf20Sopenharmony_ci	int slot;
22848c2ecf20Sopenharmony_ci	int nritems;
22858c2ecf20Sopenharmony_ci	struct extent_buffer *parent;
22868c2ecf20Sopenharmony_ci	struct extent_buffer *eb;
22878c2ecf20Sopenharmony_ci	u64 gen;
22888c2ecf20Sopenharmony_ci	u64 block1 = 0;
22898c2ecf20Sopenharmony_ci	u64 block2 = 0;
22908c2ecf20Sopenharmony_ci
22918c2ecf20Sopenharmony_ci	parent = path->nodes[level + 1];
22928c2ecf20Sopenharmony_ci	if (!parent)
22938c2ecf20Sopenharmony_ci		return;
22948c2ecf20Sopenharmony_ci
22958c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(parent);
22968c2ecf20Sopenharmony_ci	slot = path->slots[level + 1];
22978c2ecf20Sopenharmony_ci
22988c2ecf20Sopenharmony_ci	if (slot > 0) {
22998c2ecf20Sopenharmony_ci		block1 = btrfs_node_blockptr(parent, slot - 1);
23008c2ecf20Sopenharmony_ci		gen = btrfs_node_ptr_generation(parent, slot - 1);
23018c2ecf20Sopenharmony_ci		eb = find_extent_buffer(fs_info, block1);
23028c2ecf20Sopenharmony_ci		/*
23038c2ecf20Sopenharmony_ci		 * if we get -eagain from btrfs_buffer_uptodate, we
23048c2ecf20Sopenharmony_ci		 * don't want to return eagain here.  That will loop
23058c2ecf20Sopenharmony_ci		 * forever
23068c2ecf20Sopenharmony_ci		 */
23078c2ecf20Sopenharmony_ci		if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
23088c2ecf20Sopenharmony_ci			block1 = 0;
23098c2ecf20Sopenharmony_ci		free_extent_buffer(eb);
23108c2ecf20Sopenharmony_ci	}
23118c2ecf20Sopenharmony_ci	if (slot + 1 < nritems) {
23128c2ecf20Sopenharmony_ci		block2 = btrfs_node_blockptr(parent, slot + 1);
23138c2ecf20Sopenharmony_ci		gen = btrfs_node_ptr_generation(parent, slot + 1);
23148c2ecf20Sopenharmony_ci		eb = find_extent_buffer(fs_info, block2);
23158c2ecf20Sopenharmony_ci		if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0)
23168c2ecf20Sopenharmony_ci			block2 = 0;
23178c2ecf20Sopenharmony_ci		free_extent_buffer(eb);
23188c2ecf20Sopenharmony_ci	}
23198c2ecf20Sopenharmony_ci
23208c2ecf20Sopenharmony_ci	if (block1)
23218c2ecf20Sopenharmony_ci		readahead_tree_block(fs_info, block1);
23228c2ecf20Sopenharmony_ci	if (block2)
23238c2ecf20Sopenharmony_ci		readahead_tree_block(fs_info, block2);
23248c2ecf20Sopenharmony_ci}
23258c2ecf20Sopenharmony_ci
23268c2ecf20Sopenharmony_ci
23278c2ecf20Sopenharmony_ci/*
23288c2ecf20Sopenharmony_ci * when we walk down the tree, it is usually safe to unlock the higher layers
23298c2ecf20Sopenharmony_ci * in the tree.  The exceptions are when our path goes through slot 0, because
23308c2ecf20Sopenharmony_ci * operations on the tree might require changing key pointers higher up in the
23318c2ecf20Sopenharmony_ci * tree.
23328c2ecf20Sopenharmony_ci *
23338c2ecf20Sopenharmony_ci * callers might also have set path->keep_locks, which tells this code to keep
23348c2ecf20Sopenharmony_ci * the lock if the path points to the last slot in the block.  This is part of
23358c2ecf20Sopenharmony_ci * walking through the tree, and selecting the next slot in the higher block.
23368c2ecf20Sopenharmony_ci *
23378c2ecf20Sopenharmony_ci * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
23388c2ecf20Sopenharmony_ci * if lowest_unlock is 1, level 0 won't be unlocked
23398c2ecf20Sopenharmony_ci */
23408c2ecf20Sopenharmony_cistatic noinline void unlock_up(struct btrfs_path *path, int level,
23418c2ecf20Sopenharmony_ci			       int lowest_unlock, int min_write_lock_level,
23428c2ecf20Sopenharmony_ci			       int *write_lock_level)
23438c2ecf20Sopenharmony_ci{
23448c2ecf20Sopenharmony_ci	int i;
23458c2ecf20Sopenharmony_ci	int skip_level = level;
23468c2ecf20Sopenharmony_ci	int no_skips = 0;
23478c2ecf20Sopenharmony_ci	struct extent_buffer *t;
23488c2ecf20Sopenharmony_ci
23498c2ecf20Sopenharmony_ci	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
23508c2ecf20Sopenharmony_ci		if (!path->nodes[i])
23518c2ecf20Sopenharmony_ci			break;
23528c2ecf20Sopenharmony_ci		if (!path->locks[i])
23538c2ecf20Sopenharmony_ci			break;
23548c2ecf20Sopenharmony_ci		if (!no_skips && path->slots[i] == 0) {
23558c2ecf20Sopenharmony_ci			skip_level = i + 1;
23568c2ecf20Sopenharmony_ci			continue;
23578c2ecf20Sopenharmony_ci		}
23588c2ecf20Sopenharmony_ci		if (!no_skips && path->keep_locks) {
23598c2ecf20Sopenharmony_ci			u32 nritems;
23608c2ecf20Sopenharmony_ci			t = path->nodes[i];
23618c2ecf20Sopenharmony_ci			nritems = btrfs_header_nritems(t);
23628c2ecf20Sopenharmony_ci			if (nritems < 1 || path->slots[i] >= nritems - 1) {
23638c2ecf20Sopenharmony_ci				skip_level = i + 1;
23648c2ecf20Sopenharmony_ci				continue;
23658c2ecf20Sopenharmony_ci			}
23668c2ecf20Sopenharmony_ci		}
23678c2ecf20Sopenharmony_ci		if (skip_level < i && i >= lowest_unlock)
23688c2ecf20Sopenharmony_ci			no_skips = 1;
23698c2ecf20Sopenharmony_ci
23708c2ecf20Sopenharmony_ci		t = path->nodes[i];
23718c2ecf20Sopenharmony_ci		if (i >= lowest_unlock && i > skip_level) {
23728c2ecf20Sopenharmony_ci			btrfs_tree_unlock_rw(t, path->locks[i]);
23738c2ecf20Sopenharmony_ci			path->locks[i] = 0;
23748c2ecf20Sopenharmony_ci			if (write_lock_level &&
23758c2ecf20Sopenharmony_ci			    i > min_write_lock_level &&
23768c2ecf20Sopenharmony_ci			    i <= *write_lock_level) {
23778c2ecf20Sopenharmony_ci				*write_lock_level = i - 1;
23788c2ecf20Sopenharmony_ci			}
23798c2ecf20Sopenharmony_ci		}
23808c2ecf20Sopenharmony_ci	}
23818c2ecf20Sopenharmony_ci}
23828c2ecf20Sopenharmony_ci
23838c2ecf20Sopenharmony_ci/*
23848c2ecf20Sopenharmony_ci * helper function for btrfs_search_slot.  The goal is to find a block
23858c2ecf20Sopenharmony_ci * in cache without setting the path to blocking.  If we find the block
23868c2ecf20Sopenharmony_ci * we return zero and the path is unchanged.
23878c2ecf20Sopenharmony_ci *
23888c2ecf20Sopenharmony_ci * If we can't find the block, we set the path blocking and do some
23898c2ecf20Sopenharmony_ci * reada.  -EAGAIN is returned and the search must be repeated.
23908c2ecf20Sopenharmony_ci */
23918c2ecf20Sopenharmony_cistatic int
23928c2ecf20Sopenharmony_ciread_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
23938c2ecf20Sopenharmony_ci		      struct extent_buffer **eb_ret, int level, int slot,
23948c2ecf20Sopenharmony_ci		      const struct btrfs_key *key)
23958c2ecf20Sopenharmony_ci{
23968c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
23978c2ecf20Sopenharmony_ci	u64 blocknr;
23988c2ecf20Sopenharmony_ci	u64 gen;
23998c2ecf20Sopenharmony_ci	struct extent_buffer *tmp;
24008c2ecf20Sopenharmony_ci	struct btrfs_key first_key;
24018c2ecf20Sopenharmony_ci	int ret;
24028c2ecf20Sopenharmony_ci	int parent_level;
24038c2ecf20Sopenharmony_ci
24048c2ecf20Sopenharmony_ci	blocknr = btrfs_node_blockptr(*eb_ret, slot);
24058c2ecf20Sopenharmony_ci	gen = btrfs_node_ptr_generation(*eb_ret, slot);
24068c2ecf20Sopenharmony_ci	parent_level = btrfs_header_level(*eb_ret);
24078c2ecf20Sopenharmony_ci	btrfs_node_key_to_cpu(*eb_ret, &first_key, slot);
24088c2ecf20Sopenharmony_ci
24098c2ecf20Sopenharmony_ci	tmp = find_extent_buffer(fs_info, blocknr);
24108c2ecf20Sopenharmony_ci	if (tmp) {
24118c2ecf20Sopenharmony_ci		/* first we do an atomic uptodate check */
24128c2ecf20Sopenharmony_ci		if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
24138c2ecf20Sopenharmony_ci			/*
24148c2ecf20Sopenharmony_ci			 * Do extra check for first_key, eb can be stale due to
24158c2ecf20Sopenharmony_ci			 * being cached, read from scrub, or have multiple
24168c2ecf20Sopenharmony_ci			 * parents (shared tree blocks).
24178c2ecf20Sopenharmony_ci			 */
24188c2ecf20Sopenharmony_ci			if (btrfs_verify_level_key(tmp,
24198c2ecf20Sopenharmony_ci					parent_level - 1, &first_key, gen)) {
24208c2ecf20Sopenharmony_ci				free_extent_buffer(tmp);
24218c2ecf20Sopenharmony_ci				return -EUCLEAN;
24228c2ecf20Sopenharmony_ci			}
24238c2ecf20Sopenharmony_ci			*eb_ret = tmp;
24248c2ecf20Sopenharmony_ci			return 0;
24258c2ecf20Sopenharmony_ci		}
24268c2ecf20Sopenharmony_ci
24278c2ecf20Sopenharmony_ci		/* the pages were up to date, but we failed
24288c2ecf20Sopenharmony_ci		 * the generation number check.  Do a full
24298c2ecf20Sopenharmony_ci		 * read for the generation number that is correct.
24308c2ecf20Sopenharmony_ci		 * We must do this without dropping locks so
24318c2ecf20Sopenharmony_ci		 * we can trust our generation number
24328c2ecf20Sopenharmony_ci		 */
24338c2ecf20Sopenharmony_ci		btrfs_set_path_blocking(p);
24348c2ecf20Sopenharmony_ci
24358c2ecf20Sopenharmony_ci		/* now we're allowed to do a blocking uptodate check */
24368c2ecf20Sopenharmony_ci		ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
24378c2ecf20Sopenharmony_ci		if (!ret) {
24388c2ecf20Sopenharmony_ci			*eb_ret = tmp;
24398c2ecf20Sopenharmony_ci			return 0;
24408c2ecf20Sopenharmony_ci		}
24418c2ecf20Sopenharmony_ci		free_extent_buffer(tmp);
24428c2ecf20Sopenharmony_ci		btrfs_release_path(p);
24438c2ecf20Sopenharmony_ci		return -EIO;
24448c2ecf20Sopenharmony_ci	}
24458c2ecf20Sopenharmony_ci
24468c2ecf20Sopenharmony_ci	/*
24478c2ecf20Sopenharmony_ci	 * reduce lock contention at high levels
24488c2ecf20Sopenharmony_ci	 * of the btree by dropping locks before
24498c2ecf20Sopenharmony_ci	 * we read.  Don't release the lock on the current
24508c2ecf20Sopenharmony_ci	 * level because we need to walk this node to figure
24518c2ecf20Sopenharmony_ci	 * out which blocks to read.
24528c2ecf20Sopenharmony_ci	 */
24538c2ecf20Sopenharmony_ci	btrfs_unlock_up_safe(p, level + 1);
24548c2ecf20Sopenharmony_ci	btrfs_set_path_blocking(p);
24558c2ecf20Sopenharmony_ci
24568c2ecf20Sopenharmony_ci	if (p->reada != READA_NONE)
24578c2ecf20Sopenharmony_ci		reada_for_search(fs_info, p, level, slot, key->objectid);
24588c2ecf20Sopenharmony_ci
24598c2ecf20Sopenharmony_ci	ret = -EAGAIN;
24608c2ecf20Sopenharmony_ci	tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
24618c2ecf20Sopenharmony_ci			      &first_key);
24628c2ecf20Sopenharmony_ci	if (!IS_ERR(tmp)) {
24638c2ecf20Sopenharmony_ci		/*
24648c2ecf20Sopenharmony_ci		 * If the read above didn't mark this buffer up to date,
24658c2ecf20Sopenharmony_ci		 * it will never end up being up to date.  Set ret to EIO now
24668c2ecf20Sopenharmony_ci		 * and give up so that our caller doesn't loop forever
24678c2ecf20Sopenharmony_ci		 * on our EAGAINs.
24688c2ecf20Sopenharmony_ci		 */
24698c2ecf20Sopenharmony_ci		if (!extent_buffer_uptodate(tmp))
24708c2ecf20Sopenharmony_ci			ret = -EIO;
24718c2ecf20Sopenharmony_ci		free_extent_buffer(tmp);
24728c2ecf20Sopenharmony_ci	} else {
24738c2ecf20Sopenharmony_ci		ret = PTR_ERR(tmp);
24748c2ecf20Sopenharmony_ci	}
24758c2ecf20Sopenharmony_ci
24768c2ecf20Sopenharmony_ci	btrfs_release_path(p);
24778c2ecf20Sopenharmony_ci	return ret;
24788c2ecf20Sopenharmony_ci}
24798c2ecf20Sopenharmony_ci
24808c2ecf20Sopenharmony_ci/*
24818c2ecf20Sopenharmony_ci * helper function for btrfs_search_slot.  This does all of the checks
24828c2ecf20Sopenharmony_ci * for node-level blocks and does any balancing required based on
24838c2ecf20Sopenharmony_ci * the ins_len.
24848c2ecf20Sopenharmony_ci *
24858c2ecf20Sopenharmony_ci * If no extra work was required, zero is returned.  If we had to
24868c2ecf20Sopenharmony_ci * drop the path, -EAGAIN is returned and btrfs_search_slot must
24878c2ecf20Sopenharmony_ci * start over
24888c2ecf20Sopenharmony_ci */
24898c2ecf20Sopenharmony_cistatic int
24908c2ecf20Sopenharmony_cisetup_nodes_for_search(struct btrfs_trans_handle *trans,
24918c2ecf20Sopenharmony_ci		       struct btrfs_root *root, struct btrfs_path *p,
24928c2ecf20Sopenharmony_ci		       struct extent_buffer *b, int level, int ins_len,
24938c2ecf20Sopenharmony_ci		       int *write_lock_level)
24948c2ecf20Sopenharmony_ci{
24958c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
24968c2ecf20Sopenharmony_ci	int ret;
24978c2ecf20Sopenharmony_ci
24988c2ecf20Sopenharmony_ci	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
24998c2ecf20Sopenharmony_ci	    BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
25008c2ecf20Sopenharmony_ci		int sret;
25018c2ecf20Sopenharmony_ci
25028c2ecf20Sopenharmony_ci		if (*write_lock_level < level + 1) {
25038c2ecf20Sopenharmony_ci			*write_lock_level = level + 1;
25048c2ecf20Sopenharmony_ci			btrfs_release_path(p);
25058c2ecf20Sopenharmony_ci			goto again;
25068c2ecf20Sopenharmony_ci		}
25078c2ecf20Sopenharmony_ci
25088c2ecf20Sopenharmony_ci		btrfs_set_path_blocking(p);
25098c2ecf20Sopenharmony_ci		reada_for_balance(fs_info, p, level);
25108c2ecf20Sopenharmony_ci		sret = split_node(trans, root, p, level);
25118c2ecf20Sopenharmony_ci
25128c2ecf20Sopenharmony_ci		BUG_ON(sret > 0);
25138c2ecf20Sopenharmony_ci		if (sret) {
25148c2ecf20Sopenharmony_ci			ret = sret;
25158c2ecf20Sopenharmony_ci			goto done;
25168c2ecf20Sopenharmony_ci		}
25178c2ecf20Sopenharmony_ci		b = p->nodes[level];
25188c2ecf20Sopenharmony_ci	} else if (ins_len < 0 && btrfs_header_nritems(b) <
25198c2ecf20Sopenharmony_ci		   BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) {
25208c2ecf20Sopenharmony_ci		int sret;
25218c2ecf20Sopenharmony_ci
25228c2ecf20Sopenharmony_ci		if (*write_lock_level < level + 1) {
25238c2ecf20Sopenharmony_ci			*write_lock_level = level + 1;
25248c2ecf20Sopenharmony_ci			btrfs_release_path(p);
25258c2ecf20Sopenharmony_ci			goto again;
25268c2ecf20Sopenharmony_ci		}
25278c2ecf20Sopenharmony_ci
25288c2ecf20Sopenharmony_ci		btrfs_set_path_blocking(p);
25298c2ecf20Sopenharmony_ci		reada_for_balance(fs_info, p, level);
25308c2ecf20Sopenharmony_ci		sret = balance_level(trans, root, p, level);
25318c2ecf20Sopenharmony_ci
25328c2ecf20Sopenharmony_ci		if (sret) {
25338c2ecf20Sopenharmony_ci			ret = sret;
25348c2ecf20Sopenharmony_ci			goto done;
25358c2ecf20Sopenharmony_ci		}
25368c2ecf20Sopenharmony_ci		b = p->nodes[level];
25378c2ecf20Sopenharmony_ci		if (!b) {
25388c2ecf20Sopenharmony_ci			btrfs_release_path(p);
25398c2ecf20Sopenharmony_ci			goto again;
25408c2ecf20Sopenharmony_ci		}
25418c2ecf20Sopenharmony_ci		BUG_ON(btrfs_header_nritems(b) == 1);
25428c2ecf20Sopenharmony_ci	}
25438c2ecf20Sopenharmony_ci	return 0;
25448c2ecf20Sopenharmony_ci
25458c2ecf20Sopenharmony_ciagain:
25468c2ecf20Sopenharmony_ci	ret = -EAGAIN;
25478c2ecf20Sopenharmony_cidone:
25488c2ecf20Sopenharmony_ci	return ret;
25498c2ecf20Sopenharmony_ci}
25508c2ecf20Sopenharmony_ci
25518c2ecf20Sopenharmony_ciint btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path,
25528c2ecf20Sopenharmony_ci		u64 iobjectid, u64 ioff, u8 key_type,
25538c2ecf20Sopenharmony_ci		struct btrfs_key *found_key)
25548c2ecf20Sopenharmony_ci{
25558c2ecf20Sopenharmony_ci	int ret;
25568c2ecf20Sopenharmony_ci	struct btrfs_key key;
25578c2ecf20Sopenharmony_ci	struct extent_buffer *eb;
25588c2ecf20Sopenharmony_ci
25598c2ecf20Sopenharmony_ci	ASSERT(path);
25608c2ecf20Sopenharmony_ci	ASSERT(found_key);
25618c2ecf20Sopenharmony_ci
25628c2ecf20Sopenharmony_ci	key.type = key_type;
25638c2ecf20Sopenharmony_ci	key.objectid = iobjectid;
25648c2ecf20Sopenharmony_ci	key.offset = ioff;
25658c2ecf20Sopenharmony_ci
25668c2ecf20Sopenharmony_ci	ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
25678c2ecf20Sopenharmony_ci	if (ret < 0)
25688c2ecf20Sopenharmony_ci		return ret;
25698c2ecf20Sopenharmony_ci
25708c2ecf20Sopenharmony_ci	eb = path->nodes[0];
25718c2ecf20Sopenharmony_ci	if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
25728c2ecf20Sopenharmony_ci		ret = btrfs_next_leaf(fs_root, path);
25738c2ecf20Sopenharmony_ci		if (ret)
25748c2ecf20Sopenharmony_ci			return ret;
25758c2ecf20Sopenharmony_ci		eb = path->nodes[0];
25768c2ecf20Sopenharmony_ci	}
25778c2ecf20Sopenharmony_ci
25788c2ecf20Sopenharmony_ci	btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
25798c2ecf20Sopenharmony_ci	if (found_key->type != key.type ||
25808c2ecf20Sopenharmony_ci			found_key->objectid != key.objectid)
25818c2ecf20Sopenharmony_ci		return 1;
25828c2ecf20Sopenharmony_ci
25838c2ecf20Sopenharmony_ci	return 0;
25848c2ecf20Sopenharmony_ci}
25858c2ecf20Sopenharmony_ci
25868c2ecf20Sopenharmony_cistatic struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
25878c2ecf20Sopenharmony_ci							struct btrfs_path *p,
25888c2ecf20Sopenharmony_ci							int write_lock_level)
25898c2ecf20Sopenharmony_ci{
25908c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
25918c2ecf20Sopenharmony_ci	struct extent_buffer *b;
25928c2ecf20Sopenharmony_ci	int root_lock = 0;
25938c2ecf20Sopenharmony_ci	int level = 0;
25948c2ecf20Sopenharmony_ci
25958c2ecf20Sopenharmony_ci	if (p->search_commit_root) {
25968c2ecf20Sopenharmony_ci		/*
25978c2ecf20Sopenharmony_ci		 * The commit roots are read only so we always do read locks,
25988c2ecf20Sopenharmony_ci		 * and we always must hold the commit_root_sem when doing
25998c2ecf20Sopenharmony_ci		 * searches on them, the only exception is send where we don't
26008c2ecf20Sopenharmony_ci		 * want to block transaction commits for a long time, so
26018c2ecf20Sopenharmony_ci		 * we need to clone the commit root in order to avoid races
26028c2ecf20Sopenharmony_ci		 * with transaction commits that create a snapshot of one of
26038c2ecf20Sopenharmony_ci		 * the roots used by a send operation.
26048c2ecf20Sopenharmony_ci		 */
26058c2ecf20Sopenharmony_ci		if (p->need_commit_sem) {
26068c2ecf20Sopenharmony_ci			down_read(&fs_info->commit_root_sem);
26078c2ecf20Sopenharmony_ci			b = btrfs_clone_extent_buffer(root->commit_root);
26088c2ecf20Sopenharmony_ci			up_read(&fs_info->commit_root_sem);
26098c2ecf20Sopenharmony_ci			if (!b)
26108c2ecf20Sopenharmony_ci				return ERR_PTR(-ENOMEM);
26118c2ecf20Sopenharmony_ci
26128c2ecf20Sopenharmony_ci		} else {
26138c2ecf20Sopenharmony_ci			b = root->commit_root;
26148c2ecf20Sopenharmony_ci			atomic_inc(&b->refs);
26158c2ecf20Sopenharmony_ci		}
26168c2ecf20Sopenharmony_ci		level = btrfs_header_level(b);
26178c2ecf20Sopenharmony_ci		/*
26188c2ecf20Sopenharmony_ci		 * Ensure that all callers have set skip_locking when
26198c2ecf20Sopenharmony_ci		 * p->search_commit_root = 1.
26208c2ecf20Sopenharmony_ci		 */
26218c2ecf20Sopenharmony_ci		ASSERT(p->skip_locking == 1);
26228c2ecf20Sopenharmony_ci
26238c2ecf20Sopenharmony_ci		goto out;
26248c2ecf20Sopenharmony_ci	}
26258c2ecf20Sopenharmony_ci
26268c2ecf20Sopenharmony_ci	if (p->skip_locking) {
26278c2ecf20Sopenharmony_ci		b = btrfs_root_node(root);
26288c2ecf20Sopenharmony_ci		level = btrfs_header_level(b);
26298c2ecf20Sopenharmony_ci		goto out;
26308c2ecf20Sopenharmony_ci	}
26318c2ecf20Sopenharmony_ci
26328c2ecf20Sopenharmony_ci	/* We try very hard to do read locks on the root */
26338c2ecf20Sopenharmony_ci	root_lock = BTRFS_READ_LOCK;
26348c2ecf20Sopenharmony_ci
26358c2ecf20Sopenharmony_ci	/*
26368c2ecf20Sopenharmony_ci	 * If the level is set to maximum, we can skip trying to get the read
26378c2ecf20Sopenharmony_ci	 * lock.
26388c2ecf20Sopenharmony_ci	 */
26398c2ecf20Sopenharmony_ci	if (write_lock_level < BTRFS_MAX_LEVEL) {
26408c2ecf20Sopenharmony_ci		/*
26418c2ecf20Sopenharmony_ci		 * We don't know the level of the root node until we actually
26428c2ecf20Sopenharmony_ci		 * have it read locked
26438c2ecf20Sopenharmony_ci		 */
26448c2ecf20Sopenharmony_ci		b = __btrfs_read_lock_root_node(root, p->recurse);
26458c2ecf20Sopenharmony_ci		level = btrfs_header_level(b);
26468c2ecf20Sopenharmony_ci		if (level > write_lock_level)
26478c2ecf20Sopenharmony_ci			goto out;
26488c2ecf20Sopenharmony_ci
26498c2ecf20Sopenharmony_ci		/* Whoops, must trade for write lock */
26508c2ecf20Sopenharmony_ci		btrfs_tree_read_unlock(b);
26518c2ecf20Sopenharmony_ci		free_extent_buffer(b);
26528c2ecf20Sopenharmony_ci	}
26538c2ecf20Sopenharmony_ci
26548c2ecf20Sopenharmony_ci	b = btrfs_lock_root_node(root);
26558c2ecf20Sopenharmony_ci	root_lock = BTRFS_WRITE_LOCK;
26568c2ecf20Sopenharmony_ci
26578c2ecf20Sopenharmony_ci	/* The level might have changed, check again */
26588c2ecf20Sopenharmony_ci	level = btrfs_header_level(b);
26598c2ecf20Sopenharmony_ci
26608c2ecf20Sopenharmony_ciout:
26618c2ecf20Sopenharmony_ci	/*
26628c2ecf20Sopenharmony_ci	 * The root may have failed to write out at some point, and thus is no
26638c2ecf20Sopenharmony_ci	 * longer valid, return an error in this case.
26648c2ecf20Sopenharmony_ci	 */
26658c2ecf20Sopenharmony_ci	if (!extent_buffer_uptodate(b)) {
26668c2ecf20Sopenharmony_ci		if (root_lock)
26678c2ecf20Sopenharmony_ci			btrfs_tree_unlock_rw(b, root_lock);
26688c2ecf20Sopenharmony_ci		free_extent_buffer(b);
26698c2ecf20Sopenharmony_ci		return ERR_PTR(-EIO);
26708c2ecf20Sopenharmony_ci	}
26718c2ecf20Sopenharmony_ci
26728c2ecf20Sopenharmony_ci	p->nodes[level] = b;
26738c2ecf20Sopenharmony_ci	if (!p->skip_locking)
26748c2ecf20Sopenharmony_ci		p->locks[level] = root_lock;
26758c2ecf20Sopenharmony_ci	/*
26768c2ecf20Sopenharmony_ci	 * Callers are responsible for dropping b's references.
26778c2ecf20Sopenharmony_ci	 */
26788c2ecf20Sopenharmony_ci	return b;
26798c2ecf20Sopenharmony_ci}
26808c2ecf20Sopenharmony_ci
26818c2ecf20Sopenharmony_ci
26828c2ecf20Sopenharmony_ci/*
26838c2ecf20Sopenharmony_ci * btrfs_search_slot - look for a key in a tree and perform necessary
26848c2ecf20Sopenharmony_ci * modifications to preserve tree invariants.
26858c2ecf20Sopenharmony_ci *
26868c2ecf20Sopenharmony_ci * @trans:	Handle of transaction, used when modifying the tree
26878c2ecf20Sopenharmony_ci * @p:		Holds all btree nodes along the search path
26888c2ecf20Sopenharmony_ci * @root:	The root node of the tree
26898c2ecf20Sopenharmony_ci * @key:	The key we are looking for
26908c2ecf20Sopenharmony_ci * @ins_len:	Indicates purpose of search, for inserts it is 1, for
26918c2ecf20Sopenharmony_ci *		deletions it's -1. 0 for plain searches
26928c2ecf20Sopenharmony_ci * @cow:	boolean should CoW operations be performed. Must always be 1
26938c2ecf20Sopenharmony_ci *		when modifying the tree.
26948c2ecf20Sopenharmony_ci *
26958c2ecf20Sopenharmony_ci * If @ins_len > 0, nodes and leaves will be split as we walk down the tree.
26968c2ecf20Sopenharmony_ci * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible)
26978c2ecf20Sopenharmony_ci *
26988c2ecf20Sopenharmony_ci * If @key is found, 0 is returned and you can find the item in the leaf level
26998c2ecf20Sopenharmony_ci * of the path (level 0)
27008c2ecf20Sopenharmony_ci *
27018c2ecf20Sopenharmony_ci * If @key isn't found, 1 is returned and the leaf level of the path (level 0)
27028c2ecf20Sopenharmony_ci * points to the slot where it should be inserted
27038c2ecf20Sopenharmony_ci *
27048c2ecf20Sopenharmony_ci * If an error is encountered while searching the tree a negative error number
27058c2ecf20Sopenharmony_ci * is returned
27068c2ecf20Sopenharmony_ci */
27078c2ecf20Sopenharmony_ciint btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
27088c2ecf20Sopenharmony_ci		      const struct btrfs_key *key, struct btrfs_path *p,
27098c2ecf20Sopenharmony_ci		      int ins_len, int cow)
27108c2ecf20Sopenharmony_ci{
27118c2ecf20Sopenharmony_ci	struct extent_buffer *b;
27128c2ecf20Sopenharmony_ci	int slot;
27138c2ecf20Sopenharmony_ci	int ret;
27148c2ecf20Sopenharmony_ci	int err;
27158c2ecf20Sopenharmony_ci	int level;
27168c2ecf20Sopenharmony_ci	int lowest_unlock = 1;
27178c2ecf20Sopenharmony_ci	/* everything at write_lock_level or lower must be write locked */
27188c2ecf20Sopenharmony_ci	int write_lock_level = 0;
27198c2ecf20Sopenharmony_ci	u8 lowest_level = 0;
27208c2ecf20Sopenharmony_ci	int min_write_lock_level;
27218c2ecf20Sopenharmony_ci	int prev_cmp;
27228c2ecf20Sopenharmony_ci
27238c2ecf20Sopenharmony_ci	lowest_level = p->lowest_level;
27248c2ecf20Sopenharmony_ci	WARN_ON(lowest_level && ins_len > 0);
27258c2ecf20Sopenharmony_ci	WARN_ON(p->nodes[0] != NULL);
27268c2ecf20Sopenharmony_ci	BUG_ON(!cow && ins_len);
27278c2ecf20Sopenharmony_ci
27288c2ecf20Sopenharmony_ci	if (ins_len < 0) {
27298c2ecf20Sopenharmony_ci		lowest_unlock = 2;
27308c2ecf20Sopenharmony_ci
27318c2ecf20Sopenharmony_ci		/* when we are removing items, we might have to go up to level
27328c2ecf20Sopenharmony_ci		 * two as we update tree pointers  Make sure we keep write
27338c2ecf20Sopenharmony_ci		 * for those levels as well
27348c2ecf20Sopenharmony_ci		 */
27358c2ecf20Sopenharmony_ci		write_lock_level = 2;
27368c2ecf20Sopenharmony_ci	} else if (ins_len > 0) {
27378c2ecf20Sopenharmony_ci		/*
27388c2ecf20Sopenharmony_ci		 * for inserting items, make sure we have a write lock on
27398c2ecf20Sopenharmony_ci		 * level 1 so we can update keys
27408c2ecf20Sopenharmony_ci		 */
27418c2ecf20Sopenharmony_ci		write_lock_level = 1;
27428c2ecf20Sopenharmony_ci	}
27438c2ecf20Sopenharmony_ci
27448c2ecf20Sopenharmony_ci	if (!cow)
27458c2ecf20Sopenharmony_ci		write_lock_level = -1;
27468c2ecf20Sopenharmony_ci
27478c2ecf20Sopenharmony_ci	if (cow && (p->keep_locks || p->lowest_level))
27488c2ecf20Sopenharmony_ci		write_lock_level = BTRFS_MAX_LEVEL;
27498c2ecf20Sopenharmony_ci
27508c2ecf20Sopenharmony_ci	min_write_lock_level = write_lock_level;
27518c2ecf20Sopenharmony_ci
27528c2ecf20Sopenharmony_ciagain:
27538c2ecf20Sopenharmony_ci	prev_cmp = -1;
27548c2ecf20Sopenharmony_ci	b = btrfs_search_slot_get_root(root, p, write_lock_level);
27558c2ecf20Sopenharmony_ci	if (IS_ERR(b)) {
27568c2ecf20Sopenharmony_ci		ret = PTR_ERR(b);
27578c2ecf20Sopenharmony_ci		goto done;
27588c2ecf20Sopenharmony_ci	}
27598c2ecf20Sopenharmony_ci
27608c2ecf20Sopenharmony_ci	while (b) {
27618c2ecf20Sopenharmony_ci		int dec = 0;
27628c2ecf20Sopenharmony_ci
27638c2ecf20Sopenharmony_ci		level = btrfs_header_level(b);
27648c2ecf20Sopenharmony_ci
27658c2ecf20Sopenharmony_ci		if (cow) {
27668c2ecf20Sopenharmony_ci			bool last_level = (level == (BTRFS_MAX_LEVEL - 1));
27678c2ecf20Sopenharmony_ci
27688c2ecf20Sopenharmony_ci			/*
27698c2ecf20Sopenharmony_ci			 * if we don't really need to cow this block
27708c2ecf20Sopenharmony_ci			 * then we don't want to set the path blocking,
27718c2ecf20Sopenharmony_ci			 * so we test it here
27728c2ecf20Sopenharmony_ci			 */
27738c2ecf20Sopenharmony_ci			if (!should_cow_block(trans, root, b)) {
27748c2ecf20Sopenharmony_ci				trans->dirty = true;
27758c2ecf20Sopenharmony_ci				goto cow_done;
27768c2ecf20Sopenharmony_ci			}
27778c2ecf20Sopenharmony_ci
27788c2ecf20Sopenharmony_ci			/*
27798c2ecf20Sopenharmony_ci			 * must have write locks on this node and the
27808c2ecf20Sopenharmony_ci			 * parent
27818c2ecf20Sopenharmony_ci			 */
27828c2ecf20Sopenharmony_ci			if (level > write_lock_level ||
27838c2ecf20Sopenharmony_ci			    (level + 1 > write_lock_level &&
27848c2ecf20Sopenharmony_ci			    level + 1 < BTRFS_MAX_LEVEL &&
27858c2ecf20Sopenharmony_ci			    p->nodes[level + 1])) {
27868c2ecf20Sopenharmony_ci				write_lock_level = level + 1;
27878c2ecf20Sopenharmony_ci				btrfs_release_path(p);
27888c2ecf20Sopenharmony_ci				goto again;
27898c2ecf20Sopenharmony_ci			}
27908c2ecf20Sopenharmony_ci
27918c2ecf20Sopenharmony_ci			btrfs_set_path_blocking(p);
27928c2ecf20Sopenharmony_ci			if (last_level)
27938c2ecf20Sopenharmony_ci				err = btrfs_cow_block(trans, root, b, NULL, 0,
27948c2ecf20Sopenharmony_ci						      &b,
27958c2ecf20Sopenharmony_ci						      BTRFS_NESTING_COW);
27968c2ecf20Sopenharmony_ci			else
27978c2ecf20Sopenharmony_ci				err = btrfs_cow_block(trans, root, b,
27988c2ecf20Sopenharmony_ci						      p->nodes[level + 1],
27998c2ecf20Sopenharmony_ci						      p->slots[level + 1], &b,
28008c2ecf20Sopenharmony_ci						      BTRFS_NESTING_COW);
28018c2ecf20Sopenharmony_ci			if (err) {
28028c2ecf20Sopenharmony_ci				ret = err;
28038c2ecf20Sopenharmony_ci				goto done;
28048c2ecf20Sopenharmony_ci			}
28058c2ecf20Sopenharmony_ci		}
28068c2ecf20Sopenharmony_cicow_done:
28078c2ecf20Sopenharmony_ci		p->nodes[level] = b;
28088c2ecf20Sopenharmony_ci		/*
28098c2ecf20Sopenharmony_ci		 * Leave path with blocking locks to avoid massive
28108c2ecf20Sopenharmony_ci		 * lock context switch, this is made on purpose.
28118c2ecf20Sopenharmony_ci		 */
28128c2ecf20Sopenharmony_ci
28138c2ecf20Sopenharmony_ci		/*
28148c2ecf20Sopenharmony_ci		 * we have a lock on b and as long as we aren't changing
28158c2ecf20Sopenharmony_ci		 * the tree, there is no way to for the items in b to change.
28168c2ecf20Sopenharmony_ci		 * It is safe to drop the lock on our parent before we
28178c2ecf20Sopenharmony_ci		 * go through the expensive btree search on b.
28188c2ecf20Sopenharmony_ci		 *
28198c2ecf20Sopenharmony_ci		 * If we're inserting or deleting (ins_len != 0), then we might
28208c2ecf20Sopenharmony_ci		 * be changing slot zero, which may require changing the parent.
28218c2ecf20Sopenharmony_ci		 * So, we can't drop the lock until after we know which slot
28228c2ecf20Sopenharmony_ci		 * we're operating on.
28238c2ecf20Sopenharmony_ci		 */
28248c2ecf20Sopenharmony_ci		if (!ins_len && !p->keep_locks) {
28258c2ecf20Sopenharmony_ci			int u = level + 1;
28268c2ecf20Sopenharmony_ci
28278c2ecf20Sopenharmony_ci			if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
28288c2ecf20Sopenharmony_ci				btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
28298c2ecf20Sopenharmony_ci				p->locks[u] = 0;
28308c2ecf20Sopenharmony_ci			}
28318c2ecf20Sopenharmony_ci		}
28328c2ecf20Sopenharmony_ci
28338c2ecf20Sopenharmony_ci		/*
28348c2ecf20Sopenharmony_ci		 * If btrfs_bin_search returns an exact match (prev_cmp == 0)
28358c2ecf20Sopenharmony_ci		 * we can safely assume the target key will always be in slot 0
28368c2ecf20Sopenharmony_ci		 * on lower levels due to the invariants BTRFS' btree provides,
28378c2ecf20Sopenharmony_ci		 * namely that a btrfs_key_ptr entry always points to the
28388c2ecf20Sopenharmony_ci		 * lowest key in the child node, thus we can skip searching
28398c2ecf20Sopenharmony_ci		 * lower levels
28408c2ecf20Sopenharmony_ci		 */
28418c2ecf20Sopenharmony_ci		if (prev_cmp == 0) {
28428c2ecf20Sopenharmony_ci			slot = 0;
28438c2ecf20Sopenharmony_ci			ret = 0;
28448c2ecf20Sopenharmony_ci		} else {
28458c2ecf20Sopenharmony_ci			ret = btrfs_bin_search(b, key, &slot);
28468c2ecf20Sopenharmony_ci			prev_cmp = ret;
28478c2ecf20Sopenharmony_ci			if (ret < 0)
28488c2ecf20Sopenharmony_ci				goto done;
28498c2ecf20Sopenharmony_ci		}
28508c2ecf20Sopenharmony_ci
28518c2ecf20Sopenharmony_ci		if (level == 0) {
28528c2ecf20Sopenharmony_ci			p->slots[level] = slot;
28538c2ecf20Sopenharmony_ci			if (ins_len > 0 &&
28548c2ecf20Sopenharmony_ci			    btrfs_leaf_free_space(b) < ins_len) {
28558c2ecf20Sopenharmony_ci				if (write_lock_level < 1) {
28568c2ecf20Sopenharmony_ci					write_lock_level = 1;
28578c2ecf20Sopenharmony_ci					btrfs_release_path(p);
28588c2ecf20Sopenharmony_ci					goto again;
28598c2ecf20Sopenharmony_ci				}
28608c2ecf20Sopenharmony_ci
28618c2ecf20Sopenharmony_ci				btrfs_set_path_blocking(p);
28628c2ecf20Sopenharmony_ci				err = split_leaf(trans, root, key,
28638c2ecf20Sopenharmony_ci						 p, ins_len, ret == 0);
28648c2ecf20Sopenharmony_ci
28658c2ecf20Sopenharmony_ci				BUG_ON(err > 0);
28668c2ecf20Sopenharmony_ci				if (err) {
28678c2ecf20Sopenharmony_ci					ret = err;
28688c2ecf20Sopenharmony_ci					goto done;
28698c2ecf20Sopenharmony_ci				}
28708c2ecf20Sopenharmony_ci			}
28718c2ecf20Sopenharmony_ci			if (!p->search_for_split)
28728c2ecf20Sopenharmony_ci				unlock_up(p, level, lowest_unlock,
28738c2ecf20Sopenharmony_ci					  min_write_lock_level, NULL);
28748c2ecf20Sopenharmony_ci			goto done;
28758c2ecf20Sopenharmony_ci		}
28768c2ecf20Sopenharmony_ci		if (ret && slot > 0) {
28778c2ecf20Sopenharmony_ci			dec = 1;
28788c2ecf20Sopenharmony_ci			slot--;
28798c2ecf20Sopenharmony_ci		}
28808c2ecf20Sopenharmony_ci		p->slots[level] = slot;
28818c2ecf20Sopenharmony_ci		err = setup_nodes_for_search(trans, root, p, b, level, ins_len,
28828c2ecf20Sopenharmony_ci					     &write_lock_level);
28838c2ecf20Sopenharmony_ci		if (err == -EAGAIN)
28848c2ecf20Sopenharmony_ci			goto again;
28858c2ecf20Sopenharmony_ci		if (err) {
28868c2ecf20Sopenharmony_ci			ret = err;
28878c2ecf20Sopenharmony_ci			goto done;
28888c2ecf20Sopenharmony_ci		}
28898c2ecf20Sopenharmony_ci		b = p->nodes[level];
28908c2ecf20Sopenharmony_ci		slot = p->slots[level];
28918c2ecf20Sopenharmony_ci
28928c2ecf20Sopenharmony_ci		/*
28938c2ecf20Sopenharmony_ci		 * Slot 0 is special, if we change the key we have to update
28948c2ecf20Sopenharmony_ci		 * the parent pointer which means we must have a write lock on
28958c2ecf20Sopenharmony_ci		 * the parent
28968c2ecf20Sopenharmony_ci		 */
28978c2ecf20Sopenharmony_ci		if (slot == 0 && ins_len && write_lock_level < level + 1) {
28988c2ecf20Sopenharmony_ci			write_lock_level = level + 1;
28998c2ecf20Sopenharmony_ci			btrfs_release_path(p);
29008c2ecf20Sopenharmony_ci			goto again;
29018c2ecf20Sopenharmony_ci		}
29028c2ecf20Sopenharmony_ci
29038c2ecf20Sopenharmony_ci		unlock_up(p, level, lowest_unlock, min_write_lock_level,
29048c2ecf20Sopenharmony_ci			  &write_lock_level);
29058c2ecf20Sopenharmony_ci
29068c2ecf20Sopenharmony_ci		if (level == lowest_level) {
29078c2ecf20Sopenharmony_ci			if (dec)
29088c2ecf20Sopenharmony_ci				p->slots[level]++;
29098c2ecf20Sopenharmony_ci			goto done;
29108c2ecf20Sopenharmony_ci		}
29118c2ecf20Sopenharmony_ci
29128c2ecf20Sopenharmony_ci		err = read_block_for_search(root, p, &b, level, slot, key);
29138c2ecf20Sopenharmony_ci		if (err == -EAGAIN)
29148c2ecf20Sopenharmony_ci			goto again;
29158c2ecf20Sopenharmony_ci		if (err) {
29168c2ecf20Sopenharmony_ci			ret = err;
29178c2ecf20Sopenharmony_ci			goto done;
29188c2ecf20Sopenharmony_ci		}
29198c2ecf20Sopenharmony_ci
29208c2ecf20Sopenharmony_ci		if (!p->skip_locking) {
29218c2ecf20Sopenharmony_ci			level = btrfs_header_level(b);
29228c2ecf20Sopenharmony_ci			if (level <= write_lock_level) {
29238c2ecf20Sopenharmony_ci				if (!btrfs_try_tree_write_lock(b)) {
29248c2ecf20Sopenharmony_ci					btrfs_set_path_blocking(p);
29258c2ecf20Sopenharmony_ci					btrfs_tree_lock(b);
29268c2ecf20Sopenharmony_ci				}
29278c2ecf20Sopenharmony_ci				p->locks[level] = BTRFS_WRITE_LOCK;
29288c2ecf20Sopenharmony_ci			} else {
29298c2ecf20Sopenharmony_ci				if (!btrfs_tree_read_lock_atomic(b)) {
29308c2ecf20Sopenharmony_ci					btrfs_set_path_blocking(p);
29318c2ecf20Sopenharmony_ci					__btrfs_tree_read_lock(b, BTRFS_NESTING_NORMAL,
29328c2ecf20Sopenharmony_ci							       p->recurse);
29338c2ecf20Sopenharmony_ci				}
29348c2ecf20Sopenharmony_ci				p->locks[level] = BTRFS_READ_LOCK;
29358c2ecf20Sopenharmony_ci			}
29368c2ecf20Sopenharmony_ci			p->nodes[level] = b;
29378c2ecf20Sopenharmony_ci		}
29388c2ecf20Sopenharmony_ci	}
29398c2ecf20Sopenharmony_ci	ret = 1;
29408c2ecf20Sopenharmony_cidone:
29418c2ecf20Sopenharmony_ci	/*
29428c2ecf20Sopenharmony_ci	 * we don't really know what they plan on doing with the path
29438c2ecf20Sopenharmony_ci	 * from here on, so for now just mark it as blocking
29448c2ecf20Sopenharmony_ci	 */
29458c2ecf20Sopenharmony_ci	if (!p->leave_spinning)
29468c2ecf20Sopenharmony_ci		btrfs_set_path_blocking(p);
29478c2ecf20Sopenharmony_ci	if (ret < 0 && !p->skip_release_on_error)
29488c2ecf20Sopenharmony_ci		btrfs_release_path(p);
29498c2ecf20Sopenharmony_ci	return ret;
29508c2ecf20Sopenharmony_ci}
29518c2ecf20Sopenharmony_ci
29528c2ecf20Sopenharmony_ci/*
29538c2ecf20Sopenharmony_ci * Like btrfs_search_slot, this looks for a key in the given tree. It uses the
29548c2ecf20Sopenharmony_ci * current state of the tree together with the operations recorded in the tree
29558c2ecf20Sopenharmony_ci * modification log to search for the key in a previous version of this tree, as
29568c2ecf20Sopenharmony_ci * denoted by the time_seq parameter.
29578c2ecf20Sopenharmony_ci *
29588c2ecf20Sopenharmony_ci * Naturally, there is no support for insert, delete or cow operations.
29598c2ecf20Sopenharmony_ci *
29608c2ecf20Sopenharmony_ci * The resulting path and return value will be set up as if we called
29618c2ecf20Sopenharmony_ci * btrfs_search_slot at that point in time with ins_len and cow both set to 0.
29628c2ecf20Sopenharmony_ci */
29638c2ecf20Sopenharmony_ciint btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
29648c2ecf20Sopenharmony_ci			  struct btrfs_path *p, u64 time_seq)
29658c2ecf20Sopenharmony_ci{
29668c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
29678c2ecf20Sopenharmony_ci	struct extent_buffer *b;
29688c2ecf20Sopenharmony_ci	int slot;
29698c2ecf20Sopenharmony_ci	int ret;
29708c2ecf20Sopenharmony_ci	int err;
29718c2ecf20Sopenharmony_ci	int level;
29728c2ecf20Sopenharmony_ci	int lowest_unlock = 1;
29738c2ecf20Sopenharmony_ci	u8 lowest_level = 0;
29748c2ecf20Sopenharmony_ci
29758c2ecf20Sopenharmony_ci	lowest_level = p->lowest_level;
29768c2ecf20Sopenharmony_ci	WARN_ON(p->nodes[0] != NULL);
29778c2ecf20Sopenharmony_ci
29788c2ecf20Sopenharmony_ci	if (p->search_commit_root) {
29798c2ecf20Sopenharmony_ci		BUG_ON(time_seq);
29808c2ecf20Sopenharmony_ci		return btrfs_search_slot(NULL, root, key, p, 0, 0);
29818c2ecf20Sopenharmony_ci	}
29828c2ecf20Sopenharmony_ci
29838c2ecf20Sopenharmony_ciagain:
29848c2ecf20Sopenharmony_ci	b = get_old_root(root, time_seq);
29858c2ecf20Sopenharmony_ci	if (!b) {
29868c2ecf20Sopenharmony_ci		ret = -EIO;
29878c2ecf20Sopenharmony_ci		goto done;
29888c2ecf20Sopenharmony_ci	}
29898c2ecf20Sopenharmony_ci	level = btrfs_header_level(b);
29908c2ecf20Sopenharmony_ci	p->locks[level] = BTRFS_READ_LOCK;
29918c2ecf20Sopenharmony_ci
29928c2ecf20Sopenharmony_ci	while (b) {
29938c2ecf20Sopenharmony_ci		int dec = 0;
29948c2ecf20Sopenharmony_ci
29958c2ecf20Sopenharmony_ci		level = btrfs_header_level(b);
29968c2ecf20Sopenharmony_ci		p->nodes[level] = b;
29978c2ecf20Sopenharmony_ci
29988c2ecf20Sopenharmony_ci		/*
29998c2ecf20Sopenharmony_ci		 * we have a lock on b and as long as we aren't changing
30008c2ecf20Sopenharmony_ci		 * the tree, there is no way to for the items in b to change.
30018c2ecf20Sopenharmony_ci		 * It is safe to drop the lock on our parent before we
30028c2ecf20Sopenharmony_ci		 * go through the expensive btree search on b.
30038c2ecf20Sopenharmony_ci		 */
30048c2ecf20Sopenharmony_ci		btrfs_unlock_up_safe(p, level + 1);
30058c2ecf20Sopenharmony_ci
30068c2ecf20Sopenharmony_ci		ret = btrfs_bin_search(b, key, &slot);
30078c2ecf20Sopenharmony_ci		if (ret < 0)
30088c2ecf20Sopenharmony_ci			goto done;
30098c2ecf20Sopenharmony_ci
30108c2ecf20Sopenharmony_ci		if (level == 0) {
30118c2ecf20Sopenharmony_ci			p->slots[level] = slot;
30128c2ecf20Sopenharmony_ci			unlock_up(p, level, lowest_unlock, 0, NULL);
30138c2ecf20Sopenharmony_ci			goto done;
30148c2ecf20Sopenharmony_ci		}
30158c2ecf20Sopenharmony_ci
30168c2ecf20Sopenharmony_ci		if (ret && slot > 0) {
30178c2ecf20Sopenharmony_ci			dec = 1;
30188c2ecf20Sopenharmony_ci			slot--;
30198c2ecf20Sopenharmony_ci		}
30208c2ecf20Sopenharmony_ci		p->slots[level] = slot;
30218c2ecf20Sopenharmony_ci		unlock_up(p, level, lowest_unlock, 0, NULL);
30228c2ecf20Sopenharmony_ci
30238c2ecf20Sopenharmony_ci		if (level == lowest_level) {
30248c2ecf20Sopenharmony_ci			if (dec)
30258c2ecf20Sopenharmony_ci				p->slots[level]++;
30268c2ecf20Sopenharmony_ci			goto done;
30278c2ecf20Sopenharmony_ci		}
30288c2ecf20Sopenharmony_ci
30298c2ecf20Sopenharmony_ci		err = read_block_for_search(root, p, &b, level, slot, key);
30308c2ecf20Sopenharmony_ci		if (err == -EAGAIN)
30318c2ecf20Sopenharmony_ci			goto again;
30328c2ecf20Sopenharmony_ci		if (err) {
30338c2ecf20Sopenharmony_ci			ret = err;
30348c2ecf20Sopenharmony_ci			goto done;
30358c2ecf20Sopenharmony_ci		}
30368c2ecf20Sopenharmony_ci
30378c2ecf20Sopenharmony_ci		level = btrfs_header_level(b);
30388c2ecf20Sopenharmony_ci		if (!btrfs_tree_read_lock_atomic(b)) {
30398c2ecf20Sopenharmony_ci			btrfs_set_path_blocking(p);
30408c2ecf20Sopenharmony_ci			btrfs_tree_read_lock(b);
30418c2ecf20Sopenharmony_ci		}
30428c2ecf20Sopenharmony_ci		b = tree_mod_log_rewind(fs_info, p, b, time_seq);
30438c2ecf20Sopenharmony_ci		if (!b) {
30448c2ecf20Sopenharmony_ci			ret = -ENOMEM;
30458c2ecf20Sopenharmony_ci			goto done;
30468c2ecf20Sopenharmony_ci		}
30478c2ecf20Sopenharmony_ci		p->locks[level] = BTRFS_READ_LOCK;
30488c2ecf20Sopenharmony_ci		p->nodes[level] = b;
30498c2ecf20Sopenharmony_ci	}
30508c2ecf20Sopenharmony_ci	ret = 1;
30518c2ecf20Sopenharmony_cidone:
30528c2ecf20Sopenharmony_ci	if (!p->leave_spinning)
30538c2ecf20Sopenharmony_ci		btrfs_set_path_blocking(p);
30548c2ecf20Sopenharmony_ci	if (ret < 0)
30558c2ecf20Sopenharmony_ci		btrfs_release_path(p);
30568c2ecf20Sopenharmony_ci
30578c2ecf20Sopenharmony_ci	return ret;
30588c2ecf20Sopenharmony_ci}
30598c2ecf20Sopenharmony_ci
30608c2ecf20Sopenharmony_ci/*
30618c2ecf20Sopenharmony_ci * helper to use instead of search slot if no exact match is needed but
30628c2ecf20Sopenharmony_ci * instead the next or previous item should be returned.
30638c2ecf20Sopenharmony_ci * When find_higher is true, the next higher item is returned, the next lower
30648c2ecf20Sopenharmony_ci * otherwise.
30658c2ecf20Sopenharmony_ci * When return_any and find_higher are both true, and no higher item is found,
30668c2ecf20Sopenharmony_ci * return the next lower instead.
30678c2ecf20Sopenharmony_ci * When return_any is true and find_higher is false, and no lower item is found,
30688c2ecf20Sopenharmony_ci * return the next higher instead.
30698c2ecf20Sopenharmony_ci * It returns 0 if any item is found, 1 if none is found (tree empty), and
30708c2ecf20Sopenharmony_ci * < 0 on error
30718c2ecf20Sopenharmony_ci */
30728c2ecf20Sopenharmony_ciint btrfs_search_slot_for_read(struct btrfs_root *root,
30738c2ecf20Sopenharmony_ci			       const struct btrfs_key *key,
30748c2ecf20Sopenharmony_ci			       struct btrfs_path *p, int find_higher,
30758c2ecf20Sopenharmony_ci			       int return_any)
30768c2ecf20Sopenharmony_ci{
30778c2ecf20Sopenharmony_ci	int ret;
30788c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
30798c2ecf20Sopenharmony_ci
30808c2ecf20Sopenharmony_ciagain:
30818c2ecf20Sopenharmony_ci	ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
30828c2ecf20Sopenharmony_ci	if (ret <= 0)
30838c2ecf20Sopenharmony_ci		return ret;
30848c2ecf20Sopenharmony_ci	/*
30858c2ecf20Sopenharmony_ci	 * a return value of 1 means the path is at the position where the
30868c2ecf20Sopenharmony_ci	 * item should be inserted. Normally this is the next bigger item,
30878c2ecf20Sopenharmony_ci	 * but in case the previous item is the last in a leaf, path points
30888c2ecf20Sopenharmony_ci	 * to the first free slot in the previous leaf, i.e. at an invalid
30898c2ecf20Sopenharmony_ci	 * item.
30908c2ecf20Sopenharmony_ci	 */
30918c2ecf20Sopenharmony_ci	leaf = p->nodes[0];
30928c2ecf20Sopenharmony_ci
30938c2ecf20Sopenharmony_ci	if (find_higher) {
30948c2ecf20Sopenharmony_ci		if (p->slots[0] >= btrfs_header_nritems(leaf)) {
30958c2ecf20Sopenharmony_ci			ret = btrfs_next_leaf(root, p);
30968c2ecf20Sopenharmony_ci			if (ret <= 0)
30978c2ecf20Sopenharmony_ci				return ret;
30988c2ecf20Sopenharmony_ci			if (!return_any)
30998c2ecf20Sopenharmony_ci				return 1;
31008c2ecf20Sopenharmony_ci			/*
31018c2ecf20Sopenharmony_ci			 * no higher item found, return the next
31028c2ecf20Sopenharmony_ci			 * lower instead
31038c2ecf20Sopenharmony_ci			 */
31048c2ecf20Sopenharmony_ci			return_any = 0;
31058c2ecf20Sopenharmony_ci			find_higher = 0;
31068c2ecf20Sopenharmony_ci			btrfs_release_path(p);
31078c2ecf20Sopenharmony_ci			goto again;
31088c2ecf20Sopenharmony_ci		}
31098c2ecf20Sopenharmony_ci	} else {
31108c2ecf20Sopenharmony_ci		if (p->slots[0] == 0) {
31118c2ecf20Sopenharmony_ci			ret = btrfs_prev_leaf(root, p);
31128c2ecf20Sopenharmony_ci			if (ret < 0)
31138c2ecf20Sopenharmony_ci				return ret;
31148c2ecf20Sopenharmony_ci			if (!ret) {
31158c2ecf20Sopenharmony_ci				leaf = p->nodes[0];
31168c2ecf20Sopenharmony_ci				if (p->slots[0] == btrfs_header_nritems(leaf))
31178c2ecf20Sopenharmony_ci					p->slots[0]--;
31188c2ecf20Sopenharmony_ci				return 0;
31198c2ecf20Sopenharmony_ci			}
31208c2ecf20Sopenharmony_ci			if (!return_any)
31218c2ecf20Sopenharmony_ci				return 1;
31228c2ecf20Sopenharmony_ci			/*
31238c2ecf20Sopenharmony_ci			 * no lower item found, return the next
31248c2ecf20Sopenharmony_ci			 * higher instead
31258c2ecf20Sopenharmony_ci			 */
31268c2ecf20Sopenharmony_ci			return_any = 0;
31278c2ecf20Sopenharmony_ci			find_higher = 1;
31288c2ecf20Sopenharmony_ci			btrfs_release_path(p);
31298c2ecf20Sopenharmony_ci			goto again;
31308c2ecf20Sopenharmony_ci		} else {
31318c2ecf20Sopenharmony_ci			--p->slots[0];
31328c2ecf20Sopenharmony_ci		}
31338c2ecf20Sopenharmony_ci	}
31348c2ecf20Sopenharmony_ci	return 0;
31358c2ecf20Sopenharmony_ci}
31368c2ecf20Sopenharmony_ci
31378c2ecf20Sopenharmony_ci/*
31388c2ecf20Sopenharmony_ci * adjust the pointers going up the tree, starting at level
31398c2ecf20Sopenharmony_ci * making sure the right key of each node is points to 'key'.
31408c2ecf20Sopenharmony_ci * This is used after shifting pointers to the left, so it stops
31418c2ecf20Sopenharmony_ci * fixing up pointers when a given leaf/node is not in slot 0 of the
31428c2ecf20Sopenharmony_ci * higher levels
31438c2ecf20Sopenharmony_ci *
31448c2ecf20Sopenharmony_ci */
31458c2ecf20Sopenharmony_cistatic void fixup_low_keys(struct btrfs_path *path,
31468c2ecf20Sopenharmony_ci			   struct btrfs_disk_key *key, int level)
31478c2ecf20Sopenharmony_ci{
31488c2ecf20Sopenharmony_ci	int i;
31498c2ecf20Sopenharmony_ci	struct extent_buffer *t;
31508c2ecf20Sopenharmony_ci	int ret;
31518c2ecf20Sopenharmony_ci
31528c2ecf20Sopenharmony_ci	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
31538c2ecf20Sopenharmony_ci		int tslot = path->slots[i];
31548c2ecf20Sopenharmony_ci
31558c2ecf20Sopenharmony_ci		if (!path->nodes[i])
31568c2ecf20Sopenharmony_ci			break;
31578c2ecf20Sopenharmony_ci		t = path->nodes[i];
31588c2ecf20Sopenharmony_ci		ret = tree_mod_log_insert_key(t, tslot, MOD_LOG_KEY_REPLACE,
31598c2ecf20Sopenharmony_ci				GFP_ATOMIC);
31608c2ecf20Sopenharmony_ci		BUG_ON(ret < 0);
31618c2ecf20Sopenharmony_ci		btrfs_set_node_key(t, key, tslot);
31628c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(path->nodes[i]);
31638c2ecf20Sopenharmony_ci		if (tslot != 0)
31648c2ecf20Sopenharmony_ci			break;
31658c2ecf20Sopenharmony_ci	}
31668c2ecf20Sopenharmony_ci}
31678c2ecf20Sopenharmony_ci
31688c2ecf20Sopenharmony_ci/*
31698c2ecf20Sopenharmony_ci * update item key.
31708c2ecf20Sopenharmony_ci *
31718c2ecf20Sopenharmony_ci * This function isn't completely safe. It's the caller's responsibility
31728c2ecf20Sopenharmony_ci * that the new key won't break the order
31738c2ecf20Sopenharmony_ci */
31748c2ecf20Sopenharmony_civoid btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
31758c2ecf20Sopenharmony_ci			     struct btrfs_path *path,
31768c2ecf20Sopenharmony_ci			     const struct btrfs_key *new_key)
31778c2ecf20Sopenharmony_ci{
31788c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
31798c2ecf20Sopenharmony_ci	struct extent_buffer *eb;
31808c2ecf20Sopenharmony_ci	int slot;
31818c2ecf20Sopenharmony_ci
31828c2ecf20Sopenharmony_ci	eb = path->nodes[0];
31838c2ecf20Sopenharmony_ci	slot = path->slots[0];
31848c2ecf20Sopenharmony_ci	if (slot > 0) {
31858c2ecf20Sopenharmony_ci		btrfs_item_key(eb, &disk_key, slot - 1);
31868c2ecf20Sopenharmony_ci		if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
31878c2ecf20Sopenharmony_ci			btrfs_crit(fs_info,
31888c2ecf20Sopenharmony_ci		"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
31898c2ecf20Sopenharmony_ci				   slot, btrfs_disk_key_objectid(&disk_key),
31908c2ecf20Sopenharmony_ci				   btrfs_disk_key_type(&disk_key),
31918c2ecf20Sopenharmony_ci				   btrfs_disk_key_offset(&disk_key),
31928c2ecf20Sopenharmony_ci				   new_key->objectid, new_key->type,
31938c2ecf20Sopenharmony_ci				   new_key->offset);
31948c2ecf20Sopenharmony_ci			btrfs_print_leaf(eb);
31958c2ecf20Sopenharmony_ci			BUG();
31968c2ecf20Sopenharmony_ci		}
31978c2ecf20Sopenharmony_ci	}
31988c2ecf20Sopenharmony_ci	if (slot < btrfs_header_nritems(eb) - 1) {
31998c2ecf20Sopenharmony_ci		btrfs_item_key(eb, &disk_key, slot + 1);
32008c2ecf20Sopenharmony_ci		if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
32018c2ecf20Sopenharmony_ci			btrfs_crit(fs_info,
32028c2ecf20Sopenharmony_ci		"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
32038c2ecf20Sopenharmony_ci				   slot, btrfs_disk_key_objectid(&disk_key),
32048c2ecf20Sopenharmony_ci				   btrfs_disk_key_type(&disk_key),
32058c2ecf20Sopenharmony_ci				   btrfs_disk_key_offset(&disk_key),
32068c2ecf20Sopenharmony_ci				   new_key->objectid, new_key->type,
32078c2ecf20Sopenharmony_ci				   new_key->offset);
32088c2ecf20Sopenharmony_ci			btrfs_print_leaf(eb);
32098c2ecf20Sopenharmony_ci			BUG();
32108c2ecf20Sopenharmony_ci		}
32118c2ecf20Sopenharmony_ci	}
32128c2ecf20Sopenharmony_ci
32138c2ecf20Sopenharmony_ci	btrfs_cpu_key_to_disk(&disk_key, new_key);
32148c2ecf20Sopenharmony_ci	btrfs_set_item_key(eb, &disk_key, slot);
32158c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(eb);
32168c2ecf20Sopenharmony_ci	if (slot == 0)
32178c2ecf20Sopenharmony_ci		fixup_low_keys(path, &disk_key, 1);
32188c2ecf20Sopenharmony_ci}
32198c2ecf20Sopenharmony_ci
32208c2ecf20Sopenharmony_ci/*
32218c2ecf20Sopenharmony_ci * Check key order of two sibling extent buffers.
32228c2ecf20Sopenharmony_ci *
32238c2ecf20Sopenharmony_ci * Return true if something is wrong.
32248c2ecf20Sopenharmony_ci * Return false if everything is fine.
32258c2ecf20Sopenharmony_ci *
32268c2ecf20Sopenharmony_ci * Tree-checker only works inside one tree block, thus the following
32278c2ecf20Sopenharmony_ci * corruption can not be detected by tree-checker:
32288c2ecf20Sopenharmony_ci *
32298c2ecf20Sopenharmony_ci * Leaf @left			| Leaf @right
32308c2ecf20Sopenharmony_ci * --------------------------------------------------------------
32318c2ecf20Sopenharmony_ci * | 1 | 2 | 3 | 4 | 5 | f6 |   | 7 | 8 |
32328c2ecf20Sopenharmony_ci *
32338c2ecf20Sopenharmony_ci * Key f6 in leaf @left itself is valid, but not valid when the next
32348c2ecf20Sopenharmony_ci * key in leaf @right is 7.
32358c2ecf20Sopenharmony_ci * This can only be checked at tree block merge time.
32368c2ecf20Sopenharmony_ci * And since tree checker has ensured all key order in each tree block
32378c2ecf20Sopenharmony_ci * is correct, we only need to bother the last key of @left and the first
32388c2ecf20Sopenharmony_ci * key of @right.
32398c2ecf20Sopenharmony_ci */
32408c2ecf20Sopenharmony_cistatic bool check_sibling_keys(struct extent_buffer *left,
32418c2ecf20Sopenharmony_ci			       struct extent_buffer *right)
32428c2ecf20Sopenharmony_ci{
32438c2ecf20Sopenharmony_ci	struct btrfs_key left_last;
32448c2ecf20Sopenharmony_ci	struct btrfs_key right_first;
32458c2ecf20Sopenharmony_ci	int level = btrfs_header_level(left);
32468c2ecf20Sopenharmony_ci	int nr_left = btrfs_header_nritems(left);
32478c2ecf20Sopenharmony_ci	int nr_right = btrfs_header_nritems(right);
32488c2ecf20Sopenharmony_ci
32498c2ecf20Sopenharmony_ci	/* No key to check in one of the tree blocks */
32508c2ecf20Sopenharmony_ci	if (!nr_left || !nr_right)
32518c2ecf20Sopenharmony_ci		return false;
32528c2ecf20Sopenharmony_ci
32538c2ecf20Sopenharmony_ci	if (level) {
32548c2ecf20Sopenharmony_ci		btrfs_node_key_to_cpu(left, &left_last, nr_left - 1);
32558c2ecf20Sopenharmony_ci		btrfs_node_key_to_cpu(right, &right_first, 0);
32568c2ecf20Sopenharmony_ci	} else {
32578c2ecf20Sopenharmony_ci		btrfs_item_key_to_cpu(left, &left_last, nr_left - 1);
32588c2ecf20Sopenharmony_ci		btrfs_item_key_to_cpu(right, &right_first, 0);
32598c2ecf20Sopenharmony_ci	}
32608c2ecf20Sopenharmony_ci
32618c2ecf20Sopenharmony_ci	if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
32628c2ecf20Sopenharmony_ci		btrfs_crit(left->fs_info,
32638c2ecf20Sopenharmony_ci"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)",
32648c2ecf20Sopenharmony_ci			   left_last.objectid, left_last.type,
32658c2ecf20Sopenharmony_ci			   left_last.offset, right_first.objectid,
32668c2ecf20Sopenharmony_ci			   right_first.type, right_first.offset);
32678c2ecf20Sopenharmony_ci		return true;
32688c2ecf20Sopenharmony_ci	}
32698c2ecf20Sopenharmony_ci	return false;
32708c2ecf20Sopenharmony_ci}
32718c2ecf20Sopenharmony_ci
32728c2ecf20Sopenharmony_ci/*
32738c2ecf20Sopenharmony_ci * try to push data from one node into the next node left in the
32748c2ecf20Sopenharmony_ci * tree.
32758c2ecf20Sopenharmony_ci *
32768c2ecf20Sopenharmony_ci * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
32778c2ecf20Sopenharmony_ci * error, and > 0 if there was no room in the left hand block.
32788c2ecf20Sopenharmony_ci */
32798c2ecf20Sopenharmony_cistatic int push_node_left(struct btrfs_trans_handle *trans,
32808c2ecf20Sopenharmony_ci			  struct extent_buffer *dst,
32818c2ecf20Sopenharmony_ci			  struct extent_buffer *src, int empty)
32828c2ecf20Sopenharmony_ci{
32838c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
32848c2ecf20Sopenharmony_ci	int push_items = 0;
32858c2ecf20Sopenharmony_ci	int src_nritems;
32868c2ecf20Sopenharmony_ci	int dst_nritems;
32878c2ecf20Sopenharmony_ci	int ret = 0;
32888c2ecf20Sopenharmony_ci
32898c2ecf20Sopenharmony_ci	src_nritems = btrfs_header_nritems(src);
32908c2ecf20Sopenharmony_ci	dst_nritems = btrfs_header_nritems(dst);
32918c2ecf20Sopenharmony_ci	push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
32928c2ecf20Sopenharmony_ci	WARN_ON(btrfs_header_generation(src) != trans->transid);
32938c2ecf20Sopenharmony_ci	WARN_ON(btrfs_header_generation(dst) != trans->transid);
32948c2ecf20Sopenharmony_ci
32958c2ecf20Sopenharmony_ci	if (!empty && src_nritems <= 8)
32968c2ecf20Sopenharmony_ci		return 1;
32978c2ecf20Sopenharmony_ci
32988c2ecf20Sopenharmony_ci	if (push_items <= 0)
32998c2ecf20Sopenharmony_ci		return 1;
33008c2ecf20Sopenharmony_ci
33018c2ecf20Sopenharmony_ci	if (empty) {
33028c2ecf20Sopenharmony_ci		push_items = min(src_nritems, push_items);
33038c2ecf20Sopenharmony_ci		if (push_items < src_nritems) {
33048c2ecf20Sopenharmony_ci			/* leave at least 8 pointers in the node if
33058c2ecf20Sopenharmony_ci			 * we aren't going to empty it
33068c2ecf20Sopenharmony_ci			 */
33078c2ecf20Sopenharmony_ci			if (src_nritems - push_items < 8) {
33088c2ecf20Sopenharmony_ci				if (push_items <= 8)
33098c2ecf20Sopenharmony_ci					return 1;
33108c2ecf20Sopenharmony_ci				push_items -= 8;
33118c2ecf20Sopenharmony_ci			}
33128c2ecf20Sopenharmony_ci		}
33138c2ecf20Sopenharmony_ci	} else
33148c2ecf20Sopenharmony_ci		push_items = min(src_nritems - 8, push_items);
33158c2ecf20Sopenharmony_ci
33168c2ecf20Sopenharmony_ci	/* dst is the left eb, src is the middle eb */
33178c2ecf20Sopenharmony_ci	if (check_sibling_keys(dst, src)) {
33188c2ecf20Sopenharmony_ci		ret = -EUCLEAN;
33198c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
33208c2ecf20Sopenharmony_ci		return ret;
33218c2ecf20Sopenharmony_ci	}
33228c2ecf20Sopenharmony_ci	ret = tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items);
33238c2ecf20Sopenharmony_ci	if (ret) {
33248c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
33258c2ecf20Sopenharmony_ci		return ret;
33268c2ecf20Sopenharmony_ci	}
33278c2ecf20Sopenharmony_ci	copy_extent_buffer(dst, src,
33288c2ecf20Sopenharmony_ci			   btrfs_node_key_ptr_offset(dst_nritems),
33298c2ecf20Sopenharmony_ci			   btrfs_node_key_ptr_offset(0),
33308c2ecf20Sopenharmony_ci			   push_items * sizeof(struct btrfs_key_ptr));
33318c2ecf20Sopenharmony_ci
33328c2ecf20Sopenharmony_ci	if (push_items < src_nritems) {
33338c2ecf20Sopenharmony_ci		/*
33348c2ecf20Sopenharmony_ci		 * Don't call tree_mod_log_insert_move here, key removal was
33358c2ecf20Sopenharmony_ci		 * already fully logged by tree_mod_log_eb_copy above.
33368c2ecf20Sopenharmony_ci		 */
33378c2ecf20Sopenharmony_ci		memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
33388c2ecf20Sopenharmony_ci				      btrfs_node_key_ptr_offset(push_items),
33398c2ecf20Sopenharmony_ci				      (src_nritems - push_items) *
33408c2ecf20Sopenharmony_ci				      sizeof(struct btrfs_key_ptr));
33418c2ecf20Sopenharmony_ci	}
33428c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(src, src_nritems - push_items);
33438c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(dst, dst_nritems + push_items);
33448c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(src);
33458c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(dst);
33468c2ecf20Sopenharmony_ci
33478c2ecf20Sopenharmony_ci	return ret;
33488c2ecf20Sopenharmony_ci}
33498c2ecf20Sopenharmony_ci
33508c2ecf20Sopenharmony_ci/*
33518c2ecf20Sopenharmony_ci * try to push data from one node into the next node right in the
33528c2ecf20Sopenharmony_ci * tree.
33538c2ecf20Sopenharmony_ci *
33548c2ecf20Sopenharmony_ci * returns 0 if some ptrs were pushed, < 0 if there was some horrible
33558c2ecf20Sopenharmony_ci * error, and > 0 if there was no room in the right hand block.
33568c2ecf20Sopenharmony_ci *
33578c2ecf20Sopenharmony_ci * this will  only push up to 1/2 the contents of the left node over
33588c2ecf20Sopenharmony_ci */
33598c2ecf20Sopenharmony_cistatic int balance_node_right(struct btrfs_trans_handle *trans,
33608c2ecf20Sopenharmony_ci			      struct extent_buffer *dst,
33618c2ecf20Sopenharmony_ci			      struct extent_buffer *src)
33628c2ecf20Sopenharmony_ci{
33638c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
33648c2ecf20Sopenharmony_ci	int push_items = 0;
33658c2ecf20Sopenharmony_ci	int max_push;
33668c2ecf20Sopenharmony_ci	int src_nritems;
33678c2ecf20Sopenharmony_ci	int dst_nritems;
33688c2ecf20Sopenharmony_ci	int ret = 0;
33698c2ecf20Sopenharmony_ci
33708c2ecf20Sopenharmony_ci	WARN_ON(btrfs_header_generation(src) != trans->transid);
33718c2ecf20Sopenharmony_ci	WARN_ON(btrfs_header_generation(dst) != trans->transid);
33728c2ecf20Sopenharmony_ci
33738c2ecf20Sopenharmony_ci	src_nritems = btrfs_header_nritems(src);
33748c2ecf20Sopenharmony_ci	dst_nritems = btrfs_header_nritems(dst);
33758c2ecf20Sopenharmony_ci	push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems;
33768c2ecf20Sopenharmony_ci	if (push_items <= 0)
33778c2ecf20Sopenharmony_ci		return 1;
33788c2ecf20Sopenharmony_ci
33798c2ecf20Sopenharmony_ci	if (src_nritems < 4)
33808c2ecf20Sopenharmony_ci		return 1;
33818c2ecf20Sopenharmony_ci
33828c2ecf20Sopenharmony_ci	max_push = src_nritems / 2 + 1;
33838c2ecf20Sopenharmony_ci	/* don't try to empty the node */
33848c2ecf20Sopenharmony_ci	if (max_push >= src_nritems)
33858c2ecf20Sopenharmony_ci		return 1;
33868c2ecf20Sopenharmony_ci
33878c2ecf20Sopenharmony_ci	if (max_push < push_items)
33888c2ecf20Sopenharmony_ci		push_items = max_push;
33898c2ecf20Sopenharmony_ci
33908c2ecf20Sopenharmony_ci	/* dst is the right eb, src is the middle eb */
33918c2ecf20Sopenharmony_ci	if (check_sibling_keys(src, dst)) {
33928c2ecf20Sopenharmony_ci		ret = -EUCLEAN;
33938c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
33948c2ecf20Sopenharmony_ci		return ret;
33958c2ecf20Sopenharmony_ci	}
33968c2ecf20Sopenharmony_ci	ret = tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
33978c2ecf20Sopenharmony_ci	BUG_ON(ret < 0);
33988c2ecf20Sopenharmony_ci	memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
33998c2ecf20Sopenharmony_ci				      btrfs_node_key_ptr_offset(0),
34008c2ecf20Sopenharmony_ci				      (dst_nritems) *
34018c2ecf20Sopenharmony_ci				      sizeof(struct btrfs_key_ptr));
34028c2ecf20Sopenharmony_ci
34038c2ecf20Sopenharmony_ci	ret = tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items,
34048c2ecf20Sopenharmony_ci				   push_items);
34058c2ecf20Sopenharmony_ci	if (ret) {
34068c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
34078c2ecf20Sopenharmony_ci		return ret;
34088c2ecf20Sopenharmony_ci	}
34098c2ecf20Sopenharmony_ci	copy_extent_buffer(dst, src,
34108c2ecf20Sopenharmony_ci			   btrfs_node_key_ptr_offset(0),
34118c2ecf20Sopenharmony_ci			   btrfs_node_key_ptr_offset(src_nritems - push_items),
34128c2ecf20Sopenharmony_ci			   push_items * sizeof(struct btrfs_key_ptr));
34138c2ecf20Sopenharmony_ci
34148c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(src, src_nritems - push_items);
34158c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(dst, dst_nritems + push_items);
34168c2ecf20Sopenharmony_ci
34178c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(src);
34188c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(dst);
34198c2ecf20Sopenharmony_ci
34208c2ecf20Sopenharmony_ci	return ret;
34218c2ecf20Sopenharmony_ci}
34228c2ecf20Sopenharmony_ci
34238c2ecf20Sopenharmony_ci/*
34248c2ecf20Sopenharmony_ci * helper function to insert a new root level in the tree.
34258c2ecf20Sopenharmony_ci * A new node is allocated, and a single item is inserted to
34268c2ecf20Sopenharmony_ci * point to the existing root
34278c2ecf20Sopenharmony_ci *
34288c2ecf20Sopenharmony_ci * returns zero on success or < 0 on failure.
34298c2ecf20Sopenharmony_ci */
34308c2ecf20Sopenharmony_cistatic noinline int insert_new_root(struct btrfs_trans_handle *trans,
34318c2ecf20Sopenharmony_ci			   struct btrfs_root *root,
34328c2ecf20Sopenharmony_ci			   struct btrfs_path *path, int level)
34338c2ecf20Sopenharmony_ci{
34348c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
34358c2ecf20Sopenharmony_ci	u64 lower_gen;
34368c2ecf20Sopenharmony_ci	struct extent_buffer *lower;
34378c2ecf20Sopenharmony_ci	struct extent_buffer *c;
34388c2ecf20Sopenharmony_ci	struct extent_buffer *old;
34398c2ecf20Sopenharmony_ci	struct btrfs_disk_key lower_key;
34408c2ecf20Sopenharmony_ci	int ret;
34418c2ecf20Sopenharmony_ci
34428c2ecf20Sopenharmony_ci	BUG_ON(path->nodes[level]);
34438c2ecf20Sopenharmony_ci	BUG_ON(path->nodes[level-1] != root->node);
34448c2ecf20Sopenharmony_ci
34458c2ecf20Sopenharmony_ci	lower = path->nodes[level-1];
34468c2ecf20Sopenharmony_ci	if (level == 1)
34478c2ecf20Sopenharmony_ci		btrfs_item_key(lower, &lower_key, 0);
34488c2ecf20Sopenharmony_ci	else
34498c2ecf20Sopenharmony_ci		btrfs_node_key(lower, &lower_key, 0);
34508c2ecf20Sopenharmony_ci
34518c2ecf20Sopenharmony_ci	c = alloc_tree_block_no_bg_flush(trans, root, 0, &lower_key, level,
34528c2ecf20Sopenharmony_ci					 root->node->start, 0,
34538c2ecf20Sopenharmony_ci					 BTRFS_NESTING_NEW_ROOT);
34548c2ecf20Sopenharmony_ci	if (IS_ERR(c))
34558c2ecf20Sopenharmony_ci		return PTR_ERR(c);
34568c2ecf20Sopenharmony_ci
34578c2ecf20Sopenharmony_ci	root_add_used(root, fs_info->nodesize);
34588c2ecf20Sopenharmony_ci
34598c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(c, 1);
34608c2ecf20Sopenharmony_ci	btrfs_set_node_key(c, &lower_key, 0);
34618c2ecf20Sopenharmony_ci	btrfs_set_node_blockptr(c, 0, lower->start);
34628c2ecf20Sopenharmony_ci	lower_gen = btrfs_header_generation(lower);
34638c2ecf20Sopenharmony_ci	WARN_ON(lower_gen != trans->transid);
34648c2ecf20Sopenharmony_ci
34658c2ecf20Sopenharmony_ci	btrfs_set_node_ptr_generation(c, 0, lower_gen);
34668c2ecf20Sopenharmony_ci
34678c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(c);
34688c2ecf20Sopenharmony_ci
34698c2ecf20Sopenharmony_ci	old = root->node;
34708c2ecf20Sopenharmony_ci	ret = tree_mod_log_insert_root(root->node, c, 0);
34718c2ecf20Sopenharmony_ci	BUG_ON(ret < 0);
34728c2ecf20Sopenharmony_ci	rcu_assign_pointer(root->node, c);
34738c2ecf20Sopenharmony_ci
34748c2ecf20Sopenharmony_ci	/* the super has an extra ref to root->node */
34758c2ecf20Sopenharmony_ci	free_extent_buffer(old);
34768c2ecf20Sopenharmony_ci
34778c2ecf20Sopenharmony_ci	add_root_to_dirty_list(root);
34788c2ecf20Sopenharmony_ci	atomic_inc(&c->refs);
34798c2ecf20Sopenharmony_ci	path->nodes[level] = c;
34808c2ecf20Sopenharmony_ci	path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
34818c2ecf20Sopenharmony_ci	path->slots[level] = 0;
34828c2ecf20Sopenharmony_ci	return 0;
34838c2ecf20Sopenharmony_ci}
34848c2ecf20Sopenharmony_ci
34858c2ecf20Sopenharmony_ci/*
34868c2ecf20Sopenharmony_ci * worker function to insert a single pointer in a node.
34878c2ecf20Sopenharmony_ci * the node should have enough room for the pointer already
34888c2ecf20Sopenharmony_ci *
34898c2ecf20Sopenharmony_ci * slot and level indicate where you want the key to go, and
34908c2ecf20Sopenharmony_ci * blocknr is the block the key points to.
34918c2ecf20Sopenharmony_ci */
34928c2ecf20Sopenharmony_cistatic void insert_ptr(struct btrfs_trans_handle *trans,
34938c2ecf20Sopenharmony_ci		       struct btrfs_path *path,
34948c2ecf20Sopenharmony_ci		       struct btrfs_disk_key *key, u64 bytenr,
34958c2ecf20Sopenharmony_ci		       int slot, int level)
34968c2ecf20Sopenharmony_ci{
34978c2ecf20Sopenharmony_ci	struct extent_buffer *lower;
34988c2ecf20Sopenharmony_ci	int nritems;
34998c2ecf20Sopenharmony_ci	int ret;
35008c2ecf20Sopenharmony_ci
35018c2ecf20Sopenharmony_ci	BUG_ON(!path->nodes[level]);
35028c2ecf20Sopenharmony_ci	btrfs_assert_tree_locked(path->nodes[level]);
35038c2ecf20Sopenharmony_ci	lower = path->nodes[level];
35048c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(lower);
35058c2ecf20Sopenharmony_ci	BUG_ON(slot > nritems);
35068c2ecf20Sopenharmony_ci	BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info));
35078c2ecf20Sopenharmony_ci	if (slot != nritems) {
35088c2ecf20Sopenharmony_ci		if (level) {
35098c2ecf20Sopenharmony_ci			ret = tree_mod_log_insert_move(lower, slot + 1, slot,
35108c2ecf20Sopenharmony_ci					nritems - slot);
35118c2ecf20Sopenharmony_ci			BUG_ON(ret < 0);
35128c2ecf20Sopenharmony_ci		}
35138c2ecf20Sopenharmony_ci		memmove_extent_buffer(lower,
35148c2ecf20Sopenharmony_ci			      btrfs_node_key_ptr_offset(slot + 1),
35158c2ecf20Sopenharmony_ci			      btrfs_node_key_ptr_offset(slot),
35168c2ecf20Sopenharmony_ci			      (nritems - slot) * sizeof(struct btrfs_key_ptr));
35178c2ecf20Sopenharmony_ci	}
35188c2ecf20Sopenharmony_ci	if (level) {
35198c2ecf20Sopenharmony_ci		ret = tree_mod_log_insert_key(lower, slot, MOD_LOG_KEY_ADD,
35208c2ecf20Sopenharmony_ci				GFP_NOFS);
35218c2ecf20Sopenharmony_ci		BUG_ON(ret < 0);
35228c2ecf20Sopenharmony_ci	}
35238c2ecf20Sopenharmony_ci	btrfs_set_node_key(lower, key, slot);
35248c2ecf20Sopenharmony_ci	btrfs_set_node_blockptr(lower, slot, bytenr);
35258c2ecf20Sopenharmony_ci	WARN_ON(trans->transid == 0);
35268c2ecf20Sopenharmony_ci	btrfs_set_node_ptr_generation(lower, slot, trans->transid);
35278c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(lower, nritems + 1);
35288c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(lower);
35298c2ecf20Sopenharmony_ci}
35308c2ecf20Sopenharmony_ci
35318c2ecf20Sopenharmony_ci/*
35328c2ecf20Sopenharmony_ci * split the node at the specified level in path in two.
35338c2ecf20Sopenharmony_ci * The path is corrected to point to the appropriate node after the split
35348c2ecf20Sopenharmony_ci *
35358c2ecf20Sopenharmony_ci * Before splitting this tries to make some room in the node by pushing
35368c2ecf20Sopenharmony_ci * left and right, if either one works, it returns right away.
35378c2ecf20Sopenharmony_ci *
35388c2ecf20Sopenharmony_ci * returns 0 on success and < 0 on failure
35398c2ecf20Sopenharmony_ci */
35408c2ecf20Sopenharmony_cistatic noinline int split_node(struct btrfs_trans_handle *trans,
35418c2ecf20Sopenharmony_ci			       struct btrfs_root *root,
35428c2ecf20Sopenharmony_ci			       struct btrfs_path *path, int level)
35438c2ecf20Sopenharmony_ci{
35448c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
35458c2ecf20Sopenharmony_ci	struct extent_buffer *c;
35468c2ecf20Sopenharmony_ci	struct extent_buffer *split;
35478c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
35488c2ecf20Sopenharmony_ci	int mid;
35498c2ecf20Sopenharmony_ci	int ret;
35508c2ecf20Sopenharmony_ci	u32 c_nritems;
35518c2ecf20Sopenharmony_ci
35528c2ecf20Sopenharmony_ci	c = path->nodes[level];
35538c2ecf20Sopenharmony_ci	WARN_ON(btrfs_header_generation(c) != trans->transid);
35548c2ecf20Sopenharmony_ci	if (c == root->node) {
35558c2ecf20Sopenharmony_ci		/*
35568c2ecf20Sopenharmony_ci		 * trying to split the root, lets make a new one
35578c2ecf20Sopenharmony_ci		 *
35588c2ecf20Sopenharmony_ci		 * tree mod log: We don't log_removal old root in
35598c2ecf20Sopenharmony_ci		 * insert_new_root, because that root buffer will be kept as a
35608c2ecf20Sopenharmony_ci		 * normal node. We are going to log removal of half of the
35618c2ecf20Sopenharmony_ci		 * elements below with tree_mod_log_eb_copy. We're holding a
35628c2ecf20Sopenharmony_ci		 * tree lock on the buffer, which is why we cannot race with
35638c2ecf20Sopenharmony_ci		 * other tree_mod_log users.
35648c2ecf20Sopenharmony_ci		 */
35658c2ecf20Sopenharmony_ci		ret = insert_new_root(trans, root, path, level + 1);
35668c2ecf20Sopenharmony_ci		if (ret)
35678c2ecf20Sopenharmony_ci			return ret;
35688c2ecf20Sopenharmony_ci	} else {
35698c2ecf20Sopenharmony_ci		ret = push_nodes_for_insert(trans, root, path, level);
35708c2ecf20Sopenharmony_ci		c = path->nodes[level];
35718c2ecf20Sopenharmony_ci		if (!ret && btrfs_header_nritems(c) <
35728c2ecf20Sopenharmony_ci		    BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3)
35738c2ecf20Sopenharmony_ci			return 0;
35748c2ecf20Sopenharmony_ci		if (ret < 0)
35758c2ecf20Sopenharmony_ci			return ret;
35768c2ecf20Sopenharmony_ci	}
35778c2ecf20Sopenharmony_ci
35788c2ecf20Sopenharmony_ci	c_nritems = btrfs_header_nritems(c);
35798c2ecf20Sopenharmony_ci	mid = (c_nritems + 1) / 2;
35808c2ecf20Sopenharmony_ci	btrfs_node_key(c, &disk_key, mid);
35818c2ecf20Sopenharmony_ci
35828c2ecf20Sopenharmony_ci	split = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, level,
35838c2ecf20Sopenharmony_ci					     c->start, 0, BTRFS_NESTING_SPLIT);
35848c2ecf20Sopenharmony_ci	if (IS_ERR(split))
35858c2ecf20Sopenharmony_ci		return PTR_ERR(split);
35868c2ecf20Sopenharmony_ci
35878c2ecf20Sopenharmony_ci	root_add_used(root, fs_info->nodesize);
35888c2ecf20Sopenharmony_ci	ASSERT(btrfs_header_level(c) == level);
35898c2ecf20Sopenharmony_ci
35908c2ecf20Sopenharmony_ci	ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
35918c2ecf20Sopenharmony_ci	if (ret) {
35928c2ecf20Sopenharmony_ci		btrfs_tree_unlock(split);
35938c2ecf20Sopenharmony_ci		free_extent_buffer(split);
35948c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
35958c2ecf20Sopenharmony_ci		return ret;
35968c2ecf20Sopenharmony_ci	}
35978c2ecf20Sopenharmony_ci	copy_extent_buffer(split, c,
35988c2ecf20Sopenharmony_ci			   btrfs_node_key_ptr_offset(0),
35998c2ecf20Sopenharmony_ci			   btrfs_node_key_ptr_offset(mid),
36008c2ecf20Sopenharmony_ci			   (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
36018c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(split, c_nritems - mid);
36028c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(c, mid);
36038c2ecf20Sopenharmony_ci	ret = 0;
36048c2ecf20Sopenharmony_ci
36058c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(c);
36068c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(split);
36078c2ecf20Sopenharmony_ci
36088c2ecf20Sopenharmony_ci	insert_ptr(trans, path, &disk_key, split->start,
36098c2ecf20Sopenharmony_ci		   path->slots[level + 1] + 1, level + 1);
36108c2ecf20Sopenharmony_ci
36118c2ecf20Sopenharmony_ci	if (path->slots[level] >= mid) {
36128c2ecf20Sopenharmony_ci		path->slots[level] -= mid;
36138c2ecf20Sopenharmony_ci		btrfs_tree_unlock(c);
36148c2ecf20Sopenharmony_ci		free_extent_buffer(c);
36158c2ecf20Sopenharmony_ci		path->nodes[level] = split;
36168c2ecf20Sopenharmony_ci		path->slots[level + 1] += 1;
36178c2ecf20Sopenharmony_ci	} else {
36188c2ecf20Sopenharmony_ci		btrfs_tree_unlock(split);
36198c2ecf20Sopenharmony_ci		free_extent_buffer(split);
36208c2ecf20Sopenharmony_ci	}
36218c2ecf20Sopenharmony_ci	return ret;
36228c2ecf20Sopenharmony_ci}
36238c2ecf20Sopenharmony_ci
36248c2ecf20Sopenharmony_ci/*
36258c2ecf20Sopenharmony_ci * how many bytes are required to store the items in a leaf.  start
36268c2ecf20Sopenharmony_ci * and nr indicate which items in the leaf to check.  This totals up the
36278c2ecf20Sopenharmony_ci * space used both by the item structs and the item data
36288c2ecf20Sopenharmony_ci */
36298c2ecf20Sopenharmony_cistatic int leaf_space_used(struct extent_buffer *l, int start, int nr)
36308c2ecf20Sopenharmony_ci{
36318c2ecf20Sopenharmony_ci	struct btrfs_item *start_item;
36328c2ecf20Sopenharmony_ci	struct btrfs_item *end_item;
36338c2ecf20Sopenharmony_ci	int data_len;
36348c2ecf20Sopenharmony_ci	int nritems = btrfs_header_nritems(l);
36358c2ecf20Sopenharmony_ci	int end = min(nritems, start + nr) - 1;
36368c2ecf20Sopenharmony_ci
36378c2ecf20Sopenharmony_ci	if (!nr)
36388c2ecf20Sopenharmony_ci		return 0;
36398c2ecf20Sopenharmony_ci	start_item = btrfs_item_nr(start);
36408c2ecf20Sopenharmony_ci	end_item = btrfs_item_nr(end);
36418c2ecf20Sopenharmony_ci	data_len = btrfs_item_offset(l, start_item) +
36428c2ecf20Sopenharmony_ci		   btrfs_item_size(l, start_item);
36438c2ecf20Sopenharmony_ci	data_len = data_len - btrfs_item_offset(l, end_item);
36448c2ecf20Sopenharmony_ci	data_len += sizeof(struct btrfs_item) * nr;
36458c2ecf20Sopenharmony_ci	WARN_ON(data_len < 0);
36468c2ecf20Sopenharmony_ci	return data_len;
36478c2ecf20Sopenharmony_ci}
36488c2ecf20Sopenharmony_ci
36498c2ecf20Sopenharmony_ci/*
36508c2ecf20Sopenharmony_ci * The space between the end of the leaf items and
36518c2ecf20Sopenharmony_ci * the start of the leaf data.  IOW, how much room
36528c2ecf20Sopenharmony_ci * the leaf has left for both items and data
36538c2ecf20Sopenharmony_ci */
36548c2ecf20Sopenharmony_cinoinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
36558c2ecf20Sopenharmony_ci{
36568c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = leaf->fs_info;
36578c2ecf20Sopenharmony_ci	int nritems = btrfs_header_nritems(leaf);
36588c2ecf20Sopenharmony_ci	int ret;
36598c2ecf20Sopenharmony_ci
36608c2ecf20Sopenharmony_ci	ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems);
36618c2ecf20Sopenharmony_ci	if (ret < 0) {
36628c2ecf20Sopenharmony_ci		btrfs_crit(fs_info,
36638c2ecf20Sopenharmony_ci			   "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
36648c2ecf20Sopenharmony_ci			   ret,
36658c2ecf20Sopenharmony_ci			   (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info),
36668c2ecf20Sopenharmony_ci			   leaf_space_used(leaf, 0, nritems), nritems);
36678c2ecf20Sopenharmony_ci	}
36688c2ecf20Sopenharmony_ci	return ret;
36698c2ecf20Sopenharmony_ci}
36708c2ecf20Sopenharmony_ci
36718c2ecf20Sopenharmony_ci/*
36728c2ecf20Sopenharmony_ci * min slot controls the lowest index we're willing to push to the
36738c2ecf20Sopenharmony_ci * right.  We'll push up to and including min_slot, but no lower
36748c2ecf20Sopenharmony_ci */
36758c2ecf20Sopenharmony_cistatic noinline int __push_leaf_right(struct btrfs_path *path,
36768c2ecf20Sopenharmony_ci				      int data_size, int empty,
36778c2ecf20Sopenharmony_ci				      struct extent_buffer *right,
36788c2ecf20Sopenharmony_ci				      int free_space, u32 left_nritems,
36798c2ecf20Sopenharmony_ci				      u32 min_slot)
36808c2ecf20Sopenharmony_ci{
36818c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = right->fs_info;
36828c2ecf20Sopenharmony_ci	struct extent_buffer *left = path->nodes[0];
36838c2ecf20Sopenharmony_ci	struct extent_buffer *upper = path->nodes[1];
36848c2ecf20Sopenharmony_ci	struct btrfs_map_token token;
36858c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
36868c2ecf20Sopenharmony_ci	int slot;
36878c2ecf20Sopenharmony_ci	u32 i;
36888c2ecf20Sopenharmony_ci	int push_space = 0;
36898c2ecf20Sopenharmony_ci	int push_items = 0;
36908c2ecf20Sopenharmony_ci	struct btrfs_item *item;
36918c2ecf20Sopenharmony_ci	u32 nr;
36928c2ecf20Sopenharmony_ci	u32 right_nritems;
36938c2ecf20Sopenharmony_ci	u32 data_end;
36948c2ecf20Sopenharmony_ci	u32 this_item_size;
36958c2ecf20Sopenharmony_ci
36968c2ecf20Sopenharmony_ci	if (empty)
36978c2ecf20Sopenharmony_ci		nr = 0;
36988c2ecf20Sopenharmony_ci	else
36998c2ecf20Sopenharmony_ci		nr = max_t(u32, 1, min_slot);
37008c2ecf20Sopenharmony_ci
37018c2ecf20Sopenharmony_ci	if (path->slots[0] >= left_nritems)
37028c2ecf20Sopenharmony_ci		push_space += data_size;
37038c2ecf20Sopenharmony_ci
37048c2ecf20Sopenharmony_ci	slot = path->slots[1];
37058c2ecf20Sopenharmony_ci	i = left_nritems - 1;
37068c2ecf20Sopenharmony_ci	while (i >= nr) {
37078c2ecf20Sopenharmony_ci		item = btrfs_item_nr(i);
37088c2ecf20Sopenharmony_ci
37098c2ecf20Sopenharmony_ci		if (!empty && push_items > 0) {
37108c2ecf20Sopenharmony_ci			if (path->slots[0] > i)
37118c2ecf20Sopenharmony_ci				break;
37128c2ecf20Sopenharmony_ci			if (path->slots[0] == i) {
37138c2ecf20Sopenharmony_ci				int space = btrfs_leaf_free_space(left);
37148c2ecf20Sopenharmony_ci
37158c2ecf20Sopenharmony_ci				if (space + push_space * 2 > free_space)
37168c2ecf20Sopenharmony_ci					break;
37178c2ecf20Sopenharmony_ci			}
37188c2ecf20Sopenharmony_ci		}
37198c2ecf20Sopenharmony_ci
37208c2ecf20Sopenharmony_ci		if (path->slots[0] == i)
37218c2ecf20Sopenharmony_ci			push_space += data_size;
37228c2ecf20Sopenharmony_ci
37238c2ecf20Sopenharmony_ci		this_item_size = btrfs_item_size(left, item);
37248c2ecf20Sopenharmony_ci		if (this_item_size + sizeof(*item) + push_space > free_space)
37258c2ecf20Sopenharmony_ci			break;
37268c2ecf20Sopenharmony_ci
37278c2ecf20Sopenharmony_ci		push_items++;
37288c2ecf20Sopenharmony_ci		push_space += this_item_size + sizeof(*item);
37298c2ecf20Sopenharmony_ci		if (i == 0)
37308c2ecf20Sopenharmony_ci			break;
37318c2ecf20Sopenharmony_ci		i--;
37328c2ecf20Sopenharmony_ci	}
37338c2ecf20Sopenharmony_ci
37348c2ecf20Sopenharmony_ci	if (push_items == 0)
37358c2ecf20Sopenharmony_ci		goto out_unlock;
37368c2ecf20Sopenharmony_ci
37378c2ecf20Sopenharmony_ci	WARN_ON(!empty && push_items == left_nritems);
37388c2ecf20Sopenharmony_ci
37398c2ecf20Sopenharmony_ci	/* push left to right */
37408c2ecf20Sopenharmony_ci	right_nritems = btrfs_header_nritems(right);
37418c2ecf20Sopenharmony_ci
37428c2ecf20Sopenharmony_ci	push_space = btrfs_item_end_nr(left, left_nritems - push_items);
37438c2ecf20Sopenharmony_ci	push_space -= leaf_data_end(left);
37448c2ecf20Sopenharmony_ci
37458c2ecf20Sopenharmony_ci	/* make room in the right data area */
37468c2ecf20Sopenharmony_ci	data_end = leaf_data_end(right);
37478c2ecf20Sopenharmony_ci	memmove_extent_buffer(right,
37488c2ecf20Sopenharmony_ci			      BTRFS_LEAF_DATA_OFFSET + data_end - push_space,
37498c2ecf20Sopenharmony_ci			      BTRFS_LEAF_DATA_OFFSET + data_end,
37508c2ecf20Sopenharmony_ci			      BTRFS_LEAF_DATA_SIZE(fs_info) - data_end);
37518c2ecf20Sopenharmony_ci
37528c2ecf20Sopenharmony_ci	/* copy from the left data area */
37538c2ecf20Sopenharmony_ci	copy_extent_buffer(right, left, BTRFS_LEAF_DATA_OFFSET +
37548c2ecf20Sopenharmony_ci		     BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
37558c2ecf20Sopenharmony_ci		     BTRFS_LEAF_DATA_OFFSET + leaf_data_end(left),
37568c2ecf20Sopenharmony_ci		     push_space);
37578c2ecf20Sopenharmony_ci
37588c2ecf20Sopenharmony_ci	memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
37598c2ecf20Sopenharmony_ci			      btrfs_item_nr_offset(0),
37608c2ecf20Sopenharmony_ci			      right_nritems * sizeof(struct btrfs_item));
37618c2ecf20Sopenharmony_ci
37628c2ecf20Sopenharmony_ci	/* copy the items from left to right */
37638c2ecf20Sopenharmony_ci	copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
37648c2ecf20Sopenharmony_ci		   btrfs_item_nr_offset(left_nritems - push_items),
37658c2ecf20Sopenharmony_ci		   push_items * sizeof(struct btrfs_item));
37668c2ecf20Sopenharmony_ci
37678c2ecf20Sopenharmony_ci	/* update the item pointers */
37688c2ecf20Sopenharmony_ci	btrfs_init_map_token(&token, right);
37698c2ecf20Sopenharmony_ci	right_nritems += push_items;
37708c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(right, right_nritems);
37718c2ecf20Sopenharmony_ci	push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
37728c2ecf20Sopenharmony_ci	for (i = 0; i < right_nritems; i++) {
37738c2ecf20Sopenharmony_ci		item = btrfs_item_nr(i);
37748c2ecf20Sopenharmony_ci		push_space -= btrfs_token_item_size(&token, item);
37758c2ecf20Sopenharmony_ci		btrfs_set_token_item_offset(&token, item, push_space);
37768c2ecf20Sopenharmony_ci	}
37778c2ecf20Sopenharmony_ci
37788c2ecf20Sopenharmony_ci	left_nritems -= push_items;
37798c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(left, left_nritems);
37808c2ecf20Sopenharmony_ci
37818c2ecf20Sopenharmony_ci	if (left_nritems)
37828c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(left);
37838c2ecf20Sopenharmony_ci	else
37848c2ecf20Sopenharmony_ci		btrfs_clean_tree_block(left);
37858c2ecf20Sopenharmony_ci
37868c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(right);
37878c2ecf20Sopenharmony_ci
37888c2ecf20Sopenharmony_ci	btrfs_item_key(right, &disk_key, 0);
37898c2ecf20Sopenharmony_ci	btrfs_set_node_key(upper, &disk_key, slot + 1);
37908c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(upper);
37918c2ecf20Sopenharmony_ci
37928c2ecf20Sopenharmony_ci	/* then fixup the leaf pointer in the path */
37938c2ecf20Sopenharmony_ci	if (path->slots[0] >= left_nritems) {
37948c2ecf20Sopenharmony_ci		path->slots[0] -= left_nritems;
37958c2ecf20Sopenharmony_ci		if (btrfs_header_nritems(path->nodes[0]) == 0)
37968c2ecf20Sopenharmony_ci			btrfs_clean_tree_block(path->nodes[0]);
37978c2ecf20Sopenharmony_ci		btrfs_tree_unlock(path->nodes[0]);
37988c2ecf20Sopenharmony_ci		free_extent_buffer(path->nodes[0]);
37998c2ecf20Sopenharmony_ci		path->nodes[0] = right;
38008c2ecf20Sopenharmony_ci		path->slots[1] += 1;
38018c2ecf20Sopenharmony_ci	} else {
38028c2ecf20Sopenharmony_ci		btrfs_tree_unlock(right);
38038c2ecf20Sopenharmony_ci		free_extent_buffer(right);
38048c2ecf20Sopenharmony_ci	}
38058c2ecf20Sopenharmony_ci	return 0;
38068c2ecf20Sopenharmony_ci
38078c2ecf20Sopenharmony_ciout_unlock:
38088c2ecf20Sopenharmony_ci	btrfs_tree_unlock(right);
38098c2ecf20Sopenharmony_ci	free_extent_buffer(right);
38108c2ecf20Sopenharmony_ci	return 1;
38118c2ecf20Sopenharmony_ci}
38128c2ecf20Sopenharmony_ci
38138c2ecf20Sopenharmony_ci/*
38148c2ecf20Sopenharmony_ci * push some data in the path leaf to the right, trying to free up at
38158c2ecf20Sopenharmony_ci * least data_size bytes.  returns zero if the push worked, nonzero otherwise
38168c2ecf20Sopenharmony_ci *
38178c2ecf20Sopenharmony_ci * returns 1 if the push failed because the other node didn't have enough
38188c2ecf20Sopenharmony_ci * room, 0 if everything worked out and < 0 if there were major errors.
38198c2ecf20Sopenharmony_ci *
38208c2ecf20Sopenharmony_ci * this will push starting from min_slot to the end of the leaf.  It won't
38218c2ecf20Sopenharmony_ci * push any slot lower than min_slot
38228c2ecf20Sopenharmony_ci */
38238c2ecf20Sopenharmony_cistatic int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
38248c2ecf20Sopenharmony_ci			   *root, struct btrfs_path *path,
38258c2ecf20Sopenharmony_ci			   int min_data_size, int data_size,
38268c2ecf20Sopenharmony_ci			   int empty, u32 min_slot)
38278c2ecf20Sopenharmony_ci{
38288c2ecf20Sopenharmony_ci	struct extent_buffer *left = path->nodes[0];
38298c2ecf20Sopenharmony_ci	struct extent_buffer *right;
38308c2ecf20Sopenharmony_ci	struct extent_buffer *upper;
38318c2ecf20Sopenharmony_ci	int slot;
38328c2ecf20Sopenharmony_ci	int free_space;
38338c2ecf20Sopenharmony_ci	u32 left_nritems;
38348c2ecf20Sopenharmony_ci	int ret;
38358c2ecf20Sopenharmony_ci
38368c2ecf20Sopenharmony_ci	if (!path->nodes[1])
38378c2ecf20Sopenharmony_ci		return 1;
38388c2ecf20Sopenharmony_ci
38398c2ecf20Sopenharmony_ci	slot = path->slots[1];
38408c2ecf20Sopenharmony_ci	upper = path->nodes[1];
38418c2ecf20Sopenharmony_ci	if (slot >= btrfs_header_nritems(upper) - 1)
38428c2ecf20Sopenharmony_ci		return 1;
38438c2ecf20Sopenharmony_ci
38448c2ecf20Sopenharmony_ci	btrfs_assert_tree_locked(path->nodes[1]);
38458c2ecf20Sopenharmony_ci
38468c2ecf20Sopenharmony_ci	right = btrfs_read_node_slot(upper, slot + 1);
38478c2ecf20Sopenharmony_ci	/*
38488c2ecf20Sopenharmony_ci	 * slot + 1 is not valid or we fail to read the right node,
38498c2ecf20Sopenharmony_ci	 * no big deal, just return.
38508c2ecf20Sopenharmony_ci	 */
38518c2ecf20Sopenharmony_ci	if (IS_ERR(right))
38528c2ecf20Sopenharmony_ci		return 1;
38538c2ecf20Sopenharmony_ci
38548c2ecf20Sopenharmony_ci	__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
38558c2ecf20Sopenharmony_ci	btrfs_set_lock_blocking_write(right);
38568c2ecf20Sopenharmony_ci
38578c2ecf20Sopenharmony_ci	free_space = btrfs_leaf_free_space(right);
38588c2ecf20Sopenharmony_ci	if (free_space < data_size)
38598c2ecf20Sopenharmony_ci		goto out_unlock;
38608c2ecf20Sopenharmony_ci
38618c2ecf20Sopenharmony_ci	/* cow and double check */
38628c2ecf20Sopenharmony_ci	ret = btrfs_cow_block(trans, root, right, upper,
38638c2ecf20Sopenharmony_ci			      slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
38648c2ecf20Sopenharmony_ci	if (ret)
38658c2ecf20Sopenharmony_ci		goto out_unlock;
38668c2ecf20Sopenharmony_ci
38678c2ecf20Sopenharmony_ci	free_space = btrfs_leaf_free_space(right);
38688c2ecf20Sopenharmony_ci	if (free_space < data_size)
38698c2ecf20Sopenharmony_ci		goto out_unlock;
38708c2ecf20Sopenharmony_ci
38718c2ecf20Sopenharmony_ci	left_nritems = btrfs_header_nritems(left);
38728c2ecf20Sopenharmony_ci	if (left_nritems == 0)
38738c2ecf20Sopenharmony_ci		goto out_unlock;
38748c2ecf20Sopenharmony_ci
38758c2ecf20Sopenharmony_ci	if (check_sibling_keys(left, right)) {
38768c2ecf20Sopenharmony_ci		ret = -EUCLEAN;
38778c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
38788c2ecf20Sopenharmony_ci		btrfs_tree_unlock(right);
38798c2ecf20Sopenharmony_ci		free_extent_buffer(right);
38808c2ecf20Sopenharmony_ci		return ret;
38818c2ecf20Sopenharmony_ci	}
38828c2ecf20Sopenharmony_ci	if (path->slots[0] == left_nritems && !empty) {
38838c2ecf20Sopenharmony_ci		/* Key greater than all keys in the leaf, right neighbor has
38848c2ecf20Sopenharmony_ci		 * enough room for it and we're not emptying our leaf to delete
38858c2ecf20Sopenharmony_ci		 * it, therefore use right neighbor to insert the new item and
38868c2ecf20Sopenharmony_ci		 * no need to touch/dirty our left leaf. */
38878c2ecf20Sopenharmony_ci		btrfs_tree_unlock(left);
38888c2ecf20Sopenharmony_ci		free_extent_buffer(left);
38898c2ecf20Sopenharmony_ci		path->nodes[0] = right;
38908c2ecf20Sopenharmony_ci		path->slots[0] = 0;
38918c2ecf20Sopenharmony_ci		path->slots[1]++;
38928c2ecf20Sopenharmony_ci		return 0;
38938c2ecf20Sopenharmony_ci	}
38948c2ecf20Sopenharmony_ci
38958c2ecf20Sopenharmony_ci	return __push_leaf_right(path, min_data_size, empty,
38968c2ecf20Sopenharmony_ci				right, free_space, left_nritems, min_slot);
38978c2ecf20Sopenharmony_ciout_unlock:
38988c2ecf20Sopenharmony_ci	btrfs_tree_unlock(right);
38998c2ecf20Sopenharmony_ci	free_extent_buffer(right);
39008c2ecf20Sopenharmony_ci	return 1;
39018c2ecf20Sopenharmony_ci}
39028c2ecf20Sopenharmony_ci
39038c2ecf20Sopenharmony_ci/*
39048c2ecf20Sopenharmony_ci * push some data in the path leaf to the left, trying to free up at
39058c2ecf20Sopenharmony_ci * least data_size bytes.  returns zero if the push worked, nonzero otherwise
39068c2ecf20Sopenharmony_ci *
39078c2ecf20Sopenharmony_ci * max_slot can put a limit on how far into the leaf we'll push items.  The
39088c2ecf20Sopenharmony_ci * item at 'max_slot' won't be touched.  Use (u32)-1 to make us do all the
39098c2ecf20Sopenharmony_ci * items
39108c2ecf20Sopenharmony_ci */
39118c2ecf20Sopenharmony_cistatic noinline int __push_leaf_left(struct btrfs_path *path, int data_size,
39128c2ecf20Sopenharmony_ci				     int empty, struct extent_buffer *left,
39138c2ecf20Sopenharmony_ci				     int free_space, u32 right_nritems,
39148c2ecf20Sopenharmony_ci				     u32 max_slot)
39158c2ecf20Sopenharmony_ci{
39168c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = left->fs_info;
39178c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
39188c2ecf20Sopenharmony_ci	struct extent_buffer *right = path->nodes[0];
39198c2ecf20Sopenharmony_ci	int i;
39208c2ecf20Sopenharmony_ci	int push_space = 0;
39218c2ecf20Sopenharmony_ci	int push_items = 0;
39228c2ecf20Sopenharmony_ci	struct btrfs_item *item;
39238c2ecf20Sopenharmony_ci	u32 old_left_nritems;
39248c2ecf20Sopenharmony_ci	u32 nr;
39258c2ecf20Sopenharmony_ci	int ret = 0;
39268c2ecf20Sopenharmony_ci	u32 this_item_size;
39278c2ecf20Sopenharmony_ci	u32 old_left_item_size;
39288c2ecf20Sopenharmony_ci	struct btrfs_map_token token;
39298c2ecf20Sopenharmony_ci
39308c2ecf20Sopenharmony_ci	if (empty)
39318c2ecf20Sopenharmony_ci		nr = min(right_nritems, max_slot);
39328c2ecf20Sopenharmony_ci	else
39338c2ecf20Sopenharmony_ci		nr = min(right_nritems - 1, max_slot);
39348c2ecf20Sopenharmony_ci
39358c2ecf20Sopenharmony_ci	for (i = 0; i < nr; i++) {
39368c2ecf20Sopenharmony_ci		item = btrfs_item_nr(i);
39378c2ecf20Sopenharmony_ci
39388c2ecf20Sopenharmony_ci		if (!empty && push_items > 0) {
39398c2ecf20Sopenharmony_ci			if (path->slots[0] < i)
39408c2ecf20Sopenharmony_ci				break;
39418c2ecf20Sopenharmony_ci			if (path->slots[0] == i) {
39428c2ecf20Sopenharmony_ci				int space = btrfs_leaf_free_space(right);
39438c2ecf20Sopenharmony_ci
39448c2ecf20Sopenharmony_ci				if (space + push_space * 2 > free_space)
39458c2ecf20Sopenharmony_ci					break;
39468c2ecf20Sopenharmony_ci			}
39478c2ecf20Sopenharmony_ci		}
39488c2ecf20Sopenharmony_ci
39498c2ecf20Sopenharmony_ci		if (path->slots[0] == i)
39508c2ecf20Sopenharmony_ci			push_space += data_size;
39518c2ecf20Sopenharmony_ci
39528c2ecf20Sopenharmony_ci		this_item_size = btrfs_item_size(right, item);
39538c2ecf20Sopenharmony_ci		if (this_item_size + sizeof(*item) + push_space > free_space)
39548c2ecf20Sopenharmony_ci			break;
39558c2ecf20Sopenharmony_ci
39568c2ecf20Sopenharmony_ci		push_items++;
39578c2ecf20Sopenharmony_ci		push_space += this_item_size + sizeof(*item);
39588c2ecf20Sopenharmony_ci	}
39598c2ecf20Sopenharmony_ci
39608c2ecf20Sopenharmony_ci	if (push_items == 0) {
39618c2ecf20Sopenharmony_ci		ret = 1;
39628c2ecf20Sopenharmony_ci		goto out;
39638c2ecf20Sopenharmony_ci	}
39648c2ecf20Sopenharmony_ci	WARN_ON(!empty && push_items == btrfs_header_nritems(right));
39658c2ecf20Sopenharmony_ci
39668c2ecf20Sopenharmony_ci	/* push data from right to left */
39678c2ecf20Sopenharmony_ci	copy_extent_buffer(left, right,
39688c2ecf20Sopenharmony_ci			   btrfs_item_nr_offset(btrfs_header_nritems(left)),
39698c2ecf20Sopenharmony_ci			   btrfs_item_nr_offset(0),
39708c2ecf20Sopenharmony_ci			   push_items * sizeof(struct btrfs_item));
39718c2ecf20Sopenharmony_ci
39728c2ecf20Sopenharmony_ci	push_space = BTRFS_LEAF_DATA_SIZE(fs_info) -
39738c2ecf20Sopenharmony_ci		     btrfs_item_offset_nr(right, push_items - 1);
39748c2ecf20Sopenharmony_ci
39758c2ecf20Sopenharmony_ci	copy_extent_buffer(left, right, BTRFS_LEAF_DATA_OFFSET +
39768c2ecf20Sopenharmony_ci		     leaf_data_end(left) - push_space,
39778c2ecf20Sopenharmony_ci		     BTRFS_LEAF_DATA_OFFSET +
39788c2ecf20Sopenharmony_ci		     btrfs_item_offset_nr(right, push_items - 1),
39798c2ecf20Sopenharmony_ci		     push_space);
39808c2ecf20Sopenharmony_ci	old_left_nritems = btrfs_header_nritems(left);
39818c2ecf20Sopenharmony_ci	BUG_ON(old_left_nritems <= 0);
39828c2ecf20Sopenharmony_ci
39838c2ecf20Sopenharmony_ci	btrfs_init_map_token(&token, left);
39848c2ecf20Sopenharmony_ci	old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
39858c2ecf20Sopenharmony_ci	for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
39868c2ecf20Sopenharmony_ci		u32 ioff;
39878c2ecf20Sopenharmony_ci
39888c2ecf20Sopenharmony_ci		item = btrfs_item_nr(i);
39898c2ecf20Sopenharmony_ci
39908c2ecf20Sopenharmony_ci		ioff = btrfs_token_item_offset(&token, item);
39918c2ecf20Sopenharmony_ci		btrfs_set_token_item_offset(&token, item,
39928c2ecf20Sopenharmony_ci		      ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size));
39938c2ecf20Sopenharmony_ci	}
39948c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(left, old_left_nritems + push_items);
39958c2ecf20Sopenharmony_ci
39968c2ecf20Sopenharmony_ci	/* fixup right node */
39978c2ecf20Sopenharmony_ci	if (push_items > right_nritems)
39988c2ecf20Sopenharmony_ci		WARN(1, KERN_CRIT "push items %d nr %u\n", push_items,
39998c2ecf20Sopenharmony_ci		       right_nritems);
40008c2ecf20Sopenharmony_ci
40018c2ecf20Sopenharmony_ci	if (push_items < right_nritems) {
40028c2ecf20Sopenharmony_ci		push_space = btrfs_item_offset_nr(right, push_items - 1) -
40038c2ecf20Sopenharmony_ci						  leaf_data_end(right);
40048c2ecf20Sopenharmony_ci		memmove_extent_buffer(right, BTRFS_LEAF_DATA_OFFSET +
40058c2ecf20Sopenharmony_ci				      BTRFS_LEAF_DATA_SIZE(fs_info) - push_space,
40068c2ecf20Sopenharmony_ci				      BTRFS_LEAF_DATA_OFFSET +
40078c2ecf20Sopenharmony_ci				      leaf_data_end(right), push_space);
40088c2ecf20Sopenharmony_ci
40098c2ecf20Sopenharmony_ci		memmove_extent_buffer(right, btrfs_item_nr_offset(0),
40108c2ecf20Sopenharmony_ci			      btrfs_item_nr_offset(push_items),
40118c2ecf20Sopenharmony_ci			     (btrfs_header_nritems(right) - push_items) *
40128c2ecf20Sopenharmony_ci			     sizeof(struct btrfs_item));
40138c2ecf20Sopenharmony_ci	}
40148c2ecf20Sopenharmony_ci
40158c2ecf20Sopenharmony_ci	btrfs_init_map_token(&token, right);
40168c2ecf20Sopenharmony_ci	right_nritems -= push_items;
40178c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(right, right_nritems);
40188c2ecf20Sopenharmony_ci	push_space = BTRFS_LEAF_DATA_SIZE(fs_info);
40198c2ecf20Sopenharmony_ci	for (i = 0; i < right_nritems; i++) {
40208c2ecf20Sopenharmony_ci		item = btrfs_item_nr(i);
40218c2ecf20Sopenharmony_ci
40228c2ecf20Sopenharmony_ci		push_space = push_space - btrfs_token_item_size(&token, item);
40238c2ecf20Sopenharmony_ci		btrfs_set_token_item_offset(&token, item, push_space);
40248c2ecf20Sopenharmony_ci	}
40258c2ecf20Sopenharmony_ci
40268c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(left);
40278c2ecf20Sopenharmony_ci	if (right_nritems)
40288c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(right);
40298c2ecf20Sopenharmony_ci	else
40308c2ecf20Sopenharmony_ci		btrfs_clean_tree_block(right);
40318c2ecf20Sopenharmony_ci
40328c2ecf20Sopenharmony_ci	btrfs_item_key(right, &disk_key, 0);
40338c2ecf20Sopenharmony_ci	fixup_low_keys(path, &disk_key, 1);
40348c2ecf20Sopenharmony_ci
40358c2ecf20Sopenharmony_ci	/* then fixup the leaf pointer in the path */
40368c2ecf20Sopenharmony_ci	if (path->slots[0] < push_items) {
40378c2ecf20Sopenharmony_ci		path->slots[0] += old_left_nritems;
40388c2ecf20Sopenharmony_ci		btrfs_tree_unlock(path->nodes[0]);
40398c2ecf20Sopenharmony_ci		free_extent_buffer(path->nodes[0]);
40408c2ecf20Sopenharmony_ci		path->nodes[0] = left;
40418c2ecf20Sopenharmony_ci		path->slots[1] -= 1;
40428c2ecf20Sopenharmony_ci	} else {
40438c2ecf20Sopenharmony_ci		btrfs_tree_unlock(left);
40448c2ecf20Sopenharmony_ci		free_extent_buffer(left);
40458c2ecf20Sopenharmony_ci		path->slots[0] -= push_items;
40468c2ecf20Sopenharmony_ci	}
40478c2ecf20Sopenharmony_ci	BUG_ON(path->slots[0] < 0);
40488c2ecf20Sopenharmony_ci	return ret;
40498c2ecf20Sopenharmony_ciout:
40508c2ecf20Sopenharmony_ci	btrfs_tree_unlock(left);
40518c2ecf20Sopenharmony_ci	free_extent_buffer(left);
40528c2ecf20Sopenharmony_ci	return ret;
40538c2ecf20Sopenharmony_ci}
40548c2ecf20Sopenharmony_ci
40558c2ecf20Sopenharmony_ci/*
40568c2ecf20Sopenharmony_ci * push some data in the path leaf to the left, trying to free up at
40578c2ecf20Sopenharmony_ci * least data_size bytes.  returns zero if the push worked, nonzero otherwise
40588c2ecf20Sopenharmony_ci *
40598c2ecf20Sopenharmony_ci * max_slot can put a limit on how far into the leaf we'll push items.  The
40608c2ecf20Sopenharmony_ci * item at 'max_slot' won't be touched.  Use (u32)-1 to make us push all the
40618c2ecf20Sopenharmony_ci * items
40628c2ecf20Sopenharmony_ci */
40638c2ecf20Sopenharmony_cistatic int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
40648c2ecf20Sopenharmony_ci			  *root, struct btrfs_path *path, int min_data_size,
40658c2ecf20Sopenharmony_ci			  int data_size, int empty, u32 max_slot)
40668c2ecf20Sopenharmony_ci{
40678c2ecf20Sopenharmony_ci	struct extent_buffer *right = path->nodes[0];
40688c2ecf20Sopenharmony_ci	struct extent_buffer *left;
40698c2ecf20Sopenharmony_ci	int slot;
40708c2ecf20Sopenharmony_ci	int free_space;
40718c2ecf20Sopenharmony_ci	u32 right_nritems;
40728c2ecf20Sopenharmony_ci	int ret = 0;
40738c2ecf20Sopenharmony_ci
40748c2ecf20Sopenharmony_ci	slot = path->slots[1];
40758c2ecf20Sopenharmony_ci	if (slot == 0)
40768c2ecf20Sopenharmony_ci		return 1;
40778c2ecf20Sopenharmony_ci	if (!path->nodes[1])
40788c2ecf20Sopenharmony_ci		return 1;
40798c2ecf20Sopenharmony_ci
40808c2ecf20Sopenharmony_ci	right_nritems = btrfs_header_nritems(right);
40818c2ecf20Sopenharmony_ci	if (right_nritems == 0)
40828c2ecf20Sopenharmony_ci		return 1;
40838c2ecf20Sopenharmony_ci
40848c2ecf20Sopenharmony_ci	btrfs_assert_tree_locked(path->nodes[1]);
40858c2ecf20Sopenharmony_ci
40868c2ecf20Sopenharmony_ci	left = btrfs_read_node_slot(path->nodes[1], slot - 1);
40878c2ecf20Sopenharmony_ci	/*
40888c2ecf20Sopenharmony_ci	 * slot - 1 is not valid or we fail to read the left node,
40898c2ecf20Sopenharmony_ci	 * no big deal, just return.
40908c2ecf20Sopenharmony_ci	 */
40918c2ecf20Sopenharmony_ci	if (IS_ERR(left))
40928c2ecf20Sopenharmony_ci		return 1;
40938c2ecf20Sopenharmony_ci
40948c2ecf20Sopenharmony_ci	__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
40958c2ecf20Sopenharmony_ci	btrfs_set_lock_blocking_write(left);
40968c2ecf20Sopenharmony_ci
40978c2ecf20Sopenharmony_ci	free_space = btrfs_leaf_free_space(left);
40988c2ecf20Sopenharmony_ci	if (free_space < data_size) {
40998c2ecf20Sopenharmony_ci		ret = 1;
41008c2ecf20Sopenharmony_ci		goto out;
41018c2ecf20Sopenharmony_ci	}
41028c2ecf20Sopenharmony_ci
41038c2ecf20Sopenharmony_ci	/* cow and double check */
41048c2ecf20Sopenharmony_ci	ret = btrfs_cow_block(trans, root, left,
41058c2ecf20Sopenharmony_ci			      path->nodes[1], slot - 1, &left,
41068c2ecf20Sopenharmony_ci			      BTRFS_NESTING_LEFT_COW);
41078c2ecf20Sopenharmony_ci	if (ret) {
41088c2ecf20Sopenharmony_ci		/* we hit -ENOSPC, but it isn't fatal here */
41098c2ecf20Sopenharmony_ci		if (ret == -ENOSPC)
41108c2ecf20Sopenharmony_ci			ret = 1;
41118c2ecf20Sopenharmony_ci		goto out;
41128c2ecf20Sopenharmony_ci	}
41138c2ecf20Sopenharmony_ci
41148c2ecf20Sopenharmony_ci	free_space = btrfs_leaf_free_space(left);
41158c2ecf20Sopenharmony_ci	if (free_space < data_size) {
41168c2ecf20Sopenharmony_ci		ret = 1;
41178c2ecf20Sopenharmony_ci		goto out;
41188c2ecf20Sopenharmony_ci	}
41198c2ecf20Sopenharmony_ci
41208c2ecf20Sopenharmony_ci	if (check_sibling_keys(left, right)) {
41218c2ecf20Sopenharmony_ci		ret = -EUCLEAN;
41228c2ecf20Sopenharmony_ci		btrfs_abort_transaction(trans, ret);
41238c2ecf20Sopenharmony_ci		goto out;
41248c2ecf20Sopenharmony_ci	}
41258c2ecf20Sopenharmony_ci	return __push_leaf_left(path, min_data_size,
41268c2ecf20Sopenharmony_ci			       empty, left, free_space, right_nritems,
41278c2ecf20Sopenharmony_ci			       max_slot);
41288c2ecf20Sopenharmony_ciout:
41298c2ecf20Sopenharmony_ci	btrfs_tree_unlock(left);
41308c2ecf20Sopenharmony_ci	free_extent_buffer(left);
41318c2ecf20Sopenharmony_ci	return ret;
41328c2ecf20Sopenharmony_ci}
41338c2ecf20Sopenharmony_ci
41348c2ecf20Sopenharmony_ci/*
41358c2ecf20Sopenharmony_ci * split the path's leaf in two, making sure there is at least data_size
41368c2ecf20Sopenharmony_ci * available for the resulting leaf level of the path.
41378c2ecf20Sopenharmony_ci */
41388c2ecf20Sopenharmony_cistatic noinline void copy_for_split(struct btrfs_trans_handle *trans,
41398c2ecf20Sopenharmony_ci				    struct btrfs_path *path,
41408c2ecf20Sopenharmony_ci				    struct extent_buffer *l,
41418c2ecf20Sopenharmony_ci				    struct extent_buffer *right,
41428c2ecf20Sopenharmony_ci				    int slot, int mid, int nritems)
41438c2ecf20Sopenharmony_ci{
41448c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = trans->fs_info;
41458c2ecf20Sopenharmony_ci	int data_copy_size;
41468c2ecf20Sopenharmony_ci	int rt_data_off;
41478c2ecf20Sopenharmony_ci	int i;
41488c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
41498c2ecf20Sopenharmony_ci	struct btrfs_map_token token;
41508c2ecf20Sopenharmony_ci
41518c2ecf20Sopenharmony_ci	nritems = nritems - mid;
41528c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(right, nritems);
41538c2ecf20Sopenharmony_ci	data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(l);
41548c2ecf20Sopenharmony_ci
41558c2ecf20Sopenharmony_ci	copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
41568c2ecf20Sopenharmony_ci			   btrfs_item_nr_offset(mid),
41578c2ecf20Sopenharmony_ci			   nritems * sizeof(struct btrfs_item));
41588c2ecf20Sopenharmony_ci
41598c2ecf20Sopenharmony_ci	copy_extent_buffer(right, l,
41608c2ecf20Sopenharmony_ci		     BTRFS_LEAF_DATA_OFFSET + BTRFS_LEAF_DATA_SIZE(fs_info) -
41618c2ecf20Sopenharmony_ci		     data_copy_size, BTRFS_LEAF_DATA_OFFSET +
41628c2ecf20Sopenharmony_ci		     leaf_data_end(l), data_copy_size);
41638c2ecf20Sopenharmony_ci
41648c2ecf20Sopenharmony_ci	rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_end_nr(l, mid);
41658c2ecf20Sopenharmony_ci
41668c2ecf20Sopenharmony_ci	btrfs_init_map_token(&token, right);
41678c2ecf20Sopenharmony_ci	for (i = 0; i < nritems; i++) {
41688c2ecf20Sopenharmony_ci		struct btrfs_item *item = btrfs_item_nr(i);
41698c2ecf20Sopenharmony_ci		u32 ioff;
41708c2ecf20Sopenharmony_ci
41718c2ecf20Sopenharmony_ci		ioff = btrfs_token_item_offset(&token, item);
41728c2ecf20Sopenharmony_ci		btrfs_set_token_item_offset(&token, item, ioff + rt_data_off);
41738c2ecf20Sopenharmony_ci	}
41748c2ecf20Sopenharmony_ci
41758c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(l, mid);
41768c2ecf20Sopenharmony_ci	btrfs_item_key(right, &disk_key, 0);
41778c2ecf20Sopenharmony_ci	insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
41788c2ecf20Sopenharmony_ci
41798c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(right);
41808c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(l);
41818c2ecf20Sopenharmony_ci	BUG_ON(path->slots[0] != slot);
41828c2ecf20Sopenharmony_ci
41838c2ecf20Sopenharmony_ci	if (mid <= slot) {
41848c2ecf20Sopenharmony_ci		btrfs_tree_unlock(path->nodes[0]);
41858c2ecf20Sopenharmony_ci		free_extent_buffer(path->nodes[0]);
41868c2ecf20Sopenharmony_ci		path->nodes[0] = right;
41878c2ecf20Sopenharmony_ci		path->slots[0] -= mid;
41888c2ecf20Sopenharmony_ci		path->slots[1] += 1;
41898c2ecf20Sopenharmony_ci	} else {
41908c2ecf20Sopenharmony_ci		btrfs_tree_unlock(right);
41918c2ecf20Sopenharmony_ci		free_extent_buffer(right);
41928c2ecf20Sopenharmony_ci	}
41938c2ecf20Sopenharmony_ci
41948c2ecf20Sopenharmony_ci	BUG_ON(path->slots[0] < 0);
41958c2ecf20Sopenharmony_ci}
41968c2ecf20Sopenharmony_ci
41978c2ecf20Sopenharmony_ci/*
41988c2ecf20Sopenharmony_ci * double splits happen when we need to insert a big item in the middle
41998c2ecf20Sopenharmony_ci * of a leaf.  A double split can leave us with 3 mostly empty leaves:
42008c2ecf20Sopenharmony_ci * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ]
42018c2ecf20Sopenharmony_ci *          A                 B                 C
42028c2ecf20Sopenharmony_ci *
42038c2ecf20Sopenharmony_ci * We avoid this by trying to push the items on either side of our target
42048c2ecf20Sopenharmony_ci * into the adjacent leaves.  If all goes well we can avoid the double split
42058c2ecf20Sopenharmony_ci * completely.
42068c2ecf20Sopenharmony_ci */
42078c2ecf20Sopenharmony_cistatic noinline int push_for_double_split(struct btrfs_trans_handle *trans,
42088c2ecf20Sopenharmony_ci					  struct btrfs_root *root,
42098c2ecf20Sopenharmony_ci					  struct btrfs_path *path,
42108c2ecf20Sopenharmony_ci					  int data_size)
42118c2ecf20Sopenharmony_ci{
42128c2ecf20Sopenharmony_ci	int ret;
42138c2ecf20Sopenharmony_ci	int progress = 0;
42148c2ecf20Sopenharmony_ci	int slot;
42158c2ecf20Sopenharmony_ci	u32 nritems;
42168c2ecf20Sopenharmony_ci	int space_needed = data_size;
42178c2ecf20Sopenharmony_ci
42188c2ecf20Sopenharmony_ci	slot = path->slots[0];
42198c2ecf20Sopenharmony_ci	if (slot < btrfs_header_nritems(path->nodes[0]))
42208c2ecf20Sopenharmony_ci		space_needed -= btrfs_leaf_free_space(path->nodes[0]);
42218c2ecf20Sopenharmony_ci
42228c2ecf20Sopenharmony_ci	/*
42238c2ecf20Sopenharmony_ci	 * try to push all the items after our slot into the
42248c2ecf20Sopenharmony_ci	 * right leaf
42258c2ecf20Sopenharmony_ci	 */
42268c2ecf20Sopenharmony_ci	ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
42278c2ecf20Sopenharmony_ci	if (ret < 0)
42288c2ecf20Sopenharmony_ci		return ret;
42298c2ecf20Sopenharmony_ci
42308c2ecf20Sopenharmony_ci	if (ret == 0)
42318c2ecf20Sopenharmony_ci		progress++;
42328c2ecf20Sopenharmony_ci
42338c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(path->nodes[0]);
42348c2ecf20Sopenharmony_ci	/*
42358c2ecf20Sopenharmony_ci	 * our goal is to get our slot at the start or end of a leaf.  If
42368c2ecf20Sopenharmony_ci	 * we've done so we're done
42378c2ecf20Sopenharmony_ci	 */
42388c2ecf20Sopenharmony_ci	if (path->slots[0] == 0 || path->slots[0] == nritems)
42398c2ecf20Sopenharmony_ci		return 0;
42408c2ecf20Sopenharmony_ci
42418c2ecf20Sopenharmony_ci	if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
42428c2ecf20Sopenharmony_ci		return 0;
42438c2ecf20Sopenharmony_ci
42448c2ecf20Sopenharmony_ci	/* try to push all the items before our slot into the next leaf */
42458c2ecf20Sopenharmony_ci	slot = path->slots[0];
42468c2ecf20Sopenharmony_ci	space_needed = data_size;
42478c2ecf20Sopenharmony_ci	if (slot > 0)
42488c2ecf20Sopenharmony_ci		space_needed -= btrfs_leaf_free_space(path->nodes[0]);
42498c2ecf20Sopenharmony_ci	ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
42508c2ecf20Sopenharmony_ci	if (ret < 0)
42518c2ecf20Sopenharmony_ci		return ret;
42528c2ecf20Sopenharmony_ci
42538c2ecf20Sopenharmony_ci	if (ret == 0)
42548c2ecf20Sopenharmony_ci		progress++;
42558c2ecf20Sopenharmony_ci
42568c2ecf20Sopenharmony_ci	if (progress)
42578c2ecf20Sopenharmony_ci		return 0;
42588c2ecf20Sopenharmony_ci	return 1;
42598c2ecf20Sopenharmony_ci}
42608c2ecf20Sopenharmony_ci
42618c2ecf20Sopenharmony_ci/*
42628c2ecf20Sopenharmony_ci * split the path's leaf in two, making sure there is at least data_size
42638c2ecf20Sopenharmony_ci * available for the resulting leaf level of the path.
42648c2ecf20Sopenharmony_ci *
42658c2ecf20Sopenharmony_ci * returns 0 if all went well and < 0 on failure.
42668c2ecf20Sopenharmony_ci */
42678c2ecf20Sopenharmony_cistatic noinline int split_leaf(struct btrfs_trans_handle *trans,
42688c2ecf20Sopenharmony_ci			       struct btrfs_root *root,
42698c2ecf20Sopenharmony_ci			       const struct btrfs_key *ins_key,
42708c2ecf20Sopenharmony_ci			       struct btrfs_path *path, int data_size,
42718c2ecf20Sopenharmony_ci			       int extend)
42728c2ecf20Sopenharmony_ci{
42738c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
42748c2ecf20Sopenharmony_ci	struct extent_buffer *l;
42758c2ecf20Sopenharmony_ci	u32 nritems;
42768c2ecf20Sopenharmony_ci	int mid;
42778c2ecf20Sopenharmony_ci	int slot;
42788c2ecf20Sopenharmony_ci	struct extent_buffer *right;
42798c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
42808c2ecf20Sopenharmony_ci	int ret = 0;
42818c2ecf20Sopenharmony_ci	int wret;
42828c2ecf20Sopenharmony_ci	int split;
42838c2ecf20Sopenharmony_ci	int num_doubles = 0;
42848c2ecf20Sopenharmony_ci	int tried_avoid_double = 0;
42858c2ecf20Sopenharmony_ci
42868c2ecf20Sopenharmony_ci	l = path->nodes[0];
42878c2ecf20Sopenharmony_ci	slot = path->slots[0];
42888c2ecf20Sopenharmony_ci	if (extend && data_size + btrfs_item_size_nr(l, slot) +
42898c2ecf20Sopenharmony_ci	    sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info))
42908c2ecf20Sopenharmony_ci		return -EOVERFLOW;
42918c2ecf20Sopenharmony_ci
42928c2ecf20Sopenharmony_ci	/* first try to make some room by pushing left and right */
42938c2ecf20Sopenharmony_ci	if (data_size && path->nodes[1]) {
42948c2ecf20Sopenharmony_ci		int space_needed = data_size;
42958c2ecf20Sopenharmony_ci
42968c2ecf20Sopenharmony_ci		if (slot < btrfs_header_nritems(l))
42978c2ecf20Sopenharmony_ci			space_needed -= btrfs_leaf_free_space(l);
42988c2ecf20Sopenharmony_ci
42998c2ecf20Sopenharmony_ci		wret = push_leaf_right(trans, root, path, space_needed,
43008c2ecf20Sopenharmony_ci				       space_needed, 0, 0);
43018c2ecf20Sopenharmony_ci		if (wret < 0)
43028c2ecf20Sopenharmony_ci			return wret;
43038c2ecf20Sopenharmony_ci		if (wret) {
43048c2ecf20Sopenharmony_ci			space_needed = data_size;
43058c2ecf20Sopenharmony_ci			if (slot > 0)
43068c2ecf20Sopenharmony_ci				space_needed -= btrfs_leaf_free_space(l);
43078c2ecf20Sopenharmony_ci			wret = push_leaf_left(trans, root, path, space_needed,
43088c2ecf20Sopenharmony_ci					      space_needed, 0, (u32)-1);
43098c2ecf20Sopenharmony_ci			if (wret < 0)
43108c2ecf20Sopenharmony_ci				return wret;
43118c2ecf20Sopenharmony_ci		}
43128c2ecf20Sopenharmony_ci		l = path->nodes[0];
43138c2ecf20Sopenharmony_ci
43148c2ecf20Sopenharmony_ci		/* did the pushes work? */
43158c2ecf20Sopenharmony_ci		if (btrfs_leaf_free_space(l) >= data_size)
43168c2ecf20Sopenharmony_ci			return 0;
43178c2ecf20Sopenharmony_ci	}
43188c2ecf20Sopenharmony_ci
43198c2ecf20Sopenharmony_ci	if (!path->nodes[1]) {
43208c2ecf20Sopenharmony_ci		ret = insert_new_root(trans, root, path, 1);
43218c2ecf20Sopenharmony_ci		if (ret)
43228c2ecf20Sopenharmony_ci			return ret;
43238c2ecf20Sopenharmony_ci	}
43248c2ecf20Sopenharmony_ciagain:
43258c2ecf20Sopenharmony_ci	split = 1;
43268c2ecf20Sopenharmony_ci	l = path->nodes[0];
43278c2ecf20Sopenharmony_ci	slot = path->slots[0];
43288c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(l);
43298c2ecf20Sopenharmony_ci	mid = (nritems + 1) / 2;
43308c2ecf20Sopenharmony_ci
43318c2ecf20Sopenharmony_ci	if (mid <= slot) {
43328c2ecf20Sopenharmony_ci		if (nritems == 1 ||
43338c2ecf20Sopenharmony_ci		    leaf_space_used(l, mid, nritems - mid) + data_size >
43348c2ecf20Sopenharmony_ci			BTRFS_LEAF_DATA_SIZE(fs_info)) {
43358c2ecf20Sopenharmony_ci			if (slot >= nritems) {
43368c2ecf20Sopenharmony_ci				split = 0;
43378c2ecf20Sopenharmony_ci			} else {
43388c2ecf20Sopenharmony_ci				mid = slot;
43398c2ecf20Sopenharmony_ci				if (mid != nritems &&
43408c2ecf20Sopenharmony_ci				    leaf_space_used(l, mid, nritems - mid) +
43418c2ecf20Sopenharmony_ci				    data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
43428c2ecf20Sopenharmony_ci					if (data_size && !tried_avoid_double)
43438c2ecf20Sopenharmony_ci						goto push_for_double;
43448c2ecf20Sopenharmony_ci					split = 2;
43458c2ecf20Sopenharmony_ci				}
43468c2ecf20Sopenharmony_ci			}
43478c2ecf20Sopenharmony_ci		}
43488c2ecf20Sopenharmony_ci	} else {
43498c2ecf20Sopenharmony_ci		if (leaf_space_used(l, 0, mid) + data_size >
43508c2ecf20Sopenharmony_ci			BTRFS_LEAF_DATA_SIZE(fs_info)) {
43518c2ecf20Sopenharmony_ci			if (!extend && data_size && slot == 0) {
43528c2ecf20Sopenharmony_ci				split = 0;
43538c2ecf20Sopenharmony_ci			} else if ((extend || !data_size) && slot == 0) {
43548c2ecf20Sopenharmony_ci				mid = 1;
43558c2ecf20Sopenharmony_ci			} else {
43568c2ecf20Sopenharmony_ci				mid = slot;
43578c2ecf20Sopenharmony_ci				if (mid != nritems &&
43588c2ecf20Sopenharmony_ci				    leaf_space_used(l, mid, nritems - mid) +
43598c2ecf20Sopenharmony_ci				    data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) {
43608c2ecf20Sopenharmony_ci					if (data_size && !tried_avoid_double)
43618c2ecf20Sopenharmony_ci						goto push_for_double;
43628c2ecf20Sopenharmony_ci					split = 2;
43638c2ecf20Sopenharmony_ci				}
43648c2ecf20Sopenharmony_ci			}
43658c2ecf20Sopenharmony_ci		}
43668c2ecf20Sopenharmony_ci	}
43678c2ecf20Sopenharmony_ci
43688c2ecf20Sopenharmony_ci	if (split == 0)
43698c2ecf20Sopenharmony_ci		btrfs_cpu_key_to_disk(&disk_key, ins_key);
43708c2ecf20Sopenharmony_ci	else
43718c2ecf20Sopenharmony_ci		btrfs_item_key(l, &disk_key, mid);
43728c2ecf20Sopenharmony_ci
43738c2ecf20Sopenharmony_ci	/*
43748c2ecf20Sopenharmony_ci	 * We have to about BTRFS_NESTING_NEW_ROOT here if we've done a double
43758c2ecf20Sopenharmony_ci	 * split, because we're only allowed to have MAX_LOCKDEP_SUBCLASSES
43768c2ecf20Sopenharmony_ci	 * subclasses, which is 8 at the time of this patch, and we've maxed it
43778c2ecf20Sopenharmony_ci	 * out.  In the future we could add a
43788c2ecf20Sopenharmony_ci	 * BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just
43798c2ecf20Sopenharmony_ci	 * use BTRFS_NESTING_NEW_ROOT.
43808c2ecf20Sopenharmony_ci	 */
43818c2ecf20Sopenharmony_ci	right = alloc_tree_block_no_bg_flush(trans, root, 0, &disk_key, 0,
43828c2ecf20Sopenharmony_ci					     l->start, 0, num_doubles ?
43838c2ecf20Sopenharmony_ci					     BTRFS_NESTING_NEW_ROOT :
43848c2ecf20Sopenharmony_ci					     BTRFS_NESTING_SPLIT);
43858c2ecf20Sopenharmony_ci	if (IS_ERR(right))
43868c2ecf20Sopenharmony_ci		return PTR_ERR(right);
43878c2ecf20Sopenharmony_ci
43888c2ecf20Sopenharmony_ci	root_add_used(root, fs_info->nodesize);
43898c2ecf20Sopenharmony_ci
43908c2ecf20Sopenharmony_ci	if (split == 0) {
43918c2ecf20Sopenharmony_ci		if (mid <= slot) {
43928c2ecf20Sopenharmony_ci			btrfs_set_header_nritems(right, 0);
43938c2ecf20Sopenharmony_ci			insert_ptr(trans, path, &disk_key,
43948c2ecf20Sopenharmony_ci				   right->start, path->slots[1] + 1, 1);
43958c2ecf20Sopenharmony_ci			btrfs_tree_unlock(path->nodes[0]);
43968c2ecf20Sopenharmony_ci			free_extent_buffer(path->nodes[0]);
43978c2ecf20Sopenharmony_ci			path->nodes[0] = right;
43988c2ecf20Sopenharmony_ci			path->slots[0] = 0;
43998c2ecf20Sopenharmony_ci			path->slots[1] += 1;
44008c2ecf20Sopenharmony_ci		} else {
44018c2ecf20Sopenharmony_ci			btrfs_set_header_nritems(right, 0);
44028c2ecf20Sopenharmony_ci			insert_ptr(trans, path, &disk_key,
44038c2ecf20Sopenharmony_ci				   right->start, path->slots[1], 1);
44048c2ecf20Sopenharmony_ci			btrfs_tree_unlock(path->nodes[0]);
44058c2ecf20Sopenharmony_ci			free_extent_buffer(path->nodes[0]);
44068c2ecf20Sopenharmony_ci			path->nodes[0] = right;
44078c2ecf20Sopenharmony_ci			path->slots[0] = 0;
44088c2ecf20Sopenharmony_ci			if (path->slots[1] == 0)
44098c2ecf20Sopenharmony_ci				fixup_low_keys(path, &disk_key, 1);
44108c2ecf20Sopenharmony_ci		}
44118c2ecf20Sopenharmony_ci		/*
44128c2ecf20Sopenharmony_ci		 * We create a new leaf 'right' for the required ins_len and
44138c2ecf20Sopenharmony_ci		 * we'll do btrfs_mark_buffer_dirty() on this leaf after copying
44148c2ecf20Sopenharmony_ci		 * the content of ins_len to 'right'.
44158c2ecf20Sopenharmony_ci		 */
44168c2ecf20Sopenharmony_ci		return ret;
44178c2ecf20Sopenharmony_ci	}
44188c2ecf20Sopenharmony_ci
44198c2ecf20Sopenharmony_ci	copy_for_split(trans, path, l, right, slot, mid, nritems);
44208c2ecf20Sopenharmony_ci
44218c2ecf20Sopenharmony_ci	if (split == 2) {
44228c2ecf20Sopenharmony_ci		BUG_ON(num_doubles != 0);
44238c2ecf20Sopenharmony_ci		num_doubles++;
44248c2ecf20Sopenharmony_ci		goto again;
44258c2ecf20Sopenharmony_ci	}
44268c2ecf20Sopenharmony_ci
44278c2ecf20Sopenharmony_ci	return 0;
44288c2ecf20Sopenharmony_ci
44298c2ecf20Sopenharmony_cipush_for_double:
44308c2ecf20Sopenharmony_ci	push_for_double_split(trans, root, path, data_size);
44318c2ecf20Sopenharmony_ci	tried_avoid_double = 1;
44328c2ecf20Sopenharmony_ci	if (btrfs_leaf_free_space(path->nodes[0]) >= data_size)
44338c2ecf20Sopenharmony_ci		return 0;
44348c2ecf20Sopenharmony_ci	goto again;
44358c2ecf20Sopenharmony_ci}
44368c2ecf20Sopenharmony_ci
44378c2ecf20Sopenharmony_cistatic noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
44388c2ecf20Sopenharmony_ci					 struct btrfs_root *root,
44398c2ecf20Sopenharmony_ci					 struct btrfs_path *path, int ins_len)
44408c2ecf20Sopenharmony_ci{
44418c2ecf20Sopenharmony_ci	struct btrfs_key key;
44428c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
44438c2ecf20Sopenharmony_ci	struct btrfs_file_extent_item *fi;
44448c2ecf20Sopenharmony_ci	u64 extent_len = 0;
44458c2ecf20Sopenharmony_ci	u32 item_size;
44468c2ecf20Sopenharmony_ci	int ret;
44478c2ecf20Sopenharmony_ci
44488c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
44498c2ecf20Sopenharmony_ci	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
44508c2ecf20Sopenharmony_ci
44518c2ecf20Sopenharmony_ci	BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
44528c2ecf20Sopenharmony_ci	       key.type != BTRFS_EXTENT_CSUM_KEY);
44538c2ecf20Sopenharmony_ci
44548c2ecf20Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) >= ins_len)
44558c2ecf20Sopenharmony_ci		return 0;
44568c2ecf20Sopenharmony_ci
44578c2ecf20Sopenharmony_ci	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
44588c2ecf20Sopenharmony_ci	if (key.type == BTRFS_EXTENT_DATA_KEY) {
44598c2ecf20Sopenharmony_ci		fi = btrfs_item_ptr(leaf, path->slots[0],
44608c2ecf20Sopenharmony_ci				    struct btrfs_file_extent_item);
44618c2ecf20Sopenharmony_ci		extent_len = btrfs_file_extent_num_bytes(leaf, fi);
44628c2ecf20Sopenharmony_ci	}
44638c2ecf20Sopenharmony_ci	btrfs_release_path(path);
44648c2ecf20Sopenharmony_ci
44658c2ecf20Sopenharmony_ci	path->keep_locks = 1;
44668c2ecf20Sopenharmony_ci	path->search_for_split = 1;
44678c2ecf20Sopenharmony_ci	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
44688c2ecf20Sopenharmony_ci	path->search_for_split = 0;
44698c2ecf20Sopenharmony_ci	if (ret > 0)
44708c2ecf20Sopenharmony_ci		ret = -EAGAIN;
44718c2ecf20Sopenharmony_ci	if (ret < 0)
44728c2ecf20Sopenharmony_ci		goto err;
44738c2ecf20Sopenharmony_ci
44748c2ecf20Sopenharmony_ci	ret = -EAGAIN;
44758c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
44768c2ecf20Sopenharmony_ci	/* if our item isn't there, return now */
44778c2ecf20Sopenharmony_ci	if (item_size != btrfs_item_size_nr(leaf, path->slots[0]))
44788c2ecf20Sopenharmony_ci		goto err;
44798c2ecf20Sopenharmony_ci
44808c2ecf20Sopenharmony_ci	/* the leaf has  changed, it now has room.  return now */
44818c2ecf20Sopenharmony_ci	if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len)
44828c2ecf20Sopenharmony_ci		goto err;
44838c2ecf20Sopenharmony_ci
44848c2ecf20Sopenharmony_ci	if (key.type == BTRFS_EXTENT_DATA_KEY) {
44858c2ecf20Sopenharmony_ci		fi = btrfs_item_ptr(leaf, path->slots[0],
44868c2ecf20Sopenharmony_ci				    struct btrfs_file_extent_item);
44878c2ecf20Sopenharmony_ci		if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
44888c2ecf20Sopenharmony_ci			goto err;
44898c2ecf20Sopenharmony_ci	}
44908c2ecf20Sopenharmony_ci
44918c2ecf20Sopenharmony_ci	btrfs_set_path_blocking(path);
44928c2ecf20Sopenharmony_ci	ret = split_leaf(trans, root, &key, path, ins_len, 1);
44938c2ecf20Sopenharmony_ci	if (ret)
44948c2ecf20Sopenharmony_ci		goto err;
44958c2ecf20Sopenharmony_ci
44968c2ecf20Sopenharmony_ci	path->keep_locks = 0;
44978c2ecf20Sopenharmony_ci	btrfs_unlock_up_safe(path, 1);
44988c2ecf20Sopenharmony_ci	return 0;
44998c2ecf20Sopenharmony_cierr:
45008c2ecf20Sopenharmony_ci	path->keep_locks = 0;
45018c2ecf20Sopenharmony_ci	return ret;
45028c2ecf20Sopenharmony_ci}
45038c2ecf20Sopenharmony_ci
45048c2ecf20Sopenharmony_cistatic noinline int split_item(struct btrfs_path *path,
45058c2ecf20Sopenharmony_ci			       const struct btrfs_key *new_key,
45068c2ecf20Sopenharmony_ci			       unsigned long split_offset)
45078c2ecf20Sopenharmony_ci{
45088c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
45098c2ecf20Sopenharmony_ci	struct btrfs_item *item;
45108c2ecf20Sopenharmony_ci	struct btrfs_item *new_item;
45118c2ecf20Sopenharmony_ci	int slot;
45128c2ecf20Sopenharmony_ci	char *buf;
45138c2ecf20Sopenharmony_ci	u32 nritems;
45148c2ecf20Sopenharmony_ci	u32 item_size;
45158c2ecf20Sopenharmony_ci	u32 orig_offset;
45168c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
45178c2ecf20Sopenharmony_ci
45188c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
45198c2ecf20Sopenharmony_ci	BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
45208c2ecf20Sopenharmony_ci
45218c2ecf20Sopenharmony_ci	btrfs_set_path_blocking(path);
45228c2ecf20Sopenharmony_ci
45238c2ecf20Sopenharmony_ci	item = btrfs_item_nr(path->slots[0]);
45248c2ecf20Sopenharmony_ci	orig_offset = btrfs_item_offset(leaf, item);
45258c2ecf20Sopenharmony_ci	item_size = btrfs_item_size(leaf, item);
45268c2ecf20Sopenharmony_ci
45278c2ecf20Sopenharmony_ci	buf = kmalloc(item_size, GFP_NOFS);
45288c2ecf20Sopenharmony_ci	if (!buf)
45298c2ecf20Sopenharmony_ci		return -ENOMEM;
45308c2ecf20Sopenharmony_ci
45318c2ecf20Sopenharmony_ci	read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
45328c2ecf20Sopenharmony_ci			    path->slots[0]), item_size);
45338c2ecf20Sopenharmony_ci
45348c2ecf20Sopenharmony_ci	slot = path->slots[0] + 1;
45358c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(leaf);
45368c2ecf20Sopenharmony_ci	if (slot != nritems) {
45378c2ecf20Sopenharmony_ci		/* shift the items */
45388c2ecf20Sopenharmony_ci		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
45398c2ecf20Sopenharmony_ci				btrfs_item_nr_offset(slot),
45408c2ecf20Sopenharmony_ci				(nritems - slot) * sizeof(struct btrfs_item));
45418c2ecf20Sopenharmony_ci	}
45428c2ecf20Sopenharmony_ci
45438c2ecf20Sopenharmony_ci	btrfs_cpu_key_to_disk(&disk_key, new_key);
45448c2ecf20Sopenharmony_ci	btrfs_set_item_key(leaf, &disk_key, slot);
45458c2ecf20Sopenharmony_ci
45468c2ecf20Sopenharmony_ci	new_item = btrfs_item_nr(slot);
45478c2ecf20Sopenharmony_ci
45488c2ecf20Sopenharmony_ci	btrfs_set_item_offset(leaf, new_item, orig_offset);
45498c2ecf20Sopenharmony_ci	btrfs_set_item_size(leaf, new_item, item_size - split_offset);
45508c2ecf20Sopenharmony_ci
45518c2ecf20Sopenharmony_ci	btrfs_set_item_offset(leaf, item,
45528c2ecf20Sopenharmony_ci			      orig_offset + item_size - split_offset);
45538c2ecf20Sopenharmony_ci	btrfs_set_item_size(leaf, item, split_offset);
45548c2ecf20Sopenharmony_ci
45558c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(leaf, nritems + 1);
45568c2ecf20Sopenharmony_ci
45578c2ecf20Sopenharmony_ci	/* write the data for the start of the original item */
45588c2ecf20Sopenharmony_ci	write_extent_buffer(leaf, buf,
45598c2ecf20Sopenharmony_ci			    btrfs_item_ptr_offset(leaf, path->slots[0]),
45608c2ecf20Sopenharmony_ci			    split_offset);
45618c2ecf20Sopenharmony_ci
45628c2ecf20Sopenharmony_ci	/* write the data for the new item */
45638c2ecf20Sopenharmony_ci	write_extent_buffer(leaf, buf + split_offset,
45648c2ecf20Sopenharmony_ci			    btrfs_item_ptr_offset(leaf, slot),
45658c2ecf20Sopenharmony_ci			    item_size - split_offset);
45668c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(leaf);
45678c2ecf20Sopenharmony_ci
45688c2ecf20Sopenharmony_ci	BUG_ON(btrfs_leaf_free_space(leaf) < 0);
45698c2ecf20Sopenharmony_ci	kfree(buf);
45708c2ecf20Sopenharmony_ci	return 0;
45718c2ecf20Sopenharmony_ci}
45728c2ecf20Sopenharmony_ci
45738c2ecf20Sopenharmony_ci/*
45748c2ecf20Sopenharmony_ci * This function splits a single item into two items,
45758c2ecf20Sopenharmony_ci * giving 'new_key' to the new item and splitting the
45768c2ecf20Sopenharmony_ci * old one at split_offset (from the start of the item).
45778c2ecf20Sopenharmony_ci *
45788c2ecf20Sopenharmony_ci * The path may be released by this operation.  After
45798c2ecf20Sopenharmony_ci * the split, the path is pointing to the old item.  The
45808c2ecf20Sopenharmony_ci * new item is going to be in the same node as the old one.
45818c2ecf20Sopenharmony_ci *
45828c2ecf20Sopenharmony_ci * Note, the item being split must be smaller enough to live alone on
45838c2ecf20Sopenharmony_ci * a tree block with room for one extra struct btrfs_item
45848c2ecf20Sopenharmony_ci *
45858c2ecf20Sopenharmony_ci * This allows us to split the item in place, keeping a lock on the
45868c2ecf20Sopenharmony_ci * leaf the entire time.
45878c2ecf20Sopenharmony_ci */
45888c2ecf20Sopenharmony_ciint btrfs_split_item(struct btrfs_trans_handle *trans,
45898c2ecf20Sopenharmony_ci		     struct btrfs_root *root,
45908c2ecf20Sopenharmony_ci		     struct btrfs_path *path,
45918c2ecf20Sopenharmony_ci		     const struct btrfs_key *new_key,
45928c2ecf20Sopenharmony_ci		     unsigned long split_offset)
45938c2ecf20Sopenharmony_ci{
45948c2ecf20Sopenharmony_ci	int ret;
45958c2ecf20Sopenharmony_ci	ret = setup_leaf_for_split(trans, root, path,
45968c2ecf20Sopenharmony_ci				   sizeof(struct btrfs_item));
45978c2ecf20Sopenharmony_ci	if (ret)
45988c2ecf20Sopenharmony_ci		return ret;
45998c2ecf20Sopenharmony_ci
46008c2ecf20Sopenharmony_ci	ret = split_item(path, new_key, split_offset);
46018c2ecf20Sopenharmony_ci	return ret;
46028c2ecf20Sopenharmony_ci}
46038c2ecf20Sopenharmony_ci
46048c2ecf20Sopenharmony_ci/*
46058c2ecf20Sopenharmony_ci * This function duplicate a item, giving 'new_key' to the new item.
46068c2ecf20Sopenharmony_ci * It guarantees both items live in the same tree leaf and the new item
46078c2ecf20Sopenharmony_ci * is contiguous with the original item.
46088c2ecf20Sopenharmony_ci *
46098c2ecf20Sopenharmony_ci * This allows us to split file extent in place, keeping a lock on the
46108c2ecf20Sopenharmony_ci * leaf the entire time.
46118c2ecf20Sopenharmony_ci */
46128c2ecf20Sopenharmony_ciint btrfs_duplicate_item(struct btrfs_trans_handle *trans,
46138c2ecf20Sopenharmony_ci			 struct btrfs_root *root,
46148c2ecf20Sopenharmony_ci			 struct btrfs_path *path,
46158c2ecf20Sopenharmony_ci			 const struct btrfs_key *new_key)
46168c2ecf20Sopenharmony_ci{
46178c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
46188c2ecf20Sopenharmony_ci	int ret;
46198c2ecf20Sopenharmony_ci	u32 item_size;
46208c2ecf20Sopenharmony_ci
46218c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
46228c2ecf20Sopenharmony_ci	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
46238c2ecf20Sopenharmony_ci	ret = setup_leaf_for_split(trans, root, path,
46248c2ecf20Sopenharmony_ci				   item_size + sizeof(struct btrfs_item));
46258c2ecf20Sopenharmony_ci	if (ret)
46268c2ecf20Sopenharmony_ci		return ret;
46278c2ecf20Sopenharmony_ci
46288c2ecf20Sopenharmony_ci	path->slots[0]++;
46298c2ecf20Sopenharmony_ci	setup_items_for_insert(root, path, new_key, &item_size, 1);
46308c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
46318c2ecf20Sopenharmony_ci	memcpy_extent_buffer(leaf,
46328c2ecf20Sopenharmony_ci			     btrfs_item_ptr_offset(leaf, path->slots[0]),
46338c2ecf20Sopenharmony_ci			     btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
46348c2ecf20Sopenharmony_ci			     item_size);
46358c2ecf20Sopenharmony_ci	return 0;
46368c2ecf20Sopenharmony_ci}
46378c2ecf20Sopenharmony_ci
46388c2ecf20Sopenharmony_ci/*
46398c2ecf20Sopenharmony_ci * make the item pointed to by the path smaller.  new_size indicates
46408c2ecf20Sopenharmony_ci * how small to make it, and from_end tells us if we just chop bytes
46418c2ecf20Sopenharmony_ci * off the end of the item or if we shift the item to chop bytes off
46428c2ecf20Sopenharmony_ci * the front.
46438c2ecf20Sopenharmony_ci */
46448c2ecf20Sopenharmony_civoid btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end)
46458c2ecf20Sopenharmony_ci{
46468c2ecf20Sopenharmony_ci	int slot;
46478c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
46488c2ecf20Sopenharmony_ci	struct btrfs_item *item;
46498c2ecf20Sopenharmony_ci	u32 nritems;
46508c2ecf20Sopenharmony_ci	unsigned int data_end;
46518c2ecf20Sopenharmony_ci	unsigned int old_data_start;
46528c2ecf20Sopenharmony_ci	unsigned int old_size;
46538c2ecf20Sopenharmony_ci	unsigned int size_diff;
46548c2ecf20Sopenharmony_ci	int i;
46558c2ecf20Sopenharmony_ci	struct btrfs_map_token token;
46568c2ecf20Sopenharmony_ci
46578c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
46588c2ecf20Sopenharmony_ci	slot = path->slots[0];
46598c2ecf20Sopenharmony_ci
46608c2ecf20Sopenharmony_ci	old_size = btrfs_item_size_nr(leaf, slot);
46618c2ecf20Sopenharmony_ci	if (old_size == new_size)
46628c2ecf20Sopenharmony_ci		return;
46638c2ecf20Sopenharmony_ci
46648c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(leaf);
46658c2ecf20Sopenharmony_ci	data_end = leaf_data_end(leaf);
46668c2ecf20Sopenharmony_ci
46678c2ecf20Sopenharmony_ci	old_data_start = btrfs_item_offset_nr(leaf, slot);
46688c2ecf20Sopenharmony_ci
46698c2ecf20Sopenharmony_ci	size_diff = old_size - new_size;
46708c2ecf20Sopenharmony_ci
46718c2ecf20Sopenharmony_ci	BUG_ON(slot < 0);
46728c2ecf20Sopenharmony_ci	BUG_ON(slot >= nritems);
46738c2ecf20Sopenharmony_ci
46748c2ecf20Sopenharmony_ci	/*
46758c2ecf20Sopenharmony_ci	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
46768c2ecf20Sopenharmony_ci	 */
46778c2ecf20Sopenharmony_ci	/* first correct the data pointers */
46788c2ecf20Sopenharmony_ci	btrfs_init_map_token(&token, leaf);
46798c2ecf20Sopenharmony_ci	for (i = slot; i < nritems; i++) {
46808c2ecf20Sopenharmony_ci		u32 ioff;
46818c2ecf20Sopenharmony_ci		item = btrfs_item_nr(i);
46828c2ecf20Sopenharmony_ci
46838c2ecf20Sopenharmony_ci		ioff = btrfs_token_item_offset(&token, item);
46848c2ecf20Sopenharmony_ci		btrfs_set_token_item_offset(&token, item, ioff + size_diff);
46858c2ecf20Sopenharmony_ci	}
46868c2ecf20Sopenharmony_ci
46878c2ecf20Sopenharmony_ci	/* shift the data */
46888c2ecf20Sopenharmony_ci	if (from_end) {
46898c2ecf20Sopenharmony_ci		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
46908c2ecf20Sopenharmony_ci			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
46918c2ecf20Sopenharmony_ci			      data_end, old_data_start + new_size - data_end);
46928c2ecf20Sopenharmony_ci	} else {
46938c2ecf20Sopenharmony_ci		struct btrfs_disk_key disk_key;
46948c2ecf20Sopenharmony_ci		u64 offset;
46958c2ecf20Sopenharmony_ci
46968c2ecf20Sopenharmony_ci		btrfs_item_key(leaf, &disk_key, slot);
46978c2ecf20Sopenharmony_ci
46988c2ecf20Sopenharmony_ci		if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
46998c2ecf20Sopenharmony_ci			unsigned long ptr;
47008c2ecf20Sopenharmony_ci			struct btrfs_file_extent_item *fi;
47018c2ecf20Sopenharmony_ci
47028c2ecf20Sopenharmony_ci			fi = btrfs_item_ptr(leaf, slot,
47038c2ecf20Sopenharmony_ci					    struct btrfs_file_extent_item);
47048c2ecf20Sopenharmony_ci			fi = (struct btrfs_file_extent_item *)(
47058c2ecf20Sopenharmony_ci			     (unsigned long)fi - size_diff);
47068c2ecf20Sopenharmony_ci
47078c2ecf20Sopenharmony_ci			if (btrfs_file_extent_type(leaf, fi) ==
47088c2ecf20Sopenharmony_ci			    BTRFS_FILE_EXTENT_INLINE) {
47098c2ecf20Sopenharmony_ci				ptr = btrfs_item_ptr_offset(leaf, slot);
47108c2ecf20Sopenharmony_ci				memmove_extent_buffer(leaf, ptr,
47118c2ecf20Sopenharmony_ci				      (unsigned long)fi,
47128c2ecf20Sopenharmony_ci				      BTRFS_FILE_EXTENT_INLINE_DATA_START);
47138c2ecf20Sopenharmony_ci			}
47148c2ecf20Sopenharmony_ci		}
47158c2ecf20Sopenharmony_ci
47168c2ecf20Sopenharmony_ci		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
47178c2ecf20Sopenharmony_ci			      data_end + size_diff, BTRFS_LEAF_DATA_OFFSET +
47188c2ecf20Sopenharmony_ci			      data_end, old_data_start - data_end);
47198c2ecf20Sopenharmony_ci
47208c2ecf20Sopenharmony_ci		offset = btrfs_disk_key_offset(&disk_key);
47218c2ecf20Sopenharmony_ci		btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
47228c2ecf20Sopenharmony_ci		btrfs_set_item_key(leaf, &disk_key, slot);
47238c2ecf20Sopenharmony_ci		if (slot == 0)
47248c2ecf20Sopenharmony_ci			fixup_low_keys(path, &disk_key, 1);
47258c2ecf20Sopenharmony_ci	}
47268c2ecf20Sopenharmony_ci
47278c2ecf20Sopenharmony_ci	item = btrfs_item_nr(slot);
47288c2ecf20Sopenharmony_ci	btrfs_set_item_size(leaf, item, new_size);
47298c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(leaf);
47308c2ecf20Sopenharmony_ci
47318c2ecf20Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) < 0) {
47328c2ecf20Sopenharmony_ci		btrfs_print_leaf(leaf);
47338c2ecf20Sopenharmony_ci		BUG();
47348c2ecf20Sopenharmony_ci	}
47358c2ecf20Sopenharmony_ci}
47368c2ecf20Sopenharmony_ci
47378c2ecf20Sopenharmony_ci/*
47388c2ecf20Sopenharmony_ci * make the item pointed to by the path bigger, data_size is the added size.
47398c2ecf20Sopenharmony_ci */
47408c2ecf20Sopenharmony_civoid btrfs_extend_item(struct btrfs_path *path, u32 data_size)
47418c2ecf20Sopenharmony_ci{
47428c2ecf20Sopenharmony_ci	int slot;
47438c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
47448c2ecf20Sopenharmony_ci	struct btrfs_item *item;
47458c2ecf20Sopenharmony_ci	u32 nritems;
47468c2ecf20Sopenharmony_ci	unsigned int data_end;
47478c2ecf20Sopenharmony_ci	unsigned int old_data;
47488c2ecf20Sopenharmony_ci	unsigned int old_size;
47498c2ecf20Sopenharmony_ci	int i;
47508c2ecf20Sopenharmony_ci	struct btrfs_map_token token;
47518c2ecf20Sopenharmony_ci
47528c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
47538c2ecf20Sopenharmony_ci
47548c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(leaf);
47558c2ecf20Sopenharmony_ci	data_end = leaf_data_end(leaf);
47568c2ecf20Sopenharmony_ci
47578c2ecf20Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) < data_size) {
47588c2ecf20Sopenharmony_ci		btrfs_print_leaf(leaf);
47598c2ecf20Sopenharmony_ci		BUG();
47608c2ecf20Sopenharmony_ci	}
47618c2ecf20Sopenharmony_ci	slot = path->slots[0];
47628c2ecf20Sopenharmony_ci	old_data = btrfs_item_end_nr(leaf, slot);
47638c2ecf20Sopenharmony_ci
47648c2ecf20Sopenharmony_ci	BUG_ON(slot < 0);
47658c2ecf20Sopenharmony_ci	if (slot >= nritems) {
47668c2ecf20Sopenharmony_ci		btrfs_print_leaf(leaf);
47678c2ecf20Sopenharmony_ci		btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d",
47688c2ecf20Sopenharmony_ci			   slot, nritems);
47698c2ecf20Sopenharmony_ci		BUG();
47708c2ecf20Sopenharmony_ci	}
47718c2ecf20Sopenharmony_ci
47728c2ecf20Sopenharmony_ci	/*
47738c2ecf20Sopenharmony_ci	 * item0..itemN ... dataN.offset..dataN.size .. data0.size
47748c2ecf20Sopenharmony_ci	 */
47758c2ecf20Sopenharmony_ci	/* first correct the data pointers */
47768c2ecf20Sopenharmony_ci	btrfs_init_map_token(&token, leaf);
47778c2ecf20Sopenharmony_ci	for (i = slot; i < nritems; i++) {
47788c2ecf20Sopenharmony_ci		u32 ioff;
47798c2ecf20Sopenharmony_ci		item = btrfs_item_nr(i);
47808c2ecf20Sopenharmony_ci
47818c2ecf20Sopenharmony_ci		ioff = btrfs_token_item_offset(&token, item);
47828c2ecf20Sopenharmony_ci		btrfs_set_token_item_offset(&token, item, ioff - data_size);
47838c2ecf20Sopenharmony_ci	}
47848c2ecf20Sopenharmony_ci
47858c2ecf20Sopenharmony_ci	/* shift the data */
47868c2ecf20Sopenharmony_ci	memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
47878c2ecf20Sopenharmony_ci		      data_end - data_size, BTRFS_LEAF_DATA_OFFSET +
47888c2ecf20Sopenharmony_ci		      data_end, old_data - data_end);
47898c2ecf20Sopenharmony_ci
47908c2ecf20Sopenharmony_ci	data_end = old_data;
47918c2ecf20Sopenharmony_ci	old_size = btrfs_item_size_nr(leaf, slot);
47928c2ecf20Sopenharmony_ci	item = btrfs_item_nr(slot);
47938c2ecf20Sopenharmony_ci	btrfs_set_item_size(leaf, item, old_size + data_size);
47948c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(leaf);
47958c2ecf20Sopenharmony_ci
47968c2ecf20Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) < 0) {
47978c2ecf20Sopenharmony_ci		btrfs_print_leaf(leaf);
47988c2ecf20Sopenharmony_ci		BUG();
47998c2ecf20Sopenharmony_ci	}
48008c2ecf20Sopenharmony_ci}
48018c2ecf20Sopenharmony_ci
48028c2ecf20Sopenharmony_ci/**
48038c2ecf20Sopenharmony_ci * setup_items_for_insert - Helper called before inserting one or more items
48048c2ecf20Sopenharmony_ci * to a leaf. Main purpose is to save stack depth by doing the bulk of the work
48058c2ecf20Sopenharmony_ci * in a function that doesn't call btrfs_search_slot
48068c2ecf20Sopenharmony_ci *
48078c2ecf20Sopenharmony_ci * @root:	root we are inserting items to
48088c2ecf20Sopenharmony_ci * @path:	points to the leaf/slot where we are going to insert new items
48098c2ecf20Sopenharmony_ci * @cpu_key:	array of keys for items to be inserted
48108c2ecf20Sopenharmony_ci * @data_size:	size of the body of each item we are going to insert
48118c2ecf20Sopenharmony_ci * @nr:		size of @cpu_key/@data_size arrays
48128c2ecf20Sopenharmony_ci */
48138c2ecf20Sopenharmony_civoid setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
48148c2ecf20Sopenharmony_ci			    const struct btrfs_key *cpu_key, u32 *data_size,
48158c2ecf20Sopenharmony_ci			    int nr)
48168c2ecf20Sopenharmony_ci{
48178c2ecf20Sopenharmony_ci	struct btrfs_fs_info *fs_info = root->fs_info;
48188c2ecf20Sopenharmony_ci	struct btrfs_item *item;
48198c2ecf20Sopenharmony_ci	int i;
48208c2ecf20Sopenharmony_ci	u32 nritems;
48218c2ecf20Sopenharmony_ci	unsigned int data_end;
48228c2ecf20Sopenharmony_ci	struct btrfs_disk_key disk_key;
48238c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
48248c2ecf20Sopenharmony_ci	int slot;
48258c2ecf20Sopenharmony_ci	struct btrfs_map_token token;
48268c2ecf20Sopenharmony_ci	u32 total_size;
48278c2ecf20Sopenharmony_ci	u32 total_data = 0;
48288c2ecf20Sopenharmony_ci
48298c2ecf20Sopenharmony_ci	for (i = 0; i < nr; i++)
48308c2ecf20Sopenharmony_ci		total_data += data_size[i];
48318c2ecf20Sopenharmony_ci	total_size = total_data + (nr * sizeof(struct btrfs_item));
48328c2ecf20Sopenharmony_ci
48338c2ecf20Sopenharmony_ci	if (path->slots[0] == 0) {
48348c2ecf20Sopenharmony_ci		btrfs_cpu_key_to_disk(&disk_key, cpu_key);
48358c2ecf20Sopenharmony_ci		fixup_low_keys(path, &disk_key, 1);
48368c2ecf20Sopenharmony_ci	}
48378c2ecf20Sopenharmony_ci	btrfs_unlock_up_safe(path, 1);
48388c2ecf20Sopenharmony_ci
48398c2ecf20Sopenharmony_ci	leaf = path->nodes[0];
48408c2ecf20Sopenharmony_ci	slot = path->slots[0];
48418c2ecf20Sopenharmony_ci
48428c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(leaf);
48438c2ecf20Sopenharmony_ci	data_end = leaf_data_end(leaf);
48448c2ecf20Sopenharmony_ci
48458c2ecf20Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) < total_size) {
48468c2ecf20Sopenharmony_ci		btrfs_print_leaf(leaf);
48478c2ecf20Sopenharmony_ci		btrfs_crit(fs_info, "not enough freespace need %u have %d",
48488c2ecf20Sopenharmony_ci			   total_size, btrfs_leaf_free_space(leaf));
48498c2ecf20Sopenharmony_ci		BUG();
48508c2ecf20Sopenharmony_ci	}
48518c2ecf20Sopenharmony_ci
48528c2ecf20Sopenharmony_ci	btrfs_init_map_token(&token, leaf);
48538c2ecf20Sopenharmony_ci	if (slot != nritems) {
48548c2ecf20Sopenharmony_ci		unsigned int old_data = btrfs_item_end_nr(leaf, slot);
48558c2ecf20Sopenharmony_ci
48568c2ecf20Sopenharmony_ci		if (old_data < data_end) {
48578c2ecf20Sopenharmony_ci			btrfs_print_leaf(leaf);
48588c2ecf20Sopenharmony_ci			btrfs_crit(fs_info,
48598c2ecf20Sopenharmony_ci		"item at slot %d with data offset %u beyond data end of leaf %u",
48608c2ecf20Sopenharmony_ci				   slot, old_data, data_end);
48618c2ecf20Sopenharmony_ci			BUG();
48628c2ecf20Sopenharmony_ci		}
48638c2ecf20Sopenharmony_ci		/*
48648c2ecf20Sopenharmony_ci		 * item0..itemN ... dataN.offset..dataN.size .. data0.size
48658c2ecf20Sopenharmony_ci		 */
48668c2ecf20Sopenharmony_ci		/* first correct the data pointers */
48678c2ecf20Sopenharmony_ci		for (i = slot; i < nritems; i++) {
48688c2ecf20Sopenharmony_ci			u32 ioff;
48698c2ecf20Sopenharmony_ci
48708c2ecf20Sopenharmony_ci			item = btrfs_item_nr(i);
48718c2ecf20Sopenharmony_ci			ioff = btrfs_token_item_offset(&token, item);
48728c2ecf20Sopenharmony_ci			btrfs_set_token_item_offset(&token, item,
48738c2ecf20Sopenharmony_ci						    ioff - total_data);
48748c2ecf20Sopenharmony_ci		}
48758c2ecf20Sopenharmony_ci		/* shift the items */
48768c2ecf20Sopenharmony_ci		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
48778c2ecf20Sopenharmony_ci			      btrfs_item_nr_offset(slot),
48788c2ecf20Sopenharmony_ci			      (nritems - slot) * sizeof(struct btrfs_item));
48798c2ecf20Sopenharmony_ci
48808c2ecf20Sopenharmony_ci		/* shift the data */
48818c2ecf20Sopenharmony_ci		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
48828c2ecf20Sopenharmony_ci			      data_end - total_data, BTRFS_LEAF_DATA_OFFSET +
48838c2ecf20Sopenharmony_ci			      data_end, old_data - data_end);
48848c2ecf20Sopenharmony_ci		data_end = old_data;
48858c2ecf20Sopenharmony_ci	}
48868c2ecf20Sopenharmony_ci
48878c2ecf20Sopenharmony_ci	/* setup the item for the new data */
48888c2ecf20Sopenharmony_ci	for (i = 0; i < nr; i++) {
48898c2ecf20Sopenharmony_ci		btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
48908c2ecf20Sopenharmony_ci		btrfs_set_item_key(leaf, &disk_key, slot + i);
48918c2ecf20Sopenharmony_ci		item = btrfs_item_nr(slot + i);
48928c2ecf20Sopenharmony_ci		data_end -= data_size[i];
48938c2ecf20Sopenharmony_ci		btrfs_set_token_item_offset(&token, item, data_end);
48948c2ecf20Sopenharmony_ci		btrfs_set_token_item_size(&token, item, data_size[i]);
48958c2ecf20Sopenharmony_ci	}
48968c2ecf20Sopenharmony_ci
48978c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(leaf, nritems + nr);
48988c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(leaf);
48998c2ecf20Sopenharmony_ci
49008c2ecf20Sopenharmony_ci	if (btrfs_leaf_free_space(leaf) < 0) {
49018c2ecf20Sopenharmony_ci		btrfs_print_leaf(leaf);
49028c2ecf20Sopenharmony_ci		BUG();
49038c2ecf20Sopenharmony_ci	}
49048c2ecf20Sopenharmony_ci}
49058c2ecf20Sopenharmony_ci
49068c2ecf20Sopenharmony_ci/*
49078c2ecf20Sopenharmony_ci * Given a key and some data, insert items into the tree.
49088c2ecf20Sopenharmony_ci * This does all the path init required, making room in the tree if needed.
49098c2ecf20Sopenharmony_ci */
49108c2ecf20Sopenharmony_ciint btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
49118c2ecf20Sopenharmony_ci			    struct btrfs_root *root,
49128c2ecf20Sopenharmony_ci			    struct btrfs_path *path,
49138c2ecf20Sopenharmony_ci			    const struct btrfs_key *cpu_key, u32 *data_size,
49148c2ecf20Sopenharmony_ci			    int nr)
49158c2ecf20Sopenharmony_ci{
49168c2ecf20Sopenharmony_ci	int ret = 0;
49178c2ecf20Sopenharmony_ci	int slot;
49188c2ecf20Sopenharmony_ci	int i;
49198c2ecf20Sopenharmony_ci	u32 total_size = 0;
49208c2ecf20Sopenharmony_ci	u32 total_data = 0;
49218c2ecf20Sopenharmony_ci
49228c2ecf20Sopenharmony_ci	for (i = 0; i < nr; i++)
49238c2ecf20Sopenharmony_ci		total_data += data_size[i];
49248c2ecf20Sopenharmony_ci
49258c2ecf20Sopenharmony_ci	total_size = total_data + (nr * sizeof(struct btrfs_item));
49268c2ecf20Sopenharmony_ci	ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
49278c2ecf20Sopenharmony_ci	if (ret == 0)
49288c2ecf20Sopenharmony_ci		return -EEXIST;
49298c2ecf20Sopenharmony_ci	if (ret < 0)
49308c2ecf20Sopenharmony_ci		return ret;
49318c2ecf20Sopenharmony_ci
49328c2ecf20Sopenharmony_ci	slot = path->slots[0];
49338c2ecf20Sopenharmony_ci	BUG_ON(slot < 0);
49348c2ecf20Sopenharmony_ci
49358c2ecf20Sopenharmony_ci	setup_items_for_insert(root, path, cpu_key, data_size, nr);
49368c2ecf20Sopenharmony_ci	return 0;
49378c2ecf20Sopenharmony_ci}
49388c2ecf20Sopenharmony_ci
49398c2ecf20Sopenharmony_ci/*
49408c2ecf20Sopenharmony_ci * Given a key and some data, insert an item into the tree.
49418c2ecf20Sopenharmony_ci * This does all the path init required, making room in the tree if needed.
49428c2ecf20Sopenharmony_ci */
49438c2ecf20Sopenharmony_ciint btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
49448c2ecf20Sopenharmony_ci		      const struct btrfs_key *cpu_key, void *data,
49458c2ecf20Sopenharmony_ci		      u32 data_size)
49468c2ecf20Sopenharmony_ci{
49478c2ecf20Sopenharmony_ci	int ret = 0;
49488c2ecf20Sopenharmony_ci	struct btrfs_path *path;
49498c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
49508c2ecf20Sopenharmony_ci	unsigned long ptr;
49518c2ecf20Sopenharmony_ci
49528c2ecf20Sopenharmony_ci	path = btrfs_alloc_path();
49538c2ecf20Sopenharmony_ci	if (!path)
49548c2ecf20Sopenharmony_ci		return -ENOMEM;
49558c2ecf20Sopenharmony_ci	ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
49568c2ecf20Sopenharmony_ci	if (!ret) {
49578c2ecf20Sopenharmony_ci		leaf = path->nodes[0];
49588c2ecf20Sopenharmony_ci		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
49598c2ecf20Sopenharmony_ci		write_extent_buffer(leaf, data, ptr, data_size);
49608c2ecf20Sopenharmony_ci		btrfs_mark_buffer_dirty(leaf);
49618c2ecf20Sopenharmony_ci	}
49628c2ecf20Sopenharmony_ci	btrfs_free_path(path);
49638c2ecf20Sopenharmony_ci	return ret;
49648c2ecf20Sopenharmony_ci}
49658c2ecf20Sopenharmony_ci
49668c2ecf20Sopenharmony_ci/*
49678c2ecf20Sopenharmony_ci * delete the pointer from a given node.
49688c2ecf20Sopenharmony_ci *
49698c2ecf20Sopenharmony_ci * the tree should have been previously balanced so the deletion does not
49708c2ecf20Sopenharmony_ci * empty a node.
49718c2ecf20Sopenharmony_ci */
49728c2ecf20Sopenharmony_cistatic void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
49738c2ecf20Sopenharmony_ci		    int level, int slot)
49748c2ecf20Sopenharmony_ci{
49758c2ecf20Sopenharmony_ci	struct extent_buffer *parent = path->nodes[level];
49768c2ecf20Sopenharmony_ci	u32 nritems;
49778c2ecf20Sopenharmony_ci	int ret;
49788c2ecf20Sopenharmony_ci
49798c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(parent);
49808c2ecf20Sopenharmony_ci	if (slot != nritems - 1) {
49818c2ecf20Sopenharmony_ci		if (level) {
49828c2ecf20Sopenharmony_ci			ret = tree_mod_log_insert_move(parent, slot, slot + 1,
49838c2ecf20Sopenharmony_ci					nritems - slot - 1);
49848c2ecf20Sopenharmony_ci			BUG_ON(ret < 0);
49858c2ecf20Sopenharmony_ci		}
49868c2ecf20Sopenharmony_ci		memmove_extent_buffer(parent,
49878c2ecf20Sopenharmony_ci			      btrfs_node_key_ptr_offset(slot),
49888c2ecf20Sopenharmony_ci			      btrfs_node_key_ptr_offset(slot + 1),
49898c2ecf20Sopenharmony_ci			      sizeof(struct btrfs_key_ptr) *
49908c2ecf20Sopenharmony_ci			      (nritems - slot - 1));
49918c2ecf20Sopenharmony_ci	} else if (level) {
49928c2ecf20Sopenharmony_ci		ret = tree_mod_log_insert_key(parent, slot, MOD_LOG_KEY_REMOVE,
49938c2ecf20Sopenharmony_ci				GFP_NOFS);
49948c2ecf20Sopenharmony_ci		BUG_ON(ret < 0);
49958c2ecf20Sopenharmony_ci	}
49968c2ecf20Sopenharmony_ci
49978c2ecf20Sopenharmony_ci	nritems--;
49988c2ecf20Sopenharmony_ci	btrfs_set_header_nritems(parent, nritems);
49998c2ecf20Sopenharmony_ci	if (nritems == 0 && parent == root->node) {
50008c2ecf20Sopenharmony_ci		BUG_ON(btrfs_header_level(root->node) != 1);
50018c2ecf20Sopenharmony_ci		/* just turn the root into a leaf and break */
50028c2ecf20Sopenharmony_ci		btrfs_set_header_level(root->node, 0);
50038c2ecf20Sopenharmony_ci	} else if (slot == 0) {
50048c2ecf20Sopenharmony_ci		struct btrfs_disk_key disk_key;
50058c2ecf20Sopenharmony_ci
50068c2ecf20Sopenharmony_ci		btrfs_node_key(parent, &disk_key, 0);
50078c2ecf20Sopenharmony_ci		fixup_low_keys(path, &disk_key, level + 1);
50088c2ecf20Sopenharmony_ci	}
50098c2ecf20Sopenharmony_ci	btrfs_mark_buffer_dirty(parent);
50108c2ecf20Sopenharmony_ci}
50118c2ecf20Sopenharmony_ci
/*
 * a helper function to delete the leaf pointed to by path->slots[1] and
 * path->nodes[1].
 *
 * This deletes the pointer in path->nodes[1] and frees the leaf
 * block extent.  Nothing is returned; failures inside del_ptr() hit BUG_ON.
 *
 * The path must have already been setup for deleting the leaf, including
 * all the proper balancing.  path->nodes[1] must be locked.
 */
static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root,
				    struct btrfs_path *path,
				    struct extent_buffer *leaf)
{
	/* only complain (do not fail) if the leaf is not from this transaction */
	WARN_ON(btrfs_header_generation(leaf) != trans->transid);
	/* unlink the leaf from its parent at level 1 */
	del_ptr(root, path, 1, path->slots[1]);

	/*
	 * btrfs_free_extent is expensive, we want to make sure we
	 * aren't holding any locks when we call it
	 */
	btrfs_unlock_up_safe(path, 0);

	/* NOTE(review): presumably space accounting for the root - confirm */
	root_sub_used(root, leaf->len);

	/*
	 * Take an extra reference around the free.
	 * NOTE(review): presumably dropped again by free_extent_buffer_stale()
	 * below - confirm.
	 */
	atomic_inc(&leaf->refs);
	btrfs_free_tree_block(trans, root, leaf, 0, 1);
	free_extent_buffer_stale(leaf);
}
/*
 * delete @nr items starting at @slot from the leaf at path->nodes[0].
 * If that empties the leaf, remove it from the tree.  If the leaf ends up
 * less than a third full, try to push its remaining items into the
 * neighbouring leaves and delete it if that empties it.
 *
 * Returns 0 on success.  A negative error other than -ENOSPC from the push
 * helpers is returned, unless the leaf ended up empty and was deleted, in
 * which case 0 is returned.
 */
int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
		    struct btrfs_path *path, int slot, int nr)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_item *item;
	u32 last_off;
	u32 dsize = 0;
	int ret = 0;
	int wret;
	int i;
	u32 nritems;

	leaf = path->nodes[0];
	/* data offset of the last item being deleted */
	last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);

	/* total data bytes occupied by the items being deleted */
	for (i = 0; i < nr; i++)
		dsize += btrfs_item_size_nr(leaf, slot + i);

	nritems = btrfs_header_nritems(leaf);

	/* items follow the deleted range: close the gap in data and headers */
	if (slot + nr != nritems) {
		int data_end = leaf_data_end(leaf);
		struct btrfs_map_token token;

		/* move the data of the following items up by dsize bytes */
		memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
			      data_end + dsize,
			      BTRFS_LEAF_DATA_OFFSET + data_end,
			      last_off - data_end);

		/* and adjust their recorded offsets to match */
		btrfs_init_map_token(&token, leaf);
		for (i = slot + nr; i < nritems; i++) {
			u32 ioff;

			item = btrfs_item_nr(i);
			ioff = btrfs_token_item_offset(&token, item);
			btrfs_set_token_item_offset(&token, item, ioff + dsize);
		}

		/* slide the item headers down over the deleted ones */
		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
			      btrfs_item_nr_offset(slot + nr),
			      sizeof(struct btrfs_item) *
			      (nritems - slot - nr));
	}
	btrfs_set_header_nritems(leaf, nritems - nr);
	nritems -= nr;

	/* delete the leaf if we've emptied it */
	if (nritems == 0) {
		if (leaf == root->node) {
			/* an empty root stays in place as a level 0 block */
			btrfs_set_header_level(leaf, 0);
		} else {
			btrfs_set_path_blocking(path);
			btrfs_clean_tree_block(leaf);
			btrfs_del_leaf(trans, root, path, leaf);
		}
	} else {
		int used = leaf_space_used(leaf, 0, nritems);
		/* the first item changed, propagate the new lowest key upwards */
		if (slot == 0) {
			struct btrfs_disk_key disk_key;

			btrfs_item_key(leaf, &disk_key, 0);
			fixup_low_keys(path, &disk_key, 1);
		}

		/* delete the leaf if it is mostly empty */
		if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
			/* push_leaf_left fixes the path.
			 * make sure the path still points to our leaf
			 * for possible call to del_ptr below
			 * (made through btrfs_del_leaf)
			 */
			slot = path->slots[1];
			/*
			 * extra reference on the leaf, dropped by the
			 * free_extent_buffer() calls in both branches below
			 */
			atomic_inc(&leaf->refs);

			btrfs_set_path_blocking(path);
			wret = push_leaf_left(trans, root, path, 1, 1,
					      1, (u32)-1);
			/* -ENOSPC from a push is not treated as an error */
			if (wret < 0 && wret != -ENOSPC)
				ret = wret;

			/* still our leaf and still not empty: try the right side */
			if (path->nodes[0] == leaf &&
			    btrfs_header_nritems(leaf)) {
				wret = push_leaf_right(trans, root, path, 1,
						       1, 1, 0);
				if (wret < 0 && wret != -ENOSPC)
					ret = wret;
			}

			if (btrfs_header_nritems(leaf) == 0) {
				/* restore the parent slot saved above */
				path->slots[1] = slot;
				btrfs_del_leaf(trans, root, path, leaf);
				free_extent_buffer(leaf);
				/* the leaf is gone, earlier push errors are dropped */
				ret = 0;
			} else {
				/* if we're still in the path, make sure
				 * we're dirty.  Otherwise, one of the
				 * push_leaf functions must have already
				 * dirtied this buffer
				 */
				if (path->nodes[0] == leaf)
					btrfs_mark_buffer_dirty(leaf);
				free_extent_buffer(leaf);
			}
		} else {
			btrfs_mark_buffer_dirty(leaf);
		}
	}
	return ret;
}
51558c2ecf20Sopenharmony_ci
51568c2ecf20Sopenharmony_ci/*
51578c2ecf20Sopenharmony_ci * search the tree again to find a leaf with lesser keys
51588c2ecf20Sopenharmony_ci * returns 0 if it found something or 1 if there are no lesser leaves.
51598c2ecf20Sopenharmony_ci * returns < 0 on io errors.
51608c2ecf20Sopenharmony_ci *
51618c2ecf20Sopenharmony_ci * This may release the path, and so you may lose any locks held at the
51628c2ecf20Sopenharmony_ci * time you call it.
51638c2ecf20Sopenharmony_ci */
51648c2ecf20Sopenharmony_ciint btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
51658c2ecf20Sopenharmony_ci{
51668c2ecf20Sopenharmony_ci	struct btrfs_key key;
51678c2ecf20Sopenharmony_ci	struct btrfs_key orig_key;
51688c2ecf20Sopenharmony_ci	struct btrfs_disk_key found_key;
51698c2ecf20Sopenharmony_ci	int ret;
51708c2ecf20Sopenharmony_ci
51718c2ecf20Sopenharmony_ci	btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
51728c2ecf20Sopenharmony_ci	orig_key = key;
51738c2ecf20Sopenharmony_ci
51748c2ecf20Sopenharmony_ci	if (key.offset > 0) {
51758c2ecf20Sopenharmony_ci		key.offset--;
51768c2ecf20Sopenharmony_ci	} else if (key.type > 0) {
51778c2ecf20Sopenharmony_ci		key.type--;
51788c2ecf20Sopenharmony_ci		key.offset = (u64)-1;
51798c2ecf20Sopenharmony_ci	} else if (key.objectid > 0) {
51808c2ecf20Sopenharmony_ci		key.objectid--;
51818c2ecf20Sopenharmony_ci		key.type = (u8)-1;
51828c2ecf20Sopenharmony_ci		key.offset = (u64)-1;
51838c2ecf20Sopenharmony_ci	} else {
51848c2ecf20Sopenharmony_ci		return 1;
51858c2ecf20Sopenharmony_ci	}
51868c2ecf20Sopenharmony_ci
51878c2ecf20Sopenharmony_ci	btrfs_release_path(path);
51888c2ecf20Sopenharmony_ci	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
51898c2ecf20Sopenharmony_ci	if (ret <= 0)
51908c2ecf20Sopenharmony_ci		return ret;
51918c2ecf20Sopenharmony_ci
51928c2ecf20Sopenharmony_ci	/*
51938c2ecf20Sopenharmony_ci	 * Previous key not found. Even if we were at slot 0 of the leaf we had
51948c2ecf20Sopenharmony_ci	 * before releasing the path and calling btrfs_search_slot(), we now may
51958c2ecf20Sopenharmony_ci	 * be in a slot pointing to the same original key - this can happen if
51968c2ecf20Sopenharmony_ci	 * after we released the path, one of more items were moved from a
51978c2ecf20Sopenharmony_ci	 * sibling leaf into the front of the leaf we had due to an insertion
51988c2ecf20Sopenharmony_ci	 * (see push_leaf_right()).
51998c2ecf20Sopenharmony_ci	 * If we hit this case and our slot is > 0 and just decrement the slot
52008c2ecf20Sopenharmony_ci	 * so that the caller does not process the same key again, which may or
52018c2ecf20Sopenharmony_ci	 * may not break the caller, depending on its logic.
52028c2ecf20Sopenharmony_ci	 */
52038c2ecf20Sopenharmony_ci	if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
52048c2ecf20Sopenharmony_ci		btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
52058c2ecf20Sopenharmony_ci		ret = comp_keys(&found_key, &orig_key);
52068c2ecf20Sopenharmony_ci		if (ret == 0) {
52078c2ecf20Sopenharmony_ci			if (path->slots[0] > 0) {
52088c2ecf20Sopenharmony_ci				path->slots[0]--;
52098c2ecf20Sopenharmony_ci				return 0;
52108c2ecf20Sopenharmony_ci			}
52118c2ecf20Sopenharmony_ci			/*
52128c2ecf20Sopenharmony_ci			 * At slot 0, same key as before, it means orig_key is
52138c2ecf20Sopenharmony_ci			 * the lowest, leftmost, key in the tree. We're done.
52148c2ecf20Sopenharmony_ci			 */
52158c2ecf20Sopenharmony_ci			return 1;
52168c2ecf20Sopenharmony_ci		}
52178c2ecf20Sopenharmony_ci	}
52188c2ecf20Sopenharmony_ci
52198c2ecf20Sopenharmony_ci	btrfs_item_key(path->nodes[0], &found_key, 0);
52208c2ecf20Sopenharmony_ci	ret = comp_keys(&found_key, &key);
52218c2ecf20Sopenharmony_ci	/*
52228c2ecf20Sopenharmony_ci	 * We might have had an item with the previous key in the tree right
52238c2ecf20Sopenharmony_ci	 * before we released our path. And after we released our path, that
52248c2ecf20Sopenharmony_ci	 * item might have been pushed to the first slot (0) of the leaf we
52258c2ecf20Sopenharmony_ci	 * were holding due to a tree balance. Alternatively, an item with the
52268c2ecf20Sopenharmony_ci	 * previous key can exist as the only element of a leaf (big fat item).
52278c2ecf20Sopenharmony_ci	 * Therefore account for these 2 cases, so that our callers (like
52288c2ecf20Sopenharmony_ci	 * btrfs_previous_item) don't miss an existing item with a key matching
52298c2ecf20Sopenharmony_ci	 * the previous key we computed above.
52308c2ecf20Sopenharmony_ci	 */
52318c2ecf20Sopenharmony_ci	if (ret <= 0)
52328c2ecf20Sopenharmony_ci		return 0;
52338c2ecf20Sopenharmony_ci	return 1;
52348c2ecf20Sopenharmony_ci}
52358c2ecf20Sopenharmony_ci
52368c2ecf20Sopenharmony_ci/*
52378c2ecf20Sopenharmony_ci * A helper function to walk down the tree starting at min_key, and looking
52388c2ecf20Sopenharmony_ci * for nodes or leaves that are have a minimum transaction id.
52398c2ecf20Sopenharmony_ci * This is used by the btree defrag code, and tree logging
52408c2ecf20Sopenharmony_ci *
52418c2ecf20Sopenharmony_ci * This does not cow, but it does stuff the starting key it finds back
52428c2ecf20Sopenharmony_ci * into min_key, so you can call btrfs_search_slot with cow=1 on the
52438c2ecf20Sopenharmony_ci * key and get a writable path.
52448c2ecf20Sopenharmony_ci *
52458c2ecf20Sopenharmony_ci * This honors path->lowest_level to prevent descent past a given level
52468c2ecf20Sopenharmony_ci * of the tree.
52478c2ecf20Sopenharmony_ci *
52488c2ecf20Sopenharmony_ci * min_trans indicates the oldest transaction that you are interested
52498c2ecf20Sopenharmony_ci * in walking through.  Any nodes or leaves older than min_trans are
52508c2ecf20Sopenharmony_ci * skipped over (without reading them).
52518c2ecf20Sopenharmony_ci *
52528c2ecf20Sopenharmony_ci * returns zero if something useful was found, < 0 on error and 1 if there
52538c2ecf20Sopenharmony_ci * was nothing in the tree that matched the search criteria.
52548c2ecf20Sopenharmony_ci */
52558c2ecf20Sopenharmony_ciint btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
52568c2ecf20Sopenharmony_ci			 struct btrfs_path *path,
52578c2ecf20Sopenharmony_ci			 u64 min_trans)
52588c2ecf20Sopenharmony_ci{
52598c2ecf20Sopenharmony_ci	struct extent_buffer *cur;
52608c2ecf20Sopenharmony_ci	struct btrfs_key found_key;
52618c2ecf20Sopenharmony_ci	int slot;
52628c2ecf20Sopenharmony_ci	int sret;
52638c2ecf20Sopenharmony_ci	u32 nritems;
52648c2ecf20Sopenharmony_ci	int level;
52658c2ecf20Sopenharmony_ci	int ret = 1;
52668c2ecf20Sopenharmony_ci	int keep_locks = path->keep_locks;
52678c2ecf20Sopenharmony_ci
52688c2ecf20Sopenharmony_ci	path->keep_locks = 1;
52698c2ecf20Sopenharmony_ciagain:
52708c2ecf20Sopenharmony_ci	cur = btrfs_read_lock_root_node(root);
52718c2ecf20Sopenharmony_ci	level = btrfs_header_level(cur);
52728c2ecf20Sopenharmony_ci	WARN_ON(path->nodes[level]);
52738c2ecf20Sopenharmony_ci	path->nodes[level] = cur;
52748c2ecf20Sopenharmony_ci	path->locks[level] = BTRFS_READ_LOCK;
52758c2ecf20Sopenharmony_ci
52768c2ecf20Sopenharmony_ci	if (btrfs_header_generation(cur) < min_trans) {
52778c2ecf20Sopenharmony_ci		ret = 1;
52788c2ecf20Sopenharmony_ci		goto out;
52798c2ecf20Sopenharmony_ci	}
52808c2ecf20Sopenharmony_ci	while (1) {
52818c2ecf20Sopenharmony_ci		nritems = btrfs_header_nritems(cur);
52828c2ecf20Sopenharmony_ci		level = btrfs_header_level(cur);
52838c2ecf20Sopenharmony_ci		sret = btrfs_bin_search(cur, min_key, &slot);
52848c2ecf20Sopenharmony_ci		if (sret < 0) {
52858c2ecf20Sopenharmony_ci			ret = sret;
52868c2ecf20Sopenharmony_ci			goto out;
52878c2ecf20Sopenharmony_ci		}
52888c2ecf20Sopenharmony_ci
52898c2ecf20Sopenharmony_ci		/* at the lowest level, we're done, setup the path and exit */
52908c2ecf20Sopenharmony_ci		if (level == path->lowest_level) {
52918c2ecf20Sopenharmony_ci			if (slot >= nritems)
52928c2ecf20Sopenharmony_ci				goto find_next_key;
52938c2ecf20Sopenharmony_ci			ret = 0;
52948c2ecf20Sopenharmony_ci			path->slots[level] = slot;
52958c2ecf20Sopenharmony_ci			btrfs_item_key_to_cpu(cur, &found_key, slot);
52968c2ecf20Sopenharmony_ci			goto out;
52978c2ecf20Sopenharmony_ci		}
52988c2ecf20Sopenharmony_ci		if (sret && slot > 0)
52998c2ecf20Sopenharmony_ci			slot--;
53008c2ecf20Sopenharmony_ci		/*
53018c2ecf20Sopenharmony_ci		 * check this node pointer against the min_trans parameters.
53028c2ecf20Sopenharmony_ci		 * If it is too old, skip to the next one.
53038c2ecf20Sopenharmony_ci		 */
53048c2ecf20Sopenharmony_ci		while (slot < nritems) {
53058c2ecf20Sopenharmony_ci			u64 gen;
53068c2ecf20Sopenharmony_ci
53078c2ecf20Sopenharmony_ci			gen = btrfs_node_ptr_generation(cur, slot);
53088c2ecf20Sopenharmony_ci			if (gen < min_trans) {
53098c2ecf20Sopenharmony_ci				slot++;
53108c2ecf20Sopenharmony_ci				continue;
53118c2ecf20Sopenharmony_ci			}
53128c2ecf20Sopenharmony_ci			break;
53138c2ecf20Sopenharmony_ci		}
53148c2ecf20Sopenharmony_cifind_next_key:
53158c2ecf20Sopenharmony_ci		/*
53168c2ecf20Sopenharmony_ci		 * we didn't find a candidate key in this node, walk forward
53178c2ecf20Sopenharmony_ci		 * and find another one
53188c2ecf20Sopenharmony_ci		 */
53198c2ecf20Sopenharmony_ci		if (slot >= nritems) {
53208c2ecf20Sopenharmony_ci			path->slots[level] = slot;
53218c2ecf20Sopenharmony_ci			btrfs_set_path_blocking(path);
53228c2ecf20Sopenharmony_ci			sret = btrfs_find_next_key(root, path, min_key, level,
53238c2ecf20Sopenharmony_ci						  min_trans);
53248c2ecf20Sopenharmony_ci			if (sret == 0) {
53258c2ecf20Sopenharmony_ci				btrfs_release_path(path);
53268c2ecf20Sopenharmony_ci				goto again;
53278c2ecf20Sopenharmony_ci			} else {
53288c2ecf20Sopenharmony_ci				goto out;
53298c2ecf20Sopenharmony_ci			}
53308c2ecf20Sopenharmony_ci		}
53318c2ecf20Sopenharmony_ci		/* save our key for returning back */
53328c2ecf20Sopenharmony_ci		btrfs_node_key_to_cpu(cur, &found_key, slot);
53338c2ecf20Sopenharmony_ci		path->slots[level] = slot;
53348c2ecf20Sopenharmony_ci		if (level == path->lowest_level) {
53358c2ecf20Sopenharmony_ci			ret = 0;
53368c2ecf20Sopenharmony_ci			goto out;
53378c2ecf20Sopenharmony_ci		}
53388c2ecf20Sopenharmony_ci		btrfs_set_path_blocking(path);
53398c2ecf20Sopenharmony_ci		cur = btrfs_read_node_slot(cur, slot);
53408c2ecf20Sopenharmony_ci		if (IS_ERR(cur)) {
53418c2ecf20Sopenharmony_ci			ret = PTR_ERR(cur);
53428c2ecf20Sopenharmony_ci			goto out;
53438c2ecf20Sopenharmony_ci		}
53448c2ecf20Sopenharmony_ci
53458c2ecf20Sopenharmony_ci		btrfs_tree_read_lock(cur);
53468c2ecf20Sopenharmony_ci
53478c2ecf20Sopenharmony_ci		path->locks[level - 1] = BTRFS_READ_LOCK;
53488c2ecf20Sopenharmony_ci		path->nodes[level - 1] = cur;
53498c2ecf20Sopenharmony_ci		unlock_up(path, level, 1, 0, NULL);
53508c2ecf20Sopenharmony_ci	}
53518c2ecf20Sopenharmony_ciout:
53528c2ecf20Sopenharmony_ci	path->keep_locks = keep_locks;
53538c2ecf20Sopenharmony_ci	if (ret == 0) {
53548c2ecf20Sopenharmony_ci		btrfs_unlock_up_safe(path, path->lowest_level + 1);
53558c2ecf20Sopenharmony_ci		btrfs_set_path_blocking(path);
53568c2ecf20Sopenharmony_ci		memcpy(min_key, &found_key, sizeof(found_key));
53578c2ecf20Sopenharmony_ci	}
53588c2ecf20Sopenharmony_ci	return ret;
53598c2ecf20Sopenharmony_ci}
53608c2ecf20Sopenharmony_ci
/*
 * this is similar to btrfs_next_leaf, but does not try to preserve
 * and fixup the path.  It looks for and returns the next key in the
 * tree based on the current path and the min_trans parameters.
 *
 * The search starts at @level of @path, one slot past path->slots[level].
 * The key that is found is stored in @key.
 *
 * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree
 *
 * path->keep_locks should be set to 1 on the search made before
 * calling this function.
 */
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
			struct btrfs_key *key, int level, u64 min_trans)
{
	int slot;
	struct extent_buffer *c;

	WARN_ON(!path->keep_locks && !path->skip_locking);
	while (level < BTRFS_MAX_LEVEL) {
		/* ran off the top of the path: no higher keys */
		if (!path->nodes[level])
			return 1;

		/* candidate is the slot right after the current one */
		slot = path->slots[level] + 1;
		c = path->nodes[level];
next:
		if (slot >= btrfs_header_nritems(c)) {
			int ret;
			int orig_lowest;
			struct btrfs_key cur_key;
			/* this block is exhausted and has no parent in the path */
			if (level + 1 >= BTRFS_MAX_LEVEL ||
			    !path->nodes[level + 1])
				return 1;

			/* parent is locked (or locking is skipped): move up to it */
			if (path->locks[level + 1] || path->skip_locking) {
				level++;
				continue;
			}

			/*
			 * The parent is not locked.  Remember the last key of
			 * this block and search for it again, stopping at this
			 * level, to rebuild the path.
			 */
			slot = btrfs_header_nritems(c) - 1;
			if (level == 0)
				btrfs_item_key_to_cpu(c, &cur_key, slot);
			else
				btrfs_node_key_to_cpu(c, &cur_key, slot);

			orig_lowest = path->lowest_level;
			btrfs_release_path(path);
			path->lowest_level = level;
			ret = btrfs_search_slot(NULL, root, &cur_key, path,
						0, 0);
			/* restore the caller's lowest_level whatever the result */
			path->lowest_level = orig_lowest;
			if (ret < 0)
				return ret;

			c = path->nodes[level];
			slot = path->slots[level];
			/* exact match on the remembered key: step past it */
			if (ret == 0)
				slot++;
			goto next;
		}

		if (level == 0)
			btrfs_item_key_to_cpu(c, key, slot);
		else {
			u64 gen = btrfs_node_ptr_generation(c, slot);

			/* pointer older than min_trans: skip it */
			if (gen < min_trans) {
				slot++;
				goto next;
			}
			btrfs_node_key_to_cpu(c, key, slot);
		}
		return 0;
	}
	return 1;
}
54368c2ecf20Sopenharmony_ci
54378c2ecf20Sopenharmony_ci/*
54388c2ecf20Sopenharmony_ci * search the tree again to find a leaf with greater keys
54398c2ecf20Sopenharmony_ci * returns 0 if it found something or 1 if there are no greater leaves.
54408c2ecf20Sopenharmony_ci * returns < 0 on io errors.
54418c2ecf20Sopenharmony_ci */
54428c2ecf20Sopenharmony_ciint btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
54438c2ecf20Sopenharmony_ci{
54448c2ecf20Sopenharmony_ci	return btrfs_next_old_leaf(root, path, 0);
54458c2ecf20Sopenharmony_ci}
54468c2ecf20Sopenharmony_ci
54478c2ecf20Sopenharmony_ciint btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
54488c2ecf20Sopenharmony_ci			u64 time_seq)
54498c2ecf20Sopenharmony_ci{
54508c2ecf20Sopenharmony_ci	int slot;
54518c2ecf20Sopenharmony_ci	int level;
54528c2ecf20Sopenharmony_ci	struct extent_buffer *c;
54538c2ecf20Sopenharmony_ci	struct extent_buffer *next;
54548c2ecf20Sopenharmony_ci	struct btrfs_key key;
54558c2ecf20Sopenharmony_ci	u32 nritems;
54568c2ecf20Sopenharmony_ci	int ret;
54578c2ecf20Sopenharmony_ci	int old_spinning = path->leave_spinning;
54588c2ecf20Sopenharmony_ci	int next_rw_lock = 0;
54598c2ecf20Sopenharmony_ci
54608c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(path->nodes[0]);
54618c2ecf20Sopenharmony_ci	if (nritems == 0)
54628c2ecf20Sopenharmony_ci		return 1;
54638c2ecf20Sopenharmony_ci
54648c2ecf20Sopenharmony_ci	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
54658c2ecf20Sopenharmony_ciagain:
54668c2ecf20Sopenharmony_ci	level = 1;
54678c2ecf20Sopenharmony_ci	next = NULL;
54688c2ecf20Sopenharmony_ci	next_rw_lock = 0;
54698c2ecf20Sopenharmony_ci	btrfs_release_path(path);
54708c2ecf20Sopenharmony_ci
54718c2ecf20Sopenharmony_ci	path->keep_locks = 1;
54728c2ecf20Sopenharmony_ci	path->leave_spinning = 1;
54738c2ecf20Sopenharmony_ci
54748c2ecf20Sopenharmony_ci	if (time_seq)
54758c2ecf20Sopenharmony_ci		ret = btrfs_search_old_slot(root, &key, path, time_seq);
54768c2ecf20Sopenharmony_ci	else
54778c2ecf20Sopenharmony_ci		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
54788c2ecf20Sopenharmony_ci	path->keep_locks = 0;
54798c2ecf20Sopenharmony_ci
54808c2ecf20Sopenharmony_ci	if (ret < 0)
54818c2ecf20Sopenharmony_ci		return ret;
54828c2ecf20Sopenharmony_ci
54838c2ecf20Sopenharmony_ci	nritems = btrfs_header_nritems(path->nodes[0]);
54848c2ecf20Sopenharmony_ci	/*
54858c2ecf20Sopenharmony_ci	 * by releasing the path above we dropped all our locks.  A balance
54868c2ecf20Sopenharmony_ci	 * could have added more items next to the key that used to be
54878c2ecf20Sopenharmony_ci	 * at the very end of the block.  So, check again here and
54888c2ecf20Sopenharmony_ci	 * advance the path if there are now more items available.
54898c2ecf20Sopenharmony_ci	 */
54908c2ecf20Sopenharmony_ci	if (nritems > 0 && path->slots[0] < nritems - 1) {
54918c2ecf20Sopenharmony_ci		if (ret == 0)
54928c2ecf20Sopenharmony_ci			path->slots[0]++;
54938c2ecf20Sopenharmony_ci		ret = 0;
54948c2ecf20Sopenharmony_ci		goto done;
54958c2ecf20Sopenharmony_ci	}
54968c2ecf20Sopenharmony_ci	/*
54978c2ecf20Sopenharmony_ci	 * So the above check misses one case:
54988c2ecf20Sopenharmony_ci	 * - after releasing the path above, someone has removed the item that
54998c2ecf20Sopenharmony_ci	 *   used to be at the very end of the block, and balance between leafs
55008c2ecf20Sopenharmony_ci	 *   gets another one with bigger key.offset to replace it.
55018c2ecf20Sopenharmony_ci	 *
55028c2ecf20Sopenharmony_ci	 * This one should be returned as well, or we can get leaf corruption
55038c2ecf20Sopenharmony_ci	 * later(esp. in __btrfs_drop_extents()).
55048c2ecf20Sopenharmony_ci	 *
55058c2ecf20Sopenharmony_ci	 * And a bit more explanation about this check,
55068c2ecf20Sopenharmony_ci	 * with ret > 0, the key isn't found, the path points to the slot
55078c2ecf20Sopenharmony_ci	 * where it should be inserted, so the path->slots[0] item must be the
55088c2ecf20Sopenharmony_ci	 * bigger one.
55098c2ecf20Sopenharmony_ci	 */
55108c2ecf20Sopenharmony_ci	if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
55118c2ecf20Sopenharmony_ci		ret = 0;
55128c2ecf20Sopenharmony_ci		goto done;
55138c2ecf20Sopenharmony_ci	}
55148c2ecf20Sopenharmony_ci
55158c2ecf20Sopenharmony_ci	while (level < BTRFS_MAX_LEVEL) {
55168c2ecf20Sopenharmony_ci		if (!path->nodes[level]) {
55178c2ecf20Sopenharmony_ci			ret = 1;
55188c2ecf20Sopenharmony_ci			goto done;
55198c2ecf20Sopenharmony_ci		}
55208c2ecf20Sopenharmony_ci
55218c2ecf20Sopenharmony_ci		slot = path->slots[level] + 1;
55228c2ecf20Sopenharmony_ci		c = path->nodes[level];
55238c2ecf20Sopenharmony_ci		if (slot >= btrfs_header_nritems(c)) {
55248c2ecf20Sopenharmony_ci			level++;
55258c2ecf20Sopenharmony_ci			if (level == BTRFS_MAX_LEVEL) {
55268c2ecf20Sopenharmony_ci				ret = 1;
55278c2ecf20Sopenharmony_ci				goto done;
55288c2ecf20Sopenharmony_ci			}
55298c2ecf20Sopenharmony_ci			continue;
55308c2ecf20Sopenharmony_ci		}
55318c2ecf20Sopenharmony_ci
55328c2ecf20Sopenharmony_ci		if (next) {
55338c2ecf20Sopenharmony_ci			btrfs_tree_unlock_rw(next, next_rw_lock);
55348c2ecf20Sopenharmony_ci			free_extent_buffer(next);
55358c2ecf20Sopenharmony_ci		}
55368c2ecf20Sopenharmony_ci
55378c2ecf20Sopenharmony_ci		next = c;
55388c2ecf20Sopenharmony_ci		next_rw_lock = path->locks[level];
55398c2ecf20Sopenharmony_ci		ret = read_block_for_search(root, path, &next, level,
55408c2ecf20Sopenharmony_ci					    slot, &key);
55418c2ecf20Sopenharmony_ci		if (ret == -EAGAIN)
55428c2ecf20Sopenharmony_ci			goto again;
55438c2ecf20Sopenharmony_ci
55448c2ecf20Sopenharmony_ci		if (ret < 0) {
55458c2ecf20Sopenharmony_ci			btrfs_release_path(path);
55468c2ecf20Sopenharmony_ci			goto done;
55478c2ecf20Sopenharmony_ci		}
55488c2ecf20Sopenharmony_ci
55498c2ecf20Sopenharmony_ci		if (!path->skip_locking) {
55508c2ecf20Sopenharmony_ci			ret = btrfs_try_tree_read_lock(next);
55518c2ecf20Sopenharmony_ci			if (!ret && time_seq) {
55528c2ecf20Sopenharmony_ci				/*
55538c2ecf20Sopenharmony_ci				 * If we don't get the lock, we may be racing
55548c2ecf20Sopenharmony_ci				 * with push_leaf_left, holding that lock while
55558c2ecf20Sopenharmony_ci				 * itself waiting for the leaf we've currently
55568c2ecf20Sopenharmony_ci				 * locked. To solve this situation, we give up
55578c2ecf20Sopenharmony_ci				 * on our lock and cycle.
55588c2ecf20Sopenharmony_ci				 */
55598c2ecf20Sopenharmony_ci				free_extent_buffer(next);
55608c2ecf20Sopenharmony_ci				btrfs_release_path(path);
55618c2ecf20Sopenharmony_ci				cond_resched();
55628c2ecf20Sopenharmony_ci				goto again;
55638c2ecf20Sopenharmony_ci			}
55648c2ecf20Sopenharmony_ci			if (!ret) {
55658c2ecf20Sopenharmony_ci				btrfs_set_path_blocking(path);
55668c2ecf20Sopenharmony_ci				__btrfs_tree_read_lock(next,
55678c2ecf20Sopenharmony_ci						       BTRFS_NESTING_RIGHT,
55688c2ecf20Sopenharmony_ci						       path->recurse);
55698c2ecf20Sopenharmony_ci			}
55708c2ecf20Sopenharmony_ci			next_rw_lock = BTRFS_READ_LOCK;
55718c2ecf20Sopenharmony_ci		}
55728c2ecf20Sopenharmony_ci		break;
55738c2ecf20Sopenharmony_ci	}
55748c2ecf20Sopenharmony_ci	path->slots[level] = slot;
55758c2ecf20Sopenharmony_ci	while (1) {
55768c2ecf20Sopenharmony_ci		level--;
55778c2ecf20Sopenharmony_ci		c = path->nodes[level];
55788c2ecf20Sopenharmony_ci		if (path->locks[level])
55798c2ecf20Sopenharmony_ci			btrfs_tree_unlock_rw(c, path->locks[level]);
55808c2ecf20Sopenharmony_ci
55818c2ecf20Sopenharmony_ci		free_extent_buffer(c);
55828c2ecf20Sopenharmony_ci		path->nodes[level] = next;
55838c2ecf20Sopenharmony_ci		path->slots[level] = 0;
55848c2ecf20Sopenharmony_ci		if (!path->skip_locking)
55858c2ecf20Sopenharmony_ci			path->locks[level] = next_rw_lock;
55868c2ecf20Sopenharmony_ci		if (!level)
55878c2ecf20Sopenharmony_ci			break;
55888c2ecf20Sopenharmony_ci
55898c2ecf20Sopenharmony_ci		ret = read_block_for_search(root, path, &next, level,
55908c2ecf20Sopenharmony_ci					    0, &key);
55918c2ecf20Sopenharmony_ci		if (ret == -EAGAIN)
55928c2ecf20Sopenharmony_ci			goto again;
55938c2ecf20Sopenharmony_ci
55948c2ecf20Sopenharmony_ci		if (ret < 0) {
55958c2ecf20Sopenharmony_ci			btrfs_release_path(path);
55968c2ecf20Sopenharmony_ci			goto done;
55978c2ecf20Sopenharmony_ci		}
55988c2ecf20Sopenharmony_ci
55998c2ecf20Sopenharmony_ci		if (!path->skip_locking) {
56008c2ecf20Sopenharmony_ci			ret = btrfs_try_tree_read_lock(next);
56018c2ecf20Sopenharmony_ci			if (!ret) {
56028c2ecf20Sopenharmony_ci				btrfs_set_path_blocking(path);
56038c2ecf20Sopenharmony_ci				__btrfs_tree_read_lock(next,
56048c2ecf20Sopenharmony_ci						       BTRFS_NESTING_RIGHT,
56058c2ecf20Sopenharmony_ci						       path->recurse);
56068c2ecf20Sopenharmony_ci			}
56078c2ecf20Sopenharmony_ci			next_rw_lock = BTRFS_READ_LOCK;
56088c2ecf20Sopenharmony_ci		}
56098c2ecf20Sopenharmony_ci	}
56108c2ecf20Sopenharmony_ci	ret = 0;
56118c2ecf20Sopenharmony_cidone:
56128c2ecf20Sopenharmony_ci	unlock_up(path, 0, 1, 0, NULL);
56138c2ecf20Sopenharmony_ci	path->leave_spinning = old_spinning;
56148c2ecf20Sopenharmony_ci	if (!old_spinning)
56158c2ecf20Sopenharmony_ci		btrfs_set_path_blocking(path);
56168c2ecf20Sopenharmony_ci
56178c2ecf20Sopenharmony_ci	return ret;
56188c2ecf20Sopenharmony_ci}
56198c2ecf20Sopenharmony_ci
56208c2ecf20Sopenharmony_ci/*
56218c2ecf20Sopenharmony_ci * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
56228c2ecf20Sopenharmony_ci * searching until it gets past min_objectid or finds an item of 'type'
56238c2ecf20Sopenharmony_ci *
56248c2ecf20Sopenharmony_ci * returns 0 if something is found, 1 if nothing was found and < 0 on error
56258c2ecf20Sopenharmony_ci */
56268c2ecf20Sopenharmony_ciint btrfs_previous_item(struct btrfs_root *root,
56278c2ecf20Sopenharmony_ci			struct btrfs_path *path, u64 min_objectid,
56288c2ecf20Sopenharmony_ci			int type)
56298c2ecf20Sopenharmony_ci{
56308c2ecf20Sopenharmony_ci	struct btrfs_key found_key;
56318c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
56328c2ecf20Sopenharmony_ci	u32 nritems;
56338c2ecf20Sopenharmony_ci	int ret;
56348c2ecf20Sopenharmony_ci
56358c2ecf20Sopenharmony_ci	while (1) {
56368c2ecf20Sopenharmony_ci		if (path->slots[0] == 0) {
56378c2ecf20Sopenharmony_ci			btrfs_set_path_blocking(path);
56388c2ecf20Sopenharmony_ci			ret = btrfs_prev_leaf(root, path);
56398c2ecf20Sopenharmony_ci			if (ret != 0)
56408c2ecf20Sopenharmony_ci				return ret;
56418c2ecf20Sopenharmony_ci		} else {
56428c2ecf20Sopenharmony_ci			path->slots[0]--;
56438c2ecf20Sopenharmony_ci		}
56448c2ecf20Sopenharmony_ci		leaf = path->nodes[0];
56458c2ecf20Sopenharmony_ci		nritems = btrfs_header_nritems(leaf);
56468c2ecf20Sopenharmony_ci		if (nritems == 0)
56478c2ecf20Sopenharmony_ci			return 1;
56488c2ecf20Sopenharmony_ci		if (path->slots[0] == nritems)
56498c2ecf20Sopenharmony_ci			path->slots[0]--;
56508c2ecf20Sopenharmony_ci
56518c2ecf20Sopenharmony_ci		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
56528c2ecf20Sopenharmony_ci		if (found_key.objectid < min_objectid)
56538c2ecf20Sopenharmony_ci			break;
56548c2ecf20Sopenharmony_ci		if (found_key.type == type)
56558c2ecf20Sopenharmony_ci			return 0;
56568c2ecf20Sopenharmony_ci		if (found_key.objectid == min_objectid &&
56578c2ecf20Sopenharmony_ci		    found_key.type < type)
56588c2ecf20Sopenharmony_ci			break;
56598c2ecf20Sopenharmony_ci	}
56608c2ecf20Sopenharmony_ci	return 1;
56618c2ecf20Sopenharmony_ci}
56628c2ecf20Sopenharmony_ci
56638c2ecf20Sopenharmony_ci/*
56648c2ecf20Sopenharmony_ci * search in extent tree to find a previous Metadata/Data extent item with
56658c2ecf20Sopenharmony_ci * min objecitd.
56668c2ecf20Sopenharmony_ci *
56678c2ecf20Sopenharmony_ci * returns 0 if something is found, 1 if nothing was found and < 0 on error
56688c2ecf20Sopenharmony_ci */
56698c2ecf20Sopenharmony_ciint btrfs_previous_extent_item(struct btrfs_root *root,
56708c2ecf20Sopenharmony_ci			struct btrfs_path *path, u64 min_objectid)
56718c2ecf20Sopenharmony_ci{
56728c2ecf20Sopenharmony_ci	struct btrfs_key found_key;
56738c2ecf20Sopenharmony_ci	struct extent_buffer *leaf;
56748c2ecf20Sopenharmony_ci	u32 nritems;
56758c2ecf20Sopenharmony_ci	int ret;
56768c2ecf20Sopenharmony_ci
56778c2ecf20Sopenharmony_ci	while (1) {
56788c2ecf20Sopenharmony_ci		if (path->slots[0] == 0) {
56798c2ecf20Sopenharmony_ci			btrfs_set_path_blocking(path);
56808c2ecf20Sopenharmony_ci			ret = btrfs_prev_leaf(root, path);
56818c2ecf20Sopenharmony_ci			if (ret != 0)
56828c2ecf20Sopenharmony_ci				return ret;
56838c2ecf20Sopenharmony_ci		} else {
56848c2ecf20Sopenharmony_ci			path->slots[0]--;
56858c2ecf20Sopenharmony_ci		}
56868c2ecf20Sopenharmony_ci		leaf = path->nodes[0];
56878c2ecf20Sopenharmony_ci		nritems = btrfs_header_nritems(leaf);
56888c2ecf20Sopenharmony_ci		if (nritems == 0)
56898c2ecf20Sopenharmony_ci			return 1;
56908c2ecf20Sopenharmony_ci		if (path->slots[0] == nritems)
56918c2ecf20Sopenharmony_ci			path->slots[0]--;
56928c2ecf20Sopenharmony_ci
56938c2ecf20Sopenharmony_ci		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
56948c2ecf20Sopenharmony_ci		if (found_key.objectid < min_objectid)
56958c2ecf20Sopenharmony_ci			break;
56968c2ecf20Sopenharmony_ci		if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
56978c2ecf20Sopenharmony_ci		    found_key.type == BTRFS_METADATA_ITEM_KEY)
56988c2ecf20Sopenharmony_ci			return 0;
56998c2ecf20Sopenharmony_ci		if (found_key.objectid == min_objectid &&
57008c2ecf20Sopenharmony_ci		    found_key.type < BTRFS_EXTENT_ITEM_KEY)
57018c2ecf20Sopenharmony_ci			break;
57028c2ecf20Sopenharmony_ci	}
57038c2ecf20Sopenharmony_ci	return 1;
57048c2ecf20Sopenharmony_ci}
5705