162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2007,2008 Oracle. All rights reserved. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/sched.h> 762306a36Sopenharmony_ci#include <linux/slab.h> 862306a36Sopenharmony_ci#include <linux/rbtree.h> 962306a36Sopenharmony_ci#include <linux/mm.h> 1062306a36Sopenharmony_ci#include <linux/error-injection.h> 1162306a36Sopenharmony_ci#include "messages.h" 1262306a36Sopenharmony_ci#include "ctree.h" 1362306a36Sopenharmony_ci#include "disk-io.h" 1462306a36Sopenharmony_ci#include "transaction.h" 1562306a36Sopenharmony_ci#include "print-tree.h" 1662306a36Sopenharmony_ci#include "locking.h" 1762306a36Sopenharmony_ci#include "volumes.h" 1862306a36Sopenharmony_ci#include "qgroup.h" 1962306a36Sopenharmony_ci#include "tree-mod-log.h" 2062306a36Sopenharmony_ci#include "tree-checker.h" 2162306a36Sopenharmony_ci#include "fs.h" 2262306a36Sopenharmony_ci#include "accessors.h" 2362306a36Sopenharmony_ci#include "extent-tree.h" 2462306a36Sopenharmony_ci#include "relocation.h" 2562306a36Sopenharmony_ci#include "file-item.h" 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_cistatic struct kmem_cache *btrfs_path_cachep; 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_cistatic int split_node(struct btrfs_trans_handle *trans, struct btrfs_root 3062306a36Sopenharmony_ci *root, struct btrfs_path *path, int level); 3162306a36Sopenharmony_cistatic int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3262306a36Sopenharmony_ci const struct btrfs_key *ins_key, struct btrfs_path *path, 3362306a36Sopenharmony_ci int data_size, int extend); 3462306a36Sopenharmony_cistatic int push_node_left(struct btrfs_trans_handle *trans, 3562306a36Sopenharmony_ci struct extent_buffer *dst, 3662306a36Sopenharmony_ci struct extent_buffer *src, int empty); 3762306a36Sopenharmony_cistatic int balance_node_right(struct 
btrfs_trans_handle *trans, 3862306a36Sopenharmony_ci struct extent_buffer *dst_buf, 3962306a36Sopenharmony_ci struct extent_buffer *src_buf); 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistatic const struct btrfs_csums { 4262306a36Sopenharmony_ci u16 size; 4362306a36Sopenharmony_ci const char name[10]; 4462306a36Sopenharmony_ci const char driver[12]; 4562306a36Sopenharmony_ci} btrfs_csums[] = { 4662306a36Sopenharmony_ci [BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" }, 4762306a36Sopenharmony_ci [BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" }, 4862306a36Sopenharmony_ci [BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" }, 4962306a36Sopenharmony_ci [BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b", 5062306a36Sopenharmony_ci .driver = "blake2b-256" }, 5162306a36Sopenharmony_ci}; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci/* 5462306a36Sopenharmony_ci * The leaf data grows from end-to-front in the node. this returns the address 5562306a36Sopenharmony_ci * of the start of the last item, which is the stop of the leaf data stack. 5662306a36Sopenharmony_ci */ 5762306a36Sopenharmony_cistatic unsigned int leaf_data_end(const struct extent_buffer *leaf) 5862306a36Sopenharmony_ci{ 5962306a36Sopenharmony_ci u32 nr = btrfs_header_nritems(leaf); 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci if (nr == 0) 6262306a36Sopenharmony_ci return BTRFS_LEAF_DATA_SIZE(leaf->fs_info); 6362306a36Sopenharmony_ci return btrfs_item_offset(leaf, nr - 1); 6462306a36Sopenharmony_ci} 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci/* 6762306a36Sopenharmony_ci * Move data in a @leaf (using memmove, safe for overlapping ranges). 
6862306a36Sopenharmony_ci * 6962306a36Sopenharmony_ci * @leaf: leaf that we're doing a memmove on 7062306a36Sopenharmony_ci * @dst_offset: item data offset we're moving to 7162306a36Sopenharmony_ci * @src_offset: item data offset were' moving from 7262306a36Sopenharmony_ci * @len: length of the data we're moving 7362306a36Sopenharmony_ci * 7462306a36Sopenharmony_ci * Wrapper around memmove_extent_buffer() that takes into account the header on 7562306a36Sopenharmony_ci * the leaf. The btrfs_item offset's start directly after the header, so we 7662306a36Sopenharmony_ci * have to adjust any offsets to account for the header in the leaf. This 7762306a36Sopenharmony_ci * handles that math to simplify the callers. 7862306a36Sopenharmony_ci */ 7962306a36Sopenharmony_cistatic inline void memmove_leaf_data(const struct extent_buffer *leaf, 8062306a36Sopenharmony_ci unsigned long dst_offset, 8162306a36Sopenharmony_ci unsigned long src_offset, 8262306a36Sopenharmony_ci unsigned long len) 8362306a36Sopenharmony_ci{ 8462306a36Sopenharmony_ci memmove_extent_buffer(leaf, btrfs_item_nr_offset(leaf, 0) + dst_offset, 8562306a36Sopenharmony_ci btrfs_item_nr_offset(leaf, 0) + src_offset, len); 8662306a36Sopenharmony_ci} 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci/* 8962306a36Sopenharmony_ci * Copy item data from @src into @dst at the given @offset. 9062306a36Sopenharmony_ci * 9162306a36Sopenharmony_ci * @dst: destination leaf that we're copying into 9262306a36Sopenharmony_ci * @src: source leaf that we're copying from 9362306a36Sopenharmony_ci * @dst_offset: item data offset we're copying to 9462306a36Sopenharmony_ci * @src_offset: item data offset were' copying from 9562306a36Sopenharmony_ci * @len: length of the data we're copying 9662306a36Sopenharmony_ci * 9762306a36Sopenharmony_ci * Wrapper around copy_extent_buffer() that takes into account the header on 9862306a36Sopenharmony_ci * the leaf. 
The btrfs_item offset's start directly after the header, so we 9962306a36Sopenharmony_ci * have to adjust any offsets to account for the header in the leaf. This 10062306a36Sopenharmony_ci * handles that math to simplify the callers. 10162306a36Sopenharmony_ci */ 10262306a36Sopenharmony_cistatic inline void copy_leaf_data(const struct extent_buffer *dst, 10362306a36Sopenharmony_ci const struct extent_buffer *src, 10462306a36Sopenharmony_ci unsigned long dst_offset, 10562306a36Sopenharmony_ci unsigned long src_offset, unsigned long len) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci copy_extent_buffer(dst, src, btrfs_item_nr_offset(dst, 0) + dst_offset, 10862306a36Sopenharmony_ci btrfs_item_nr_offset(src, 0) + src_offset, len); 10962306a36Sopenharmony_ci} 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci/* 11262306a36Sopenharmony_ci * Move items in a @leaf (using memmove). 11362306a36Sopenharmony_ci * 11462306a36Sopenharmony_ci * @dst: destination leaf for the items 11562306a36Sopenharmony_ci * @dst_item: the item nr we're copying into 11662306a36Sopenharmony_ci * @src_item: the item nr we're copying from 11762306a36Sopenharmony_ci * @nr_items: the number of items to copy 11862306a36Sopenharmony_ci * 11962306a36Sopenharmony_ci * Wrapper around memmove_extent_buffer() that does the math to get the 12062306a36Sopenharmony_ci * appropriate offsets into the leaf from the item numbers. 
12162306a36Sopenharmony_ci */ 12262306a36Sopenharmony_cistatic inline void memmove_leaf_items(const struct extent_buffer *leaf, 12362306a36Sopenharmony_ci int dst_item, int src_item, int nr_items) 12462306a36Sopenharmony_ci{ 12562306a36Sopenharmony_ci memmove_extent_buffer(leaf, btrfs_item_nr_offset(leaf, dst_item), 12662306a36Sopenharmony_ci btrfs_item_nr_offset(leaf, src_item), 12762306a36Sopenharmony_ci nr_items * sizeof(struct btrfs_item)); 12862306a36Sopenharmony_ci} 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci/* 13162306a36Sopenharmony_ci * Copy items from @src into @dst at the given @offset. 13262306a36Sopenharmony_ci * 13362306a36Sopenharmony_ci * @dst: destination leaf for the items 13462306a36Sopenharmony_ci * @src: source leaf for the items 13562306a36Sopenharmony_ci * @dst_item: the item nr we're copying into 13662306a36Sopenharmony_ci * @src_item: the item nr we're copying from 13762306a36Sopenharmony_ci * @nr_items: the number of items to copy 13862306a36Sopenharmony_ci * 13962306a36Sopenharmony_ci * Wrapper around copy_extent_buffer() that does the math to get the 14062306a36Sopenharmony_ci * appropriate offsets into the leaf from the item numbers. 14162306a36Sopenharmony_ci */ 14262306a36Sopenharmony_cistatic inline void copy_leaf_items(const struct extent_buffer *dst, 14362306a36Sopenharmony_ci const struct extent_buffer *src, 14462306a36Sopenharmony_ci int dst_item, int src_item, int nr_items) 14562306a36Sopenharmony_ci{ 14662306a36Sopenharmony_ci copy_extent_buffer(dst, src, btrfs_item_nr_offset(dst, dst_item), 14762306a36Sopenharmony_ci btrfs_item_nr_offset(src, src_item), 14862306a36Sopenharmony_ci nr_items * sizeof(struct btrfs_item)); 14962306a36Sopenharmony_ci} 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci/* This exists for btrfs-progs usages. 
*/ 15262306a36Sopenharmony_ciu16 btrfs_csum_type_size(u16 type) 15362306a36Sopenharmony_ci{ 15462306a36Sopenharmony_ci return btrfs_csums[type].size; 15562306a36Sopenharmony_ci} 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ciint btrfs_super_csum_size(const struct btrfs_super_block *s) 15862306a36Sopenharmony_ci{ 15962306a36Sopenharmony_ci u16 t = btrfs_super_csum_type(s); 16062306a36Sopenharmony_ci /* 16162306a36Sopenharmony_ci * csum type is validated at mount time 16262306a36Sopenharmony_ci */ 16362306a36Sopenharmony_ci return btrfs_csum_type_size(t); 16462306a36Sopenharmony_ci} 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ciconst char *btrfs_super_csum_name(u16 csum_type) 16762306a36Sopenharmony_ci{ 16862306a36Sopenharmony_ci /* csum type is validated at mount time */ 16962306a36Sopenharmony_ci return btrfs_csums[csum_type].name; 17062306a36Sopenharmony_ci} 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_ci/* 17362306a36Sopenharmony_ci * Return driver name if defined, otherwise the name that's also a valid driver 17462306a36Sopenharmony_ci * name 17562306a36Sopenharmony_ci */ 17662306a36Sopenharmony_ciconst char *btrfs_super_csum_driver(u16 csum_type) 17762306a36Sopenharmony_ci{ 17862306a36Sopenharmony_ci /* csum type is validated at mount time */ 17962306a36Sopenharmony_ci return btrfs_csums[csum_type].driver[0] ? 
18062306a36Sopenharmony_ci btrfs_csums[csum_type].driver : 18162306a36Sopenharmony_ci btrfs_csums[csum_type].name; 18262306a36Sopenharmony_ci} 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_cisize_t __attribute_const__ btrfs_get_num_csums(void) 18562306a36Sopenharmony_ci{ 18662306a36Sopenharmony_ci return ARRAY_SIZE(btrfs_csums); 18762306a36Sopenharmony_ci} 18862306a36Sopenharmony_ci 18962306a36Sopenharmony_cistruct btrfs_path *btrfs_alloc_path(void) 19062306a36Sopenharmony_ci{ 19162306a36Sopenharmony_ci might_sleep(); 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci return kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS); 19462306a36Sopenharmony_ci} 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci/* this also releases the path */ 19762306a36Sopenharmony_civoid btrfs_free_path(struct btrfs_path *p) 19862306a36Sopenharmony_ci{ 19962306a36Sopenharmony_ci if (!p) 20062306a36Sopenharmony_ci return; 20162306a36Sopenharmony_ci btrfs_release_path(p); 20262306a36Sopenharmony_ci kmem_cache_free(btrfs_path_cachep, p); 20362306a36Sopenharmony_ci} 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci/* 20662306a36Sopenharmony_ci * path release drops references on the extent buffers in the path 20762306a36Sopenharmony_ci * and it drops any locks held by this path 20862306a36Sopenharmony_ci * 20962306a36Sopenharmony_ci * It is safe to call this on paths that no locks or extent buffers held. 
21062306a36Sopenharmony_ci */ 21162306a36Sopenharmony_cinoinline void btrfs_release_path(struct btrfs_path *p) 21262306a36Sopenharmony_ci{ 21362306a36Sopenharmony_ci int i; 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 21662306a36Sopenharmony_ci p->slots[i] = 0; 21762306a36Sopenharmony_ci if (!p->nodes[i]) 21862306a36Sopenharmony_ci continue; 21962306a36Sopenharmony_ci if (p->locks[i]) { 22062306a36Sopenharmony_ci btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]); 22162306a36Sopenharmony_ci p->locks[i] = 0; 22262306a36Sopenharmony_ci } 22362306a36Sopenharmony_ci free_extent_buffer(p->nodes[i]); 22462306a36Sopenharmony_ci p->nodes[i] = NULL; 22562306a36Sopenharmony_ci } 22662306a36Sopenharmony_ci} 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci/* 22962306a36Sopenharmony_ci * We want the transaction abort to print stack trace only for errors where the 23062306a36Sopenharmony_ci * cause could be a bug, eg. due to ENOSPC, and not for common errors that are 23162306a36Sopenharmony_ci * caused by external factors. 23262306a36Sopenharmony_ci */ 23362306a36Sopenharmony_cibool __cold abort_should_print_stack(int errno) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci switch (errno) { 23662306a36Sopenharmony_ci case -EIO: 23762306a36Sopenharmony_ci case -EROFS: 23862306a36Sopenharmony_ci case -ENOMEM: 23962306a36Sopenharmony_ci return false; 24062306a36Sopenharmony_ci } 24162306a36Sopenharmony_ci return true; 24262306a36Sopenharmony_ci} 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci/* 24562306a36Sopenharmony_ci * safely gets a reference on the root node of a tree. A lock 24662306a36Sopenharmony_ci * is not taken, so a concurrent writer may put a different node 24762306a36Sopenharmony_ci * at the root of the tree. See btrfs_lock_root_node for the 24862306a36Sopenharmony_ci * looping required. 
24962306a36Sopenharmony_ci * 25062306a36Sopenharmony_ci * The extent buffer returned by this has a reference taken, so 25162306a36Sopenharmony_ci * it won't disappear. It may stop being the root of the tree 25262306a36Sopenharmony_ci * at any time because there are no locks held. 25362306a36Sopenharmony_ci */ 25462306a36Sopenharmony_cistruct extent_buffer *btrfs_root_node(struct btrfs_root *root) 25562306a36Sopenharmony_ci{ 25662306a36Sopenharmony_ci struct extent_buffer *eb; 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci while (1) { 25962306a36Sopenharmony_ci rcu_read_lock(); 26062306a36Sopenharmony_ci eb = rcu_dereference(root->node); 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci /* 26362306a36Sopenharmony_ci * RCU really hurts here, we could free up the root node because 26462306a36Sopenharmony_ci * it was COWed but we may not get the new root node yet so do 26562306a36Sopenharmony_ci * the inc_not_zero dance and if it doesn't work then 26662306a36Sopenharmony_ci * synchronize_rcu and try again. 26762306a36Sopenharmony_ci */ 26862306a36Sopenharmony_ci if (atomic_inc_not_zero(&eb->refs)) { 26962306a36Sopenharmony_ci rcu_read_unlock(); 27062306a36Sopenharmony_ci break; 27162306a36Sopenharmony_ci } 27262306a36Sopenharmony_ci rcu_read_unlock(); 27362306a36Sopenharmony_ci synchronize_rcu(); 27462306a36Sopenharmony_ci } 27562306a36Sopenharmony_ci return eb; 27662306a36Sopenharmony_ci} 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci/* 27962306a36Sopenharmony_ci * Cowonly root (not-shareable trees, everything not subvolume or reloc roots), 28062306a36Sopenharmony_ci * just get put onto a simple dirty list. Transaction walks this list to make 28162306a36Sopenharmony_ci * sure they get properly updated on disk. 
28262306a36Sopenharmony_ci */ 28362306a36Sopenharmony_cistatic void add_root_to_dirty_list(struct btrfs_root *root) 28462306a36Sopenharmony_ci{ 28562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci if (test_bit(BTRFS_ROOT_DIRTY, &root->state) || 28862306a36Sopenharmony_ci !test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state)) 28962306a36Sopenharmony_ci return; 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci spin_lock(&fs_info->trans_lock); 29262306a36Sopenharmony_ci if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) { 29362306a36Sopenharmony_ci /* Want the extent tree to be the last on the list */ 29462306a36Sopenharmony_ci if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID) 29562306a36Sopenharmony_ci list_move_tail(&root->dirty_list, 29662306a36Sopenharmony_ci &fs_info->dirty_cowonly_roots); 29762306a36Sopenharmony_ci else 29862306a36Sopenharmony_ci list_move(&root->dirty_list, 29962306a36Sopenharmony_ci &fs_info->dirty_cowonly_roots); 30062306a36Sopenharmony_ci } 30162306a36Sopenharmony_ci spin_unlock(&fs_info->trans_lock); 30262306a36Sopenharmony_ci} 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci/* 30562306a36Sopenharmony_ci * used by snapshot creation to make a copy of a root for a tree with 30662306a36Sopenharmony_ci * a given objectid. The buffer with the new root node is returned in 30762306a36Sopenharmony_ci * cow_ret, and this func returns zero on success or a negative error code. 
30862306a36Sopenharmony_ci */ 30962306a36Sopenharmony_ciint btrfs_copy_root(struct btrfs_trans_handle *trans, 31062306a36Sopenharmony_ci struct btrfs_root *root, 31162306a36Sopenharmony_ci struct extent_buffer *buf, 31262306a36Sopenharmony_ci struct extent_buffer **cow_ret, u64 new_root_objectid) 31362306a36Sopenharmony_ci{ 31462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 31562306a36Sopenharmony_ci struct extent_buffer *cow; 31662306a36Sopenharmony_ci int ret = 0; 31762306a36Sopenharmony_ci int level; 31862306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 32162306a36Sopenharmony_ci trans->transid != fs_info->running_transaction->transid); 32262306a36Sopenharmony_ci WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 32362306a36Sopenharmony_ci trans->transid != root->last_trans); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci level = btrfs_header_level(buf); 32662306a36Sopenharmony_ci if (level == 0) 32762306a36Sopenharmony_ci btrfs_item_key(buf, &disk_key, 0); 32862306a36Sopenharmony_ci else 32962306a36Sopenharmony_ci btrfs_node_key(buf, &disk_key, 0); 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci cow = btrfs_alloc_tree_block(trans, root, 0, new_root_objectid, 33262306a36Sopenharmony_ci &disk_key, level, buf->start, 0, 33362306a36Sopenharmony_ci BTRFS_NESTING_NEW_ROOT); 33462306a36Sopenharmony_ci if (IS_ERR(cow)) 33562306a36Sopenharmony_ci return PTR_ERR(cow); 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci copy_extent_buffer_full(cow, buf); 33862306a36Sopenharmony_ci btrfs_set_header_bytenr(cow, cow->start); 33962306a36Sopenharmony_ci btrfs_set_header_generation(cow, trans->transid); 34062306a36Sopenharmony_ci btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); 34162306a36Sopenharmony_ci btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | 34262306a36Sopenharmony_ci BTRFS_HEADER_FLAG_RELOC); 
34362306a36Sopenharmony_ci if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 34462306a36Sopenharmony_ci btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); 34562306a36Sopenharmony_ci else 34662306a36Sopenharmony_ci btrfs_set_header_owner(cow, new_root_objectid); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci WARN_ON(btrfs_header_generation(buf) > trans->transid); 35162306a36Sopenharmony_ci if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 35262306a36Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 1); 35362306a36Sopenharmony_ci else 35462306a36Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 0); 35562306a36Sopenharmony_ci if (ret) { 35662306a36Sopenharmony_ci btrfs_tree_unlock(cow); 35762306a36Sopenharmony_ci free_extent_buffer(cow); 35862306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 35962306a36Sopenharmony_ci return ret; 36062306a36Sopenharmony_ci } 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, cow); 36362306a36Sopenharmony_ci *cow_ret = cow; 36462306a36Sopenharmony_ci return 0; 36562306a36Sopenharmony_ci} 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci/* 36862306a36Sopenharmony_ci * check if the tree block can be shared by multiple trees 36962306a36Sopenharmony_ci */ 37062306a36Sopenharmony_ciint btrfs_block_can_be_shared(struct btrfs_trans_handle *trans, 37162306a36Sopenharmony_ci struct btrfs_root *root, 37262306a36Sopenharmony_ci struct extent_buffer *buf) 37362306a36Sopenharmony_ci{ 37462306a36Sopenharmony_ci /* 37562306a36Sopenharmony_ci * Tree blocks not in shareable trees and tree roots are never shared. 37662306a36Sopenharmony_ci * If a block was allocated after the last snapshot and the block was 37762306a36Sopenharmony_ci * not allocated by tree relocation, we know the block is not shared. 
37862306a36Sopenharmony_ci */ 37962306a36Sopenharmony_ci if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 38062306a36Sopenharmony_ci buf != root->node && 38162306a36Sopenharmony_ci (btrfs_header_generation(buf) <= 38262306a36Sopenharmony_ci btrfs_root_last_snapshot(&root->root_item) || 38362306a36Sopenharmony_ci btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) { 38462306a36Sopenharmony_ci if (buf != root->commit_root) 38562306a36Sopenharmony_ci return 1; 38662306a36Sopenharmony_ci /* 38762306a36Sopenharmony_ci * An extent buffer that used to be the commit root may still be 38862306a36Sopenharmony_ci * shared because the tree height may have increased and it 38962306a36Sopenharmony_ci * became a child of a higher level root. This can happen when 39062306a36Sopenharmony_ci * snapshotting a subvolume created in the current transaction. 39162306a36Sopenharmony_ci */ 39262306a36Sopenharmony_ci if (btrfs_header_generation(buf) == trans->transid) 39362306a36Sopenharmony_ci return 1; 39462306a36Sopenharmony_ci } 39562306a36Sopenharmony_ci 39662306a36Sopenharmony_ci return 0; 39762306a36Sopenharmony_ci} 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_cistatic noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, 40062306a36Sopenharmony_ci struct btrfs_root *root, 40162306a36Sopenharmony_ci struct extent_buffer *buf, 40262306a36Sopenharmony_ci struct extent_buffer *cow, 40362306a36Sopenharmony_ci int *last_ref) 40462306a36Sopenharmony_ci{ 40562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 40662306a36Sopenharmony_ci u64 refs; 40762306a36Sopenharmony_ci u64 owner; 40862306a36Sopenharmony_ci u64 flags; 40962306a36Sopenharmony_ci u64 new_flags = 0; 41062306a36Sopenharmony_ci int ret; 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci /* 41362306a36Sopenharmony_ci * Backrefs update rules: 41462306a36Sopenharmony_ci * 41562306a36Sopenharmony_ci * Always use full backrefs for extent pointers in tree block 41662306a36Sopenharmony_ci * 
allocated by tree relocation. 41762306a36Sopenharmony_ci * 41862306a36Sopenharmony_ci * If a shared tree block is no longer referenced by its owner 41962306a36Sopenharmony_ci * tree (btrfs_header_owner(buf) == root->root_key.objectid), 42062306a36Sopenharmony_ci * use full backrefs for extent pointers in tree block. 42162306a36Sopenharmony_ci * 42262306a36Sopenharmony_ci * If a tree block is been relocating 42362306a36Sopenharmony_ci * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID), 42462306a36Sopenharmony_ci * use full backrefs for extent pointers in tree block. 42562306a36Sopenharmony_ci * The reason for this is some operations (such as drop tree) 42662306a36Sopenharmony_ci * are only allowed for blocks use full backrefs. 42762306a36Sopenharmony_ci */ 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci if (btrfs_block_can_be_shared(trans, root, buf)) { 43062306a36Sopenharmony_ci ret = btrfs_lookup_extent_info(trans, fs_info, buf->start, 43162306a36Sopenharmony_ci btrfs_header_level(buf), 1, 43262306a36Sopenharmony_ci &refs, &flags); 43362306a36Sopenharmony_ci if (ret) 43462306a36Sopenharmony_ci return ret; 43562306a36Sopenharmony_ci if (unlikely(refs == 0)) { 43662306a36Sopenharmony_ci btrfs_crit(fs_info, 43762306a36Sopenharmony_ci "found 0 references for tree block at bytenr %llu level %d root %llu", 43862306a36Sopenharmony_ci buf->start, btrfs_header_level(buf), 43962306a36Sopenharmony_ci btrfs_root_id(root)); 44062306a36Sopenharmony_ci ret = -EUCLEAN; 44162306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 44262306a36Sopenharmony_ci return ret; 44362306a36Sopenharmony_ci } 44462306a36Sopenharmony_ci } else { 44562306a36Sopenharmony_ci refs = 1; 44662306a36Sopenharmony_ci if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 44762306a36Sopenharmony_ci btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 44862306a36Sopenharmony_ci flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; 44962306a36Sopenharmony_ci else 45062306a36Sopenharmony_ci flags = 
0; 45162306a36Sopenharmony_ci } 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci owner = btrfs_header_owner(buf); 45462306a36Sopenharmony_ci BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID && 45562306a36Sopenharmony_ci !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci if (refs > 1) { 45862306a36Sopenharmony_ci if ((owner == root->root_key.objectid || 45962306a36Sopenharmony_ci root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && 46062306a36Sopenharmony_ci !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 46162306a36Sopenharmony_ci ret = btrfs_inc_ref(trans, root, buf, 1); 46262306a36Sopenharmony_ci if (ret) 46362306a36Sopenharmony_ci return ret; 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci if (root->root_key.objectid == 46662306a36Sopenharmony_ci BTRFS_TREE_RELOC_OBJECTID) { 46762306a36Sopenharmony_ci ret = btrfs_dec_ref(trans, root, buf, 0); 46862306a36Sopenharmony_ci if (ret) 46962306a36Sopenharmony_ci return ret; 47062306a36Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 1); 47162306a36Sopenharmony_ci if (ret) 47262306a36Sopenharmony_ci return ret; 47362306a36Sopenharmony_ci } 47462306a36Sopenharmony_ci new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 47562306a36Sopenharmony_ci } else { 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci if (root->root_key.objectid == 47862306a36Sopenharmony_ci BTRFS_TREE_RELOC_OBJECTID) 47962306a36Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 1); 48062306a36Sopenharmony_ci else 48162306a36Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 0); 48262306a36Sopenharmony_ci if (ret) 48362306a36Sopenharmony_ci return ret; 48462306a36Sopenharmony_ci } 48562306a36Sopenharmony_ci if (new_flags != 0) { 48662306a36Sopenharmony_ci ret = btrfs_set_disk_extent_flags(trans, buf, new_flags); 48762306a36Sopenharmony_ci if (ret) 48862306a36Sopenharmony_ci return ret; 48962306a36Sopenharmony_ci } 49062306a36Sopenharmony_ci } else { 49162306a36Sopenharmony_ci if (flags & 
BTRFS_BLOCK_FLAG_FULL_BACKREF) { 49262306a36Sopenharmony_ci if (root->root_key.objectid == 49362306a36Sopenharmony_ci BTRFS_TREE_RELOC_OBJECTID) 49462306a36Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 1); 49562306a36Sopenharmony_ci else 49662306a36Sopenharmony_ci ret = btrfs_inc_ref(trans, root, cow, 0); 49762306a36Sopenharmony_ci if (ret) 49862306a36Sopenharmony_ci return ret; 49962306a36Sopenharmony_ci ret = btrfs_dec_ref(trans, root, buf, 1); 50062306a36Sopenharmony_ci if (ret) 50162306a36Sopenharmony_ci return ret; 50262306a36Sopenharmony_ci } 50362306a36Sopenharmony_ci btrfs_clear_buffer_dirty(trans, buf); 50462306a36Sopenharmony_ci *last_ref = 1; 50562306a36Sopenharmony_ci } 50662306a36Sopenharmony_ci return 0; 50762306a36Sopenharmony_ci} 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_ci/* 51062306a36Sopenharmony_ci * does the dirty work in cow of a single block. The parent block (if 51162306a36Sopenharmony_ci * supplied) is updated to point to the new cow copy. The new buffer is marked 51262306a36Sopenharmony_ci * dirty and returned locked. If you modify the block it needs to be marked 51362306a36Sopenharmony_ci * dirty again. 51462306a36Sopenharmony_ci * 51562306a36Sopenharmony_ci * search_start -- an allocation hint for the new block 51662306a36Sopenharmony_ci * 51762306a36Sopenharmony_ci * empty_size -- a hint that you plan on doing more cow. This is the size in 51862306a36Sopenharmony_ci * bytes the allocator should try to find free next to the block it returns. 51962306a36Sopenharmony_ci * This is just a hint and may be ignored by the allocator. 
52062306a36Sopenharmony_ci */ 52162306a36Sopenharmony_cistatic noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, 52262306a36Sopenharmony_ci struct btrfs_root *root, 52362306a36Sopenharmony_ci struct extent_buffer *buf, 52462306a36Sopenharmony_ci struct extent_buffer *parent, int parent_slot, 52562306a36Sopenharmony_ci struct extent_buffer **cow_ret, 52662306a36Sopenharmony_ci u64 search_start, u64 empty_size, 52762306a36Sopenharmony_ci enum btrfs_lock_nesting nest) 52862306a36Sopenharmony_ci{ 52962306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 53062306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 53162306a36Sopenharmony_ci struct extent_buffer *cow; 53262306a36Sopenharmony_ci int level, ret; 53362306a36Sopenharmony_ci int last_ref = 0; 53462306a36Sopenharmony_ci int unlock_orig = 0; 53562306a36Sopenharmony_ci u64 parent_start = 0; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci if (*cow_ret == buf) 53862306a36Sopenharmony_ci unlock_orig = 1; 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci btrfs_assert_tree_write_locked(buf); 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 54362306a36Sopenharmony_ci trans->transid != fs_info->running_transaction->transid); 54462306a36Sopenharmony_ci WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) && 54562306a36Sopenharmony_ci trans->transid != root->last_trans); 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci level = btrfs_header_level(buf); 54862306a36Sopenharmony_ci 54962306a36Sopenharmony_ci if (level == 0) 55062306a36Sopenharmony_ci btrfs_item_key(buf, &disk_key, 0); 55162306a36Sopenharmony_ci else 55262306a36Sopenharmony_ci btrfs_node_key(buf, &disk_key, 0); 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent) 55562306a36Sopenharmony_ci parent_start = parent->start; 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci cow = 
btrfs_alloc_tree_block(trans, root, parent_start, 55862306a36Sopenharmony_ci root->root_key.objectid, &disk_key, level, 55962306a36Sopenharmony_ci search_start, empty_size, nest); 56062306a36Sopenharmony_ci if (IS_ERR(cow)) 56162306a36Sopenharmony_ci return PTR_ERR(cow); 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci /* cow is set to blocking by btrfs_init_new_buffer */ 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci copy_extent_buffer_full(cow, buf); 56662306a36Sopenharmony_ci btrfs_set_header_bytenr(cow, cow->start); 56762306a36Sopenharmony_ci btrfs_set_header_generation(cow, trans->transid); 56862306a36Sopenharmony_ci btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); 56962306a36Sopenharmony_ci btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | 57062306a36Sopenharmony_ci BTRFS_HEADER_FLAG_RELOC); 57162306a36Sopenharmony_ci if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 57262306a36Sopenharmony_ci btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); 57362306a36Sopenharmony_ci else 57462306a36Sopenharmony_ci btrfs_set_header_owner(cow, root->root_key.objectid); 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid); 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci ret = update_ref_for_cow(trans, root, buf, cow, &last_ref); 57962306a36Sopenharmony_ci if (ret) { 58062306a36Sopenharmony_ci btrfs_tree_unlock(cow); 58162306a36Sopenharmony_ci free_extent_buffer(cow); 58262306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 58362306a36Sopenharmony_ci return ret; 58462306a36Sopenharmony_ci } 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) { 58762306a36Sopenharmony_ci ret = btrfs_reloc_cow_block(trans, root, buf, cow); 58862306a36Sopenharmony_ci if (ret) { 58962306a36Sopenharmony_ci btrfs_tree_unlock(cow); 59062306a36Sopenharmony_ci free_extent_buffer(cow); 59162306a36Sopenharmony_ci 
btrfs_abort_transaction(trans, ret); 59262306a36Sopenharmony_ci return ret; 59362306a36Sopenharmony_ci } 59462306a36Sopenharmony_ci } 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci if (buf == root->node) { 59762306a36Sopenharmony_ci WARN_ON(parent && parent != buf); 59862306a36Sopenharmony_ci if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || 59962306a36Sopenharmony_ci btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) 60062306a36Sopenharmony_ci parent_start = buf->start; 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_root(root->node, cow, true); 60362306a36Sopenharmony_ci if (ret < 0) { 60462306a36Sopenharmony_ci btrfs_tree_unlock(cow); 60562306a36Sopenharmony_ci free_extent_buffer(cow); 60662306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 60762306a36Sopenharmony_ci return ret; 60862306a36Sopenharmony_ci } 60962306a36Sopenharmony_ci atomic_inc(&cow->refs); 61062306a36Sopenharmony_ci rcu_assign_pointer(root->node, cow); 61162306a36Sopenharmony_ci 61262306a36Sopenharmony_ci btrfs_free_tree_block(trans, btrfs_root_id(root), buf, 61362306a36Sopenharmony_ci parent_start, last_ref); 61462306a36Sopenharmony_ci free_extent_buffer(buf); 61562306a36Sopenharmony_ci add_root_to_dirty_list(root); 61662306a36Sopenharmony_ci } else { 61762306a36Sopenharmony_ci WARN_ON(trans->transid != btrfs_header_generation(parent)); 61862306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_key(parent, parent_slot, 61962306a36Sopenharmony_ci BTRFS_MOD_LOG_KEY_REPLACE); 62062306a36Sopenharmony_ci if (ret) { 62162306a36Sopenharmony_ci btrfs_tree_unlock(cow); 62262306a36Sopenharmony_ci free_extent_buffer(cow); 62362306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 62462306a36Sopenharmony_ci return ret; 62562306a36Sopenharmony_ci } 62662306a36Sopenharmony_ci btrfs_set_node_blockptr(parent, parent_slot, 62762306a36Sopenharmony_ci cow->start); 62862306a36Sopenharmony_ci btrfs_set_node_ptr_generation(parent, 
parent_slot, 62962306a36Sopenharmony_ci trans->transid); 63062306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, parent); 63162306a36Sopenharmony_ci if (last_ref) { 63262306a36Sopenharmony_ci ret = btrfs_tree_mod_log_free_eb(buf); 63362306a36Sopenharmony_ci if (ret) { 63462306a36Sopenharmony_ci btrfs_tree_unlock(cow); 63562306a36Sopenharmony_ci free_extent_buffer(cow); 63662306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 63762306a36Sopenharmony_ci return ret; 63862306a36Sopenharmony_ci } 63962306a36Sopenharmony_ci } 64062306a36Sopenharmony_ci btrfs_free_tree_block(trans, btrfs_root_id(root), buf, 64162306a36Sopenharmony_ci parent_start, last_ref); 64262306a36Sopenharmony_ci } 64362306a36Sopenharmony_ci if (unlock_orig) 64462306a36Sopenharmony_ci btrfs_tree_unlock(buf); 64562306a36Sopenharmony_ci free_extent_buffer_stale(buf); 64662306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, cow); 64762306a36Sopenharmony_ci *cow_ret = cow; 64862306a36Sopenharmony_ci return 0; 64962306a36Sopenharmony_ci} 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_cistatic inline int should_cow_block(struct btrfs_trans_handle *trans, 65262306a36Sopenharmony_ci struct btrfs_root *root, 65362306a36Sopenharmony_ci struct extent_buffer *buf) 65462306a36Sopenharmony_ci{ 65562306a36Sopenharmony_ci if (btrfs_is_testing(root->fs_info)) 65662306a36Sopenharmony_ci return 0; 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_ci /* Ensure we can see the FORCE_COW bit */ 65962306a36Sopenharmony_ci smp_mb__before_atomic(); 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci /* 66262306a36Sopenharmony_ci * We do not need to cow a block if 66362306a36Sopenharmony_ci * 1) this block is not created or changed in this transaction; 66462306a36Sopenharmony_ci * 2) this block does not belong to TREE_RELOC tree; 66562306a36Sopenharmony_ci * 3) the root is not forced COW. 
66662306a36Sopenharmony_ci * 66762306a36Sopenharmony_ci * What is forced COW: 66862306a36Sopenharmony_ci * when we create snapshot during committing the transaction, 66962306a36Sopenharmony_ci * after we've finished copying src root, we must COW the shared 67062306a36Sopenharmony_ci * block to ensure the metadata consistency. 67162306a36Sopenharmony_ci */ 67262306a36Sopenharmony_ci if (btrfs_header_generation(buf) == trans->transid && 67362306a36Sopenharmony_ci !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && 67462306a36Sopenharmony_ci !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && 67562306a36Sopenharmony_ci btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) && 67662306a36Sopenharmony_ci !test_bit(BTRFS_ROOT_FORCE_COW, &root->state)) 67762306a36Sopenharmony_ci return 0; 67862306a36Sopenharmony_ci return 1; 67962306a36Sopenharmony_ci} 68062306a36Sopenharmony_ci 68162306a36Sopenharmony_ci/* 68262306a36Sopenharmony_ci * cows a single block, see __btrfs_cow_block for the real work. 
68362306a36Sopenharmony_ci * This version of it has extra checks so that a block isn't COWed more than 68462306a36Sopenharmony_ci * once per transaction, as long as it hasn't been written yet 68562306a36Sopenharmony_ci */ 68662306a36Sopenharmony_cinoinline int btrfs_cow_block(struct btrfs_trans_handle *trans, 68762306a36Sopenharmony_ci struct btrfs_root *root, struct extent_buffer *buf, 68862306a36Sopenharmony_ci struct extent_buffer *parent, int parent_slot, 68962306a36Sopenharmony_ci struct extent_buffer **cow_ret, 69062306a36Sopenharmony_ci enum btrfs_lock_nesting nest) 69162306a36Sopenharmony_ci{ 69262306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 69362306a36Sopenharmony_ci u64 search_start; 69462306a36Sopenharmony_ci int ret; 69562306a36Sopenharmony_ci 69662306a36Sopenharmony_ci if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) { 69762306a36Sopenharmony_ci btrfs_abort_transaction(trans, -EUCLEAN); 69862306a36Sopenharmony_ci btrfs_crit(fs_info, 69962306a36Sopenharmony_ci "attempt to COW block %llu on root %llu that is being deleted", 70062306a36Sopenharmony_ci buf->start, btrfs_root_id(root)); 70162306a36Sopenharmony_ci return -EUCLEAN; 70262306a36Sopenharmony_ci } 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci /* 70562306a36Sopenharmony_ci * COWing must happen through a running transaction, which always 70662306a36Sopenharmony_ci * matches the current fs generation (it's a transaction with a state 70762306a36Sopenharmony_ci * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs 70862306a36Sopenharmony_ci * into error state to prevent the commit of any transaction. 
70962306a36Sopenharmony_ci */ 71062306a36Sopenharmony_ci if (unlikely(trans->transaction != fs_info->running_transaction || 71162306a36Sopenharmony_ci trans->transid != fs_info->generation)) { 71262306a36Sopenharmony_ci btrfs_abort_transaction(trans, -EUCLEAN); 71362306a36Sopenharmony_ci btrfs_crit(fs_info, 71462306a36Sopenharmony_ci"unexpected transaction when attempting to COW block %llu on root %llu, transaction %llu running transaction %llu fs generation %llu", 71562306a36Sopenharmony_ci buf->start, btrfs_root_id(root), trans->transid, 71662306a36Sopenharmony_ci fs_info->running_transaction->transid, 71762306a36Sopenharmony_ci fs_info->generation); 71862306a36Sopenharmony_ci return -EUCLEAN; 71962306a36Sopenharmony_ci } 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci if (!should_cow_block(trans, root, buf)) { 72262306a36Sopenharmony_ci *cow_ret = buf; 72362306a36Sopenharmony_ci return 0; 72462306a36Sopenharmony_ci } 72562306a36Sopenharmony_ci 72662306a36Sopenharmony_ci search_start = buf->start & ~((u64)SZ_1G - 1); 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci /* 72962306a36Sopenharmony_ci * Before CoWing this block for later modification, check if it's 73062306a36Sopenharmony_ci * the subtree root and do the delayed subtree trace if needed. 73162306a36Sopenharmony_ci * 73262306a36Sopenharmony_ci * Also We don't care about the error, as it's handled internally. 
73362306a36Sopenharmony_ci */ 73462306a36Sopenharmony_ci btrfs_qgroup_trace_subtree_after_cow(trans, root, buf); 73562306a36Sopenharmony_ci ret = __btrfs_cow_block(trans, root, buf, parent, 73662306a36Sopenharmony_ci parent_slot, cow_ret, search_start, 0, nest); 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci trace_btrfs_cow_block(root, buf, *cow_ret); 73962306a36Sopenharmony_ci 74062306a36Sopenharmony_ci return ret; 74162306a36Sopenharmony_ci} 74262306a36Sopenharmony_ciALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO); 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci/* 74562306a36Sopenharmony_ci * helper function for defrag to decide if two blocks pointed to by a 74662306a36Sopenharmony_ci * node are actually close by 74762306a36Sopenharmony_ci */ 74862306a36Sopenharmony_cistatic int close_blocks(u64 blocknr, u64 other, u32 blocksize) 74962306a36Sopenharmony_ci{ 75062306a36Sopenharmony_ci if (blocknr < other && other - (blocknr + blocksize) < 32768) 75162306a36Sopenharmony_ci return 1; 75262306a36Sopenharmony_ci if (blocknr > other && blocknr - (other + blocksize) < 32768) 75362306a36Sopenharmony_ci return 1; 75462306a36Sopenharmony_ci return 0; 75562306a36Sopenharmony_ci} 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci#ifdef __LITTLE_ENDIAN 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci/* 76062306a36Sopenharmony_ci * Compare two keys, on little-endian the disk order is same as CPU order and 76162306a36Sopenharmony_ci * we can avoid the conversion. 
76262306a36Sopenharmony_ci */ 76362306a36Sopenharmony_cistatic int comp_keys(const struct btrfs_disk_key *disk_key, 76462306a36Sopenharmony_ci const struct btrfs_key *k2) 76562306a36Sopenharmony_ci{ 76662306a36Sopenharmony_ci const struct btrfs_key *k1 = (const struct btrfs_key *)disk_key; 76762306a36Sopenharmony_ci 76862306a36Sopenharmony_ci return btrfs_comp_cpu_keys(k1, k2); 76962306a36Sopenharmony_ci} 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci#else 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci/* 77462306a36Sopenharmony_ci * compare two keys in a memcmp fashion 77562306a36Sopenharmony_ci */ 77662306a36Sopenharmony_cistatic int comp_keys(const struct btrfs_disk_key *disk, 77762306a36Sopenharmony_ci const struct btrfs_key *k2) 77862306a36Sopenharmony_ci{ 77962306a36Sopenharmony_ci struct btrfs_key k1; 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci btrfs_disk_key_to_cpu(&k1, disk); 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci return btrfs_comp_cpu_keys(&k1, k2); 78462306a36Sopenharmony_ci} 78562306a36Sopenharmony_ci#endif 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci/* 78862306a36Sopenharmony_ci * same as comp_keys only with two btrfs_key's 78962306a36Sopenharmony_ci */ 79062306a36Sopenharmony_ciint __pure btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2) 79162306a36Sopenharmony_ci{ 79262306a36Sopenharmony_ci if (k1->objectid > k2->objectid) 79362306a36Sopenharmony_ci return 1; 79462306a36Sopenharmony_ci if (k1->objectid < k2->objectid) 79562306a36Sopenharmony_ci return -1; 79662306a36Sopenharmony_ci if (k1->type > k2->type) 79762306a36Sopenharmony_ci return 1; 79862306a36Sopenharmony_ci if (k1->type < k2->type) 79962306a36Sopenharmony_ci return -1; 80062306a36Sopenharmony_ci if (k1->offset > k2->offset) 80162306a36Sopenharmony_ci return 1; 80262306a36Sopenharmony_ci if (k1->offset < k2->offset) 80362306a36Sopenharmony_ci return -1; 80462306a36Sopenharmony_ci return 0; 80562306a36Sopenharmony_ci} 
80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci/* 80862306a36Sopenharmony_ci * this is used by the defrag code to go through all the 80962306a36Sopenharmony_ci * leaves pointed to by a node and reallocate them so that 81062306a36Sopenharmony_ci * disk order is close to key order 81162306a36Sopenharmony_ci */ 81262306a36Sopenharmony_ciint btrfs_realloc_node(struct btrfs_trans_handle *trans, 81362306a36Sopenharmony_ci struct btrfs_root *root, struct extent_buffer *parent, 81462306a36Sopenharmony_ci int start_slot, u64 *last_ret, 81562306a36Sopenharmony_ci struct btrfs_key *progress) 81662306a36Sopenharmony_ci{ 81762306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 81862306a36Sopenharmony_ci struct extent_buffer *cur; 81962306a36Sopenharmony_ci u64 blocknr; 82062306a36Sopenharmony_ci u64 search_start = *last_ret; 82162306a36Sopenharmony_ci u64 last_block = 0; 82262306a36Sopenharmony_ci u64 other; 82362306a36Sopenharmony_ci u32 parent_nritems; 82462306a36Sopenharmony_ci int end_slot; 82562306a36Sopenharmony_ci int i; 82662306a36Sopenharmony_ci int err = 0; 82762306a36Sopenharmony_ci u32 blocksize; 82862306a36Sopenharmony_ci int progress_passed = 0; 82962306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci /* 83262306a36Sopenharmony_ci * COWing must happen through a running transaction, which always 83362306a36Sopenharmony_ci * matches the current fs generation (it's a transaction with a state 83462306a36Sopenharmony_ci * less than TRANS_STATE_UNBLOCKED). If it doesn't, then turn the fs 83562306a36Sopenharmony_ci * into error state to prevent the commit of any transaction. 
83662306a36Sopenharmony_ci */ 83762306a36Sopenharmony_ci if (unlikely(trans->transaction != fs_info->running_transaction || 83862306a36Sopenharmony_ci trans->transid != fs_info->generation)) { 83962306a36Sopenharmony_ci btrfs_abort_transaction(trans, -EUCLEAN); 84062306a36Sopenharmony_ci btrfs_crit(fs_info, 84162306a36Sopenharmony_ci"unexpected transaction when attempting to reallocate parent %llu for root %llu, transaction %llu running transaction %llu fs generation %llu", 84262306a36Sopenharmony_ci parent->start, btrfs_root_id(root), trans->transid, 84362306a36Sopenharmony_ci fs_info->running_transaction->transid, 84462306a36Sopenharmony_ci fs_info->generation); 84562306a36Sopenharmony_ci return -EUCLEAN; 84662306a36Sopenharmony_ci } 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci parent_nritems = btrfs_header_nritems(parent); 84962306a36Sopenharmony_ci blocksize = fs_info->nodesize; 85062306a36Sopenharmony_ci end_slot = parent_nritems - 1; 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci if (parent_nritems <= 1) 85362306a36Sopenharmony_ci return 0; 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci for (i = start_slot; i <= end_slot; i++) { 85662306a36Sopenharmony_ci int close = 1; 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ci btrfs_node_key(parent, &disk_key, i); 85962306a36Sopenharmony_ci if (!progress_passed && comp_keys(&disk_key, progress) < 0) 86062306a36Sopenharmony_ci continue; 86162306a36Sopenharmony_ci 86262306a36Sopenharmony_ci progress_passed = 1; 86362306a36Sopenharmony_ci blocknr = btrfs_node_blockptr(parent, i); 86462306a36Sopenharmony_ci if (last_block == 0) 86562306a36Sopenharmony_ci last_block = blocknr; 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci if (i > 0) { 86862306a36Sopenharmony_ci other = btrfs_node_blockptr(parent, i - 1); 86962306a36Sopenharmony_ci close = close_blocks(blocknr, other, blocksize); 87062306a36Sopenharmony_ci } 87162306a36Sopenharmony_ci if (!close && i < end_slot) { 
87262306a36Sopenharmony_ci other = btrfs_node_blockptr(parent, i + 1); 87362306a36Sopenharmony_ci close = close_blocks(blocknr, other, blocksize); 87462306a36Sopenharmony_ci } 87562306a36Sopenharmony_ci if (close) { 87662306a36Sopenharmony_ci last_block = blocknr; 87762306a36Sopenharmony_ci continue; 87862306a36Sopenharmony_ci } 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci cur = btrfs_read_node_slot(parent, i); 88162306a36Sopenharmony_ci if (IS_ERR(cur)) 88262306a36Sopenharmony_ci return PTR_ERR(cur); 88362306a36Sopenharmony_ci if (search_start == 0) 88462306a36Sopenharmony_ci search_start = last_block; 88562306a36Sopenharmony_ci 88662306a36Sopenharmony_ci btrfs_tree_lock(cur); 88762306a36Sopenharmony_ci err = __btrfs_cow_block(trans, root, cur, parent, i, 88862306a36Sopenharmony_ci &cur, search_start, 88962306a36Sopenharmony_ci min(16 * blocksize, 89062306a36Sopenharmony_ci (end_slot - i) * blocksize), 89162306a36Sopenharmony_ci BTRFS_NESTING_COW); 89262306a36Sopenharmony_ci if (err) { 89362306a36Sopenharmony_ci btrfs_tree_unlock(cur); 89462306a36Sopenharmony_ci free_extent_buffer(cur); 89562306a36Sopenharmony_ci break; 89662306a36Sopenharmony_ci } 89762306a36Sopenharmony_ci search_start = cur->start; 89862306a36Sopenharmony_ci last_block = cur->start; 89962306a36Sopenharmony_ci *last_ret = search_start; 90062306a36Sopenharmony_ci btrfs_tree_unlock(cur); 90162306a36Sopenharmony_ci free_extent_buffer(cur); 90262306a36Sopenharmony_ci } 90362306a36Sopenharmony_ci return err; 90462306a36Sopenharmony_ci} 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci/* 90762306a36Sopenharmony_ci * Search for a key in the given extent_buffer. 90862306a36Sopenharmony_ci * 90962306a36Sopenharmony_ci * The lower boundary for the search is specified by the slot number @first_slot. 91062306a36Sopenharmony_ci * Use a value of 0 to search over the whole extent buffer. Works for both 91162306a36Sopenharmony_ci * leaves and nodes. 
91262306a36Sopenharmony_ci * 91362306a36Sopenharmony_ci * The slot in the extent buffer is returned via @slot. If the key exists in the 91462306a36Sopenharmony_ci * extent buffer, then @slot will point to the slot where the key is, otherwise 91562306a36Sopenharmony_ci * it points to the slot where you would insert the key. 91662306a36Sopenharmony_ci * 91762306a36Sopenharmony_ci * Slot may point to the total number of items (i.e. one position beyond the last 91862306a36Sopenharmony_ci * key) if the key is bigger than the last key in the extent buffer. 91962306a36Sopenharmony_ci */ 92062306a36Sopenharmony_ciint btrfs_bin_search(struct extent_buffer *eb, int first_slot, 92162306a36Sopenharmony_ci const struct btrfs_key *key, int *slot) 92262306a36Sopenharmony_ci{ 92362306a36Sopenharmony_ci unsigned long p; 92462306a36Sopenharmony_ci int item_size; 92562306a36Sopenharmony_ci /* 92662306a36Sopenharmony_ci * Use unsigned types for the low and high slots, so that we get a more 92762306a36Sopenharmony_ci * efficient division in the search loop below. 
92862306a36Sopenharmony_ci */ 92962306a36Sopenharmony_ci u32 low = first_slot; 93062306a36Sopenharmony_ci u32 high = btrfs_header_nritems(eb); 93162306a36Sopenharmony_ci int ret; 93262306a36Sopenharmony_ci const int key_size = sizeof(struct btrfs_disk_key); 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci if (unlikely(low > high)) { 93562306a36Sopenharmony_ci btrfs_err(eb->fs_info, 93662306a36Sopenharmony_ci "%s: low (%u) > high (%u) eb %llu owner %llu level %d", 93762306a36Sopenharmony_ci __func__, low, high, eb->start, 93862306a36Sopenharmony_ci btrfs_header_owner(eb), btrfs_header_level(eb)); 93962306a36Sopenharmony_ci return -EINVAL; 94062306a36Sopenharmony_ci } 94162306a36Sopenharmony_ci 94262306a36Sopenharmony_ci if (btrfs_header_level(eb) == 0) { 94362306a36Sopenharmony_ci p = offsetof(struct btrfs_leaf, items); 94462306a36Sopenharmony_ci item_size = sizeof(struct btrfs_item); 94562306a36Sopenharmony_ci } else { 94662306a36Sopenharmony_ci p = offsetof(struct btrfs_node, ptrs); 94762306a36Sopenharmony_ci item_size = sizeof(struct btrfs_key_ptr); 94862306a36Sopenharmony_ci } 94962306a36Sopenharmony_ci 95062306a36Sopenharmony_ci while (low < high) { 95162306a36Sopenharmony_ci unsigned long oip; 95262306a36Sopenharmony_ci unsigned long offset; 95362306a36Sopenharmony_ci struct btrfs_disk_key *tmp; 95462306a36Sopenharmony_ci struct btrfs_disk_key unaligned; 95562306a36Sopenharmony_ci int mid; 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_ci mid = (low + high) / 2; 95862306a36Sopenharmony_ci offset = p + mid * item_size; 95962306a36Sopenharmony_ci oip = offset_in_page(offset); 96062306a36Sopenharmony_ci 96162306a36Sopenharmony_ci if (oip + key_size <= PAGE_SIZE) { 96262306a36Sopenharmony_ci const unsigned long idx = get_eb_page_index(offset); 96362306a36Sopenharmony_ci char *kaddr = page_address(eb->pages[idx]); 96462306a36Sopenharmony_ci 96562306a36Sopenharmony_ci oip = get_eb_offset_in_page(eb, offset); 96662306a36Sopenharmony_ci tmp = (struct 
btrfs_disk_key *)(kaddr + oip); 96762306a36Sopenharmony_ci } else { 96862306a36Sopenharmony_ci read_extent_buffer(eb, &unaligned, offset, key_size); 96962306a36Sopenharmony_ci tmp = &unaligned; 97062306a36Sopenharmony_ci } 97162306a36Sopenharmony_ci 97262306a36Sopenharmony_ci ret = comp_keys(tmp, key); 97362306a36Sopenharmony_ci 97462306a36Sopenharmony_ci if (ret < 0) 97562306a36Sopenharmony_ci low = mid + 1; 97662306a36Sopenharmony_ci else if (ret > 0) 97762306a36Sopenharmony_ci high = mid; 97862306a36Sopenharmony_ci else { 97962306a36Sopenharmony_ci *slot = mid; 98062306a36Sopenharmony_ci return 0; 98162306a36Sopenharmony_ci } 98262306a36Sopenharmony_ci } 98362306a36Sopenharmony_ci *slot = low; 98462306a36Sopenharmony_ci return 1; 98562306a36Sopenharmony_ci} 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_cistatic void root_add_used(struct btrfs_root *root, u32 size) 98862306a36Sopenharmony_ci{ 98962306a36Sopenharmony_ci spin_lock(&root->accounting_lock); 99062306a36Sopenharmony_ci btrfs_set_root_used(&root->root_item, 99162306a36Sopenharmony_ci btrfs_root_used(&root->root_item) + size); 99262306a36Sopenharmony_ci spin_unlock(&root->accounting_lock); 99362306a36Sopenharmony_ci} 99462306a36Sopenharmony_ci 99562306a36Sopenharmony_cistatic void root_sub_used(struct btrfs_root *root, u32 size) 99662306a36Sopenharmony_ci{ 99762306a36Sopenharmony_ci spin_lock(&root->accounting_lock); 99862306a36Sopenharmony_ci btrfs_set_root_used(&root->root_item, 99962306a36Sopenharmony_ci btrfs_root_used(&root->root_item) - size); 100062306a36Sopenharmony_ci spin_unlock(&root->accounting_lock); 100162306a36Sopenharmony_ci} 100262306a36Sopenharmony_ci 100362306a36Sopenharmony_ci/* given a node and slot number, this reads the blocks it points to. The 100462306a36Sopenharmony_ci * extent buffer is returned with a reference taken (but unlocked). 
100562306a36Sopenharmony_ci */ 100662306a36Sopenharmony_cistruct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent, 100762306a36Sopenharmony_ci int slot) 100862306a36Sopenharmony_ci{ 100962306a36Sopenharmony_ci int level = btrfs_header_level(parent); 101062306a36Sopenharmony_ci struct btrfs_tree_parent_check check = { 0 }; 101162306a36Sopenharmony_ci struct extent_buffer *eb; 101262306a36Sopenharmony_ci 101362306a36Sopenharmony_ci if (slot < 0 || slot >= btrfs_header_nritems(parent)) 101462306a36Sopenharmony_ci return ERR_PTR(-ENOENT); 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_ci ASSERT(level); 101762306a36Sopenharmony_ci 101862306a36Sopenharmony_ci check.level = level - 1; 101962306a36Sopenharmony_ci check.transid = btrfs_node_ptr_generation(parent, slot); 102062306a36Sopenharmony_ci check.owner_root = btrfs_header_owner(parent); 102162306a36Sopenharmony_ci check.has_first_key = true; 102262306a36Sopenharmony_ci btrfs_node_key_to_cpu(parent, &check.first_key, slot); 102362306a36Sopenharmony_ci 102462306a36Sopenharmony_ci eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot), 102562306a36Sopenharmony_ci &check); 102662306a36Sopenharmony_ci if (IS_ERR(eb)) 102762306a36Sopenharmony_ci return eb; 102862306a36Sopenharmony_ci if (!extent_buffer_uptodate(eb)) { 102962306a36Sopenharmony_ci free_extent_buffer(eb); 103062306a36Sopenharmony_ci return ERR_PTR(-EIO); 103162306a36Sopenharmony_ci } 103262306a36Sopenharmony_ci 103362306a36Sopenharmony_ci return eb; 103462306a36Sopenharmony_ci} 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci/* 103762306a36Sopenharmony_ci * node level balancing, used to make sure nodes are in proper order for 103862306a36Sopenharmony_ci * item deletion. We balance from the top down, so we have to make sure 103962306a36Sopenharmony_ci * that a deletion won't leave an node completely empty later on. 
104062306a36Sopenharmony_ci */ 104162306a36Sopenharmony_cistatic noinline int balance_level(struct btrfs_trans_handle *trans, 104262306a36Sopenharmony_ci struct btrfs_root *root, 104362306a36Sopenharmony_ci struct btrfs_path *path, int level) 104462306a36Sopenharmony_ci{ 104562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 104662306a36Sopenharmony_ci struct extent_buffer *right = NULL; 104762306a36Sopenharmony_ci struct extent_buffer *mid; 104862306a36Sopenharmony_ci struct extent_buffer *left = NULL; 104962306a36Sopenharmony_ci struct extent_buffer *parent = NULL; 105062306a36Sopenharmony_ci int ret = 0; 105162306a36Sopenharmony_ci int wret; 105262306a36Sopenharmony_ci int pslot; 105362306a36Sopenharmony_ci int orig_slot = path->slots[level]; 105462306a36Sopenharmony_ci u64 orig_ptr; 105562306a36Sopenharmony_ci 105662306a36Sopenharmony_ci ASSERT(level > 0); 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci mid = path->nodes[level]; 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK); 106162306a36Sopenharmony_ci WARN_ON(btrfs_header_generation(mid) != trans->transid); 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci orig_ptr = btrfs_node_blockptr(mid, orig_slot); 106462306a36Sopenharmony_ci 106562306a36Sopenharmony_ci if (level < BTRFS_MAX_LEVEL - 1) { 106662306a36Sopenharmony_ci parent = path->nodes[level + 1]; 106762306a36Sopenharmony_ci pslot = path->slots[level + 1]; 106862306a36Sopenharmony_ci } 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_ci /* 107162306a36Sopenharmony_ci * deal with the case where there is only one pointer in the root 107262306a36Sopenharmony_ci * by promoting the node below to a root 107362306a36Sopenharmony_ci */ 107462306a36Sopenharmony_ci if (!parent) { 107562306a36Sopenharmony_ci struct extent_buffer *child; 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_ci if (btrfs_header_nritems(mid) != 1) 107862306a36Sopenharmony_ci return 0; 
107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci /* promote the child to a root */ 108162306a36Sopenharmony_ci child = btrfs_read_node_slot(mid, 0); 108262306a36Sopenharmony_ci if (IS_ERR(child)) { 108362306a36Sopenharmony_ci ret = PTR_ERR(child); 108462306a36Sopenharmony_ci goto out; 108562306a36Sopenharmony_ci } 108662306a36Sopenharmony_ci 108762306a36Sopenharmony_ci btrfs_tree_lock(child); 108862306a36Sopenharmony_ci ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 108962306a36Sopenharmony_ci BTRFS_NESTING_COW); 109062306a36Sopenharmony_ci if (ret) { 109162306a36Sopenharmony_ci btrfs_tree_unlock(child); 109262306a36Sopenharmony_ci free_extent_buffer(child); 109362306a36Sopenharmony_ci goto out; 109462306a36Sopenharmony_ci } 109562306a36Sopenharmony_ci 109662306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_root(root->node, child, true); 109762306a36Sopenharmony_ci if (ret < 0) { 109862306a36Sopenharmony_ci btrfs_tree_unlock(child); 109962306a36Sopenharmony_ci free_extent_buffer(child); 110062306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 110162306a36Sopenharmony_ci goto out; 110262306a36Sopenharmony_ci } 110362306a36Sopenharmony_ci rcu_assign_pointer(root->node, child); 110462306a36Sopenharmony_ci 110562306a36Sopenharmony_ci add_root_to_dirty_list(root); 110662306a36Sopenharmony_ci btrfs_tree_unlock(child); 110762306a36Sopenharmony_ci 110862306a36Sopenharmony_ci path->locks[level] = 0; 110962306a36Sopenharmony_ci path->nodes[level] = NULL; 111062306a36Sopenharmony_ci btrfs_clear_buffer_dirty(trans, mid); 111162306a36Sopenharmony_ci btrfs_tree_unlock(mid); 111262306a36Sopenharmony_ci /* once for the path */ 111362306a36Sopenharmony_ci free_extent_buffer(mid); 111462306a36Sopenharmony_ci 111562306a36Sopenharmony_ci root_sub_used(root, mid->len); 111662306a36Sopenharmony_ci btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); 111762306a36Sopenharmony_ci /* once for the root ptr */ 111862306a36Sopenharmony_ci 
free_extent_buffer_stale(mid); 111962306a36Sopenharmony_ci return 0; 112062306a36Sopenharmony_ci } 112162306a36Sopenharmony_ci if (btrfs_header_nritems(mid) > 112262306a36Sopenharmony_ci BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 4) 112362306a36Sopenharmony_ci return 0; 112462306a36Sopenharmony_ci 112562306a36Sopenharmony_ci if (pslot) { 112662306a36Sopenharmony_ci left = btrfs_read_node_slot(parent, pslot - 1); 112762306a36Sopenharmony_ci if (IS_ERR(left)) { 112862306a36Sopenharmony_ci ret = PTR_ERR(left); 112962306a36Sopenharmony_ci left = NULL; 113062306a36Sopenharmony_ci goto out; 113162306a36Sopenharmony_ci } 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_ci __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); 113462306a36Sopenharmony_ci wret = btrfs_cow_block(trans, root, left, 113562306a36Sopenharmony_ci parent, pslot - 1, &left, 113662306a36Sopenharmony_ci BTRFS_NESTING_LEFT_COW); 113762306a36Sopenharmony_ci if (wret) { 113862306a36Sopenharmony_ci ret = wret; 113962306a36Sopenharmony_ci goto out; 114062306a36Sopenharmony_ci } 114162306a36Sopenharmony_ci } 114262306a36Sopenharmony_ci 114362306a36Sopenharmony_ci if (pslot + 1 < btrfs_header_nritems(parent)) { 114462306a36Sopenharmony_ci right = btrfs_read_node_slot(parent, pslot + 1); 114562306a36Sopenharmony_ci if (IS_ERR(right)) { 114662306a36Sopenharmony_ci ret = PTR_ERR(right); 114762306a36Sopenharmony_ci right = NULL; 114862306a36Sopenharmony_ci goto out; 114962306a36Sopenharmony_ci } 115062306a36Sopenharmony_ci 115162306a36Sopenharmony_ci __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); 115262306a36Sopenharmony_ci wret = btrfs_cow_block(trans, root, right, 115362306a36Sopenharmony_ci parent, pslot + 1, &right, 115462306a36Sopenharmony_ci BTRFS_NESTING_RIGHT_COW); 115562306a36Sopenharmony_ci if (wret) { 115662306a36Sopenharmony_ci ret = wret; 115762306a36Sopenharmony_ci goto out; 115862306a36Sopenharmony_ci } 115962306a36Sopenharmony_ci } 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci /* first, try to make 
some room in the middle buffer */ 116262306a36Sopenharmony_ci if (left) { 116362306a36Sopenharmony_ci orig_slot += btrfs_header_nritems(left); 116462306a36Sopenharmony_ci wret = push_node_left(trans, left, mid, 1); 116562306a36Sopenharmony_ci if (wret < 0) 116662306a36Sopenharmony_ci ret = wret; 116762306a36Sopenharmony_ci } 116862306a36Sopenharmony_ci 116962306a36Sopenharmony_ci /* 117062306a36Sopenharmony_ci * then try to empty the right most buffer into the middle 117162306a36Sopenharmony_ci */ 117262306a36Sopenharmony_ci if (right) { 117362306a36Sopenharmony_ci wret = push_node_left(trans, mid, right, 1); 117462306a36Sopenharmony_ci if (wret < 0 && wret != -ENOSPC) 117562306a36Sopenharmony_ci ret = wret; 117662306a36Sopenharmony_ci if (btrfs_header_nritems(right) == 0) { 117762306a36Sopenharmony_ci btrfs_clear_buffer_dirty(trans, right); 117862306a36Sopenharmony_ci btrfs_tree_unlock(right); 117962306a36Sopenharmony_ci ret = btrfs_del_ptr(trans, root, path, level + 1, pslot + 1); 118062306a36Sopenharmony_ci if (ret < 0) { 118162306a36Sopenharmony_ci free_extent_buffer_stale(right); 118262306a36Sopenharmony_ci right = NULL; 118362306a36Sopenharmony_ci goto out; 118462306a36Sopenharmony_ci } 118562306a36Sopenharmony_ci root_sub_used(root, right->len); 118662306a36Sopenharmony_ci btrfs_free_tree_block(trans, btrfs_root_id(root), right, 118762306a36Sopenharmony_ci 0, 1); 118862306a36Sopenharmony_ci free_extent_buffer_stale(right); 118962306a36Sopenharmony_ci right = NULL; 119062306a36Sopenharmony_ci } else { 119162306a36Sopenharmony_ci struct btrfs_disk_key right_key; 119262306a36Sopenharmony_ci btrfs_node_key(right, &right_key, 0); 119362306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1, 119462306a36Sopenharmony_ci BTRFS_MOD_LOG_KEY_REPLACE); 119562306a36Sopenharmony_ci if (ret < 0) { 119662306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 119762306a36Sopenharmony_ci goto out; 119862306a36Sopenharmony_ci } 
119962306a36Sopenharmony_ci btrfs_set_node_key(parent, &right_key, pslot + 1); 120062306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, parent); 120162306a36Sopenharmony_ci } 120262306a36Sopenharmony_ci } 120362306a36Sopenharmony_ci if (btrfs_header_nritems(mid) == 1) { 120462306a36Sopenharmony_ci /* 120562306a36Sopenharmony_ci * we're not allowed to leave a node with one item in the 120662306a36Sopenharmony_ci * tree during a delete. A deletion from lower in the tree 120762306a36Sopenharmony_ci * could try to delete the only pointer in this node. 120862306a36Sopenharmony_ci * So, pull some keys from the left. 120962306a36Sopenharmony_ci * There has to be a left pointer at this point because 121062306a36Sopenharmony_ci * otherwise we would have pulled some pointers from the 121162306a36Sopenharmony_ci * right 121262306a36Sopenharmony_ci */ 121362306a36Sopenharmony_ci if (unlikely(!left)) { 121462306a36Sopenharmony_ci btrfs_crit(fs_info, 121562306a36Sopenharmony_ci"missing left child when middle child only has 1 item, parent bytenr %llu level %d mid bytenr %llu root %llu", 121662306a36Sopenharmony_ci parent->start, btrfs_header_level(parent), 121762306a36Sopenharmony_ci mid->start, btrfs_root_id(root)); 121862306a36Sopenharmony_ci ret = -EUCLEAN; 121962306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 122062306a36Sopenharmony_ci goto out; 122162306a36Sopenharmony_ci } 122262306a36Sopenharmony_ci wret = balance_node_right(trans, mid, left); 122362306a36Sopenharmony_ci if (wret < 0) { 122462306a36Sopenharmony_ci ret = wret; 122562306a36Sopenharmony_ci goto out; 122662306a36Sopenharmony_ci } 122762306a36Sopenharmony_ci if (wret == 1) { 122862306a36Sopenharmony_ci wret = push_node_left(trans, left, mid, 1); 122962306a36Sopenharmony_ci if (wret < 0) 123062306a36Sopenharmony_ci ret = wret; 123162306a36Sopenharmony_ci } 123262306a36Sopenharmony_ci BUG_ON(wret == 1); 123362306a36Sopenharmony_ci } 123462306a36Sopenharmony_ci if (btrfs_header_nritems(mid) == 0) { 
123562306a36Sopenharmony_ci btrfs_clear_buffer_dirty(trans, mid); 123662306a36Sopenharmony_ci btrfs_tree_unlock(mid); 123762306a36Sopenharmony_ci ret = btrfs_del_ptr(trans, root, path, level + 1, pslot); 123862306a36Sopenharmony_ci if (ret < 0) { 123962306a36Sopenharmony_ci free_extent_buffer_stale(mid); 124062306a36Sopenharmony_ci mid = NULL; 124162306a36Sopenharmony_ci goto out; 124262306a36Sopenharmony_ci } 124362306a36Sopenharmony_ci root_sub_used(root, mid->len); 124462306a36Sopenharmony_ci btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1); 124562306a36Sopenharmony_ci free_extent_buffer_stale(mid); 124662306a36Sopenharmony_ci mid = NULL; 124762306a36Sopenharmony_ci } else { 124862306a36Sopenharmony_ci /* update the parent key to reflect our changes */ 124962306a36Sopenharmony_ci struct btrfs_disk_key mid_key; 125062306a36Sopenharmony_ci btrfs_node_key(mid, &mid_key, 0); 125162306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_key(parent, pslot, 125262306a36Sopenharmony_ci BTRFS_MOD_LOG_KEY_REPLACE); 125362306a36Sopenharmony_ci if (ret < 0) { 125462306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 125562306a36Sopenharmony_ci goto out; 125662306a36Sopenharmony_ci } 125762306a36Sopenharmony_ci btrfs_set_node_key(parent, &mid_key, pslot); 125862306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, parent); 125962306a36Sopenharmony_ci } 126062306a36Sopenharmony_ci 126162306a36Sopenharmony_ci /* update the path */ 126262306a36Sopenharmony_ci if (left) { 126362306a36Sopenharmony_ci if (btrfs_header_nritems(left) > orig_slot) { 126462306a36Sopenharmony_ci atomic_inc(&left->refs); 126562306a36Sopenharmony_ci /* left was locked after cow */ 126662306a36Sopenharmony_ci path->nodes[level] = left; 126762306a36Sopenharmony_ci path->slots[level + 1] -= 1; 126862306a36Sopenharmony_ci path->slots[level] = orig_slot; 126962306a36Sopenharmony_ci if (mid) { 127062306a36Sopenharmony_ci btrfs_tree_unlock(mid); 127162306a36Sopenharmony_ci 
free_extent_buffer(mid); 127262306a36Sopenharmony_ci } 127362306a36Sopenharmony_ci } else { 127462306a36Sopenharmony_ci orig_slot -= btrfs_header_nritems(left); 127562306a36Sopenharmony_ci path->slots[level] = orig_slot; 127662306a36Sopenharmony_ci } 127762306a36Sopenharmony_ci } 127862306a36Sopenharmony_ci /* double check we haven't messed things up */ 127962306a36Sopenharmony_ci if (orig_ptr != 128062306a36Sopenharmony_ci btrfs_node_blockptr(path->nodes[level], path->slots[level])) 128162306a36Sopenharmony_ci BUG(); 128262306a36Sopenharmony_ciout: 128362306a36Sopenharmony_ci if (right) { 128462306a36Sopenharmony_ci btrfs_tree_unlock(right); 128562306a36Sopenharmony_ci free_extent_buffer(right); 128662306a36Sopenharmony_ci } 128762306a36Sopenharmony_ci if (left) { 128862306a36Sopenharmony_ci if (path->nodes[level] != left) 128962306a36Sopenharmony_ci btrfs_tree_unlock(left); 129062306a36Sopenharmony_ci free_extent_buffer(left); 129162306a36Sopenharmony_ci } 129262306a36Sopenharmony_ci return ret; 129362306a36Sopenharmony_ci} 129462306a36Sopenharmony_ci 129562306a36Sopenharmony_ci/* Node balancing for insertion. Here we only split or push nodes around 129662306a36Sopenharmony_ci * when they are completely full. This is also done top down, so we 129762306a36Sopenharmony_ci * have to be pessimistic. 
129862306a36Sopenharmony_ci */ 129962306a36Sopenharmony_cistatic noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, 130062306a36Sopenharmony_ci struct btrfs_root *root, 130162306a36Sopenharmony_ci struct btrfs_path *path, int level) 130262306a36Sopenharmony_ci{ 130362306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 130462306a36Sopenharmony_ci struct extent_buffer *right = NULL; 130562306a36Sopenharmony_ci struct extent_buffer *mid; 130662306a36Sopenharmony_ci struct extent_buffer *left = NULL; 130762306a36Sopenharmony_ci struct extent_buffer *parent = NULL; 130862306a36Sopenharmony_ci int ret = 0; 130962306a36Sopenharmony_ci int wret; 131062306a36Sopenharmony_ci int pslot; 131162306a36Sopenharmony_ci int orig_slot = path->slots[level]; 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ci if (level == 0) 131462306a36Sopenharmony_ci return 1; 131562306a36Sopenharmony_ci 131662306a36Sopenharmony_ci mid = path->nodes[level]; 131762306a36Sopenharmony_ci WARN_ON(btrfs_header_generation(mid) != trans->transid); 131862306a36Sopenharmony_ci 131962306a36Sopenharmony_ci if (level < BTRFS_MAX_LEVEL - 1) { 132062306a36Sopenharmony_ci parent = path->nodes[level + 1]; 132162306a36Sopenharmony_ci pslot = path->slots[level + 1]; 132262306a36Sopenharmony_ci } 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_ci if (!parent) 132562306a36Sopenharmony_ci return 1; 132662306a36Sopenharmony_ci 132762306a36Sopenharmony_ci /* first, try to make some room in the middle buffer */ 132862306a36Sopenharmony_ci if (pslot) { 132962306a36Sopenharmony_ci u32 left_nr; 133062306a36Sopenharmony_ci 133162306a36Sopenharmony_ci left = btrfs_read_node_slot(parent, pslot - 1); 133262306a36Sopenharmony_ci if (IS_ERR(left)) 133362306a36Sopenharmony_ci return PTR_ERR(left); 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_ci left_nr = btrfs_header_nritems(left); 
133862306a36Sopenharmony_ci if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) { 133962306a36Sopenharmony_ci wret = 1; 134062306a36Sopenharmony_ci } else { 134162306a36Sopenharmony_ci ret = btrfs_cow_block(trans, root, left, parent, 134262306a36Sopenharmony_ci pslot - 1, &left, 134362306a36Sopenharmony_ci BTRFS_NESTING_LEFT_COW); 134462306a36Sopenharmony_ci if (ret) 134562306a36Sopenharmony_ci wret = 1; 134662306a36Sopenharmony_ci else { 134762306a36Sopenharmony_ci wret = push_node_left(trans, left, mid, 0); 134862306a36Sopenharmony_ci } 134962306a36Sopenharmony_ci } 135062306a36Sopenharmony_ci if (wret < 0) 135162306a36Sopenharmony_ci ret = wret; 135262306a36Sopenharmony_ci if (wret == 0) { 135362306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 135462306a36Sopenharmony_ci orig_slot += left_nr; 135562306a36Sopenharmony_ci btrfs_node_key(mid, &disk_key, 0); 135662306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_key(parent, pslot, 135762306a36Sopenharmony_ci BTRFS_MOD_LOG_KEY_REPLACE); 135862306a36Sopenharmony_ci if (ret < 0) { 135962306a36Sopenharmony_ci btrfs_tree_unlock(left); 136062306a36Sopenharmony_ci free_extent_buffer(left); 136162306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 136262306a36Sopenharmony_ci return ret; 136362306a36Sopenharmony_ci } 136462306a36Sopenharmony_ci btrfs_set_node_key(parent, &disk_key, pslot); 136562306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, parent); 136662306a36Sopenharmony_ci if (btrfs_header_nritems(left) > orig_slot) { 136762306a36Sopenharmony_ci path->nodes[level] = left; 136862306a36Sopenharmony_ci path->slots[level + 1] -= 1; 136962306a36Sopenharmony_ci path->slots[level] = orig_slot; 137062306a36Sopenharmony_ci btrfs_tree_unlock(mid); 137162306a36Sopenharmony_ci free_extent_buffer(mid); 137262306a36Sopenharmony_ci } else { 137362306a36Sopenharmony_ci orig_slot -= 137462306a36Sopenharmony_ci btrfs_header_nritems(left); 137562306a36Sopenharmony_ci path->slots[level] = orig_slot; 
137662306a36Sopenharmony_ci btrfs_tree_unlock(left); 137762306a36Sopenharmony_ci free_extent_buffer(left); 137862306a36Sopenharmony_ci } 137962306a36Sopenharmony_ci return 0; 138062306a36Sopenharmony_ci } 138162306a36Sopenharmony_ci btrfs_tree_unlock(left); 138262306a36Sopenharmony_ci free_extent_buffer(left); 138362306a36Sopenharmony_ci } 138462306a36Sopenharmony_ci 138562306a36Sopenharmony_ci /* 138662306a36Sopenharmony_ci * then try to empty the right most buffer into the middle 138762306a36Sopenharmony_ci */ 138862306a36Sopenharmony_ci if (pslot + 1 < btrfs_header_nritems(parent)) { 138962306a36Sopenharmony_ci u32 right_nr; 139062306a36Sopenharmony_ci 139162306a36Sopenharmony_ci right = btrfs_read_node_slot(parent, pslot + 1); 139262306a36Sopenharmony_ci if (IS_ERR(right)) 139362306a36Sopenharmony_ci return PTR_ERR(right); 139462306a36Sopenharmony_ci 139562306a36Sopenharmony_ci __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_ci right_nr = btrfs_header_nritems(right); 139862306a36Sopenharmony_ci if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) { 139962306a36Sopenharmony_ci wret = 1; 140062306a36Sopenharmony_ci } else { 140162306a36Sopenharmony_ci ret = btrfs_cow_block(trans, root, right, 140262306a36Sopenharmony_ci parent, pslot + 1, 140362306a36Sopenharmony_ci &right, BTRFS_NESTING_RIGHT_COW); 140462306a36Sopenharmony_ci if (ret) 140562306a36Sopenharmony_ci wret = 1; 140662306a36Sopenharmony_ci else { 140762306a36Sopenharmony_ci wret = balance_node_right(trans, right, mid); 140862306a36Sopenharmony_ci } 140962306a36Sopenharmony_ci } 141062306a36Sopenharmony_ci if (wret < 0) 141162306a36Sopenharmony_ci ret = wret; 141262306a36Sopenharmony_ci if (wret == 0) { 141362306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 141462306a36Sopenharmony_ci 141562306a36Sopenharmony_ci btrfs_node_key(right, &disk_key, 0); 141662306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1, 
141762306a36Sopenharmony_ci BTRFS_MOD_LOG_KEY_REPLACE); 141862306a36Sopenharmony_ci if (ret < 0) { 141962306a36Sopenharmony_ci btrfs_tree_unlock(right); 142062306a36Sopenharmony_ci free_extent_buffer(right); 142162306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 142262306a36Sopenharmony_ci return ret; 142362306a36Sopenharmony_ci } 142462306a36Sopenharmony_ci btrfs_set_node_key(parent, &disk_key, pslot + 1); 142562306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, parent); 142662306a36Sopenharmony_ci 142762306a36Sopenharmony_ci if (btrfs_header_nritems(mid) <= orig_slot) { 142862306a36Sopenharmony_ci path->nodes[level] = right; 142962306a36Sopenharmony_ci path->slots[level + 1] += 1; 143062306a36Sopenharmony_ci path->slots[level] = orig_slot - 143162306a36Sopenharmony_ci btrfs_header_nritems(mid); 143262306a36Sopenharmony_ci btrfs_tree_unlock(mid); 143362306a36Sopenharmony_ci free_extent_buffer(mid); 143462306a36Sopenharmony_ci } else { 143562306a36Sopenharmony_ci btrfs_tree_unlock(right); 143662306a36Sopenharmony_ci free_extent_buffer(right); 143762306a36Sopenharmony_ci } 143862306a36Sopenharmony_ci return 0; 143962306a36Sopenharmony_ci } 144062306a36Sopenharmony_ci btrfs_tree_unlock(right); 144162306a36Sopenharmony_ci free_extent_buffer(right); 144262306a36Sopenharmony_ci } 144362306a36Sopenharmony_ci return 1; 144462306a36Sopenharmony_ci} 144562306a36Sopenharmony_ci 144662306a36Sopenharmony_ci/* 144762306a36Sopenharmony_ci * readahead one full node of leaves, finding things that are close 144862306a36Sopenharmony_ci * to the block in 'slot', and triggering ra on them. 
144962306a36Sopenharmony_ci */ 145062306a36Sopenharmony_cistatic void reada_for_search(struct btrfs_fs_info *fs_info, 145162306a36Sopenharmony_ci struct btrfs_path *path, 145262306a36Sopenharmony_ci int level, int slot, u64 objectid) 145362306a36Sopenharmony_ci{ 145462306a36Sopenharmony_ci struct extent_buffer *node; 145562306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 145662306a36Sopenharmony_ci u32 nritems; 145762306a36Sopenharmony_ci u64 search; 145862306a36Sopenharmony_ci u64 target; 145962306a36Sopenharmony_ci u64 nread = 0; 146062306a36Sopenharmony_ci u64 nread_max; 146162306a36Sopenharmony_ci u32 nr; 146262306a36Sopenharmony_ci u32 blocksize; 146362306a36Sopenharmony_ci u32 nscan = 0; 146462306a36Sopenharmony_ci 146562306a36Sopenharmony_ci if (level != 1 && path->reada != READA_FORWARD_ALWAYS) 146662306a36Sopenharmony_ci return; 146762306a36Sopenharmony_ci 146862306a36Sopenharmony_ci if (!path->nodes[level]) 146962306a36Sopenharmony_ci return; 147062306a36Sopenharmony_ci 147162306a36Sopenharmony_ci node = path->nodes[level]; 147262306a36Sopenharmony_ci 147362306a36Sopenharmony_ci /* 147462306a36Sopenharmony_ci * Since the time between visiting leaves is much shorter than the time 147562306a36Sopenharmony_ci * between visiting nodes, limit read ahead of nodes to 1, to avoid too 147662306a36Sopenharmony_ci * much IO at once (possibly random). 
147762306a36Sopenharmony_ci */ 147862306a36Sopenharmony_ci if (path->reada == READA_FORWARD_ALWAYS) { 147962306a36Sopenharmony_ci if (level > 1) 148062306a36Sopenharmony_ci nread_max = node->fs_info->nodesize; 148162306a36Sopenharmony_ci else 148262306a36Sopenharmony_ci nread_max = SZ_128K; 148362306a36Sopenharmony_ci } else { 148462306a36Sopenharmony_ci nread_max = SZ_64K; 148562306a36Sopenharmony_ci } 148662306a36Sopenharmony_ci 148762306a36Sopenharmony_ci search = btrfs_node_blockptr(node, slot); 148862306a36Sopenharmony_ci blocksize = fs_info->nodesize; 148962306a36Sopenharmony_ci if (path->reada != READA_FORWARD_ALWAYS) { 149062306a36Sopenharmony_ci struct extent_buffer *eb; 149162306a36Sopenharmony_ci 149262306a36Sopenharmony_ci eb = find_extent_buffer(fs_info, search); 149362306a36Sopenharmony_ci if (eb) { 149462306a36Sopenharmony_ci free_extent_buffer(eb); 149562306a36Sopenharmony_ci return; 149662306a36Sopenharmony_ci } 149762306a36Sopenharmony_ci } 149862306a36Sopenharmony_ci 149962306a36Sopenharmony_ci target = search; 150062306a36Sopenharmony_ci 150162306a36Sopenharmony_ci nritems = btrfs_header_nritems(node); 150262306a36Sopenharmony_ci nr = slot; 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_ci while (1) { 150562306a36Sopenharmony_ci if (path->reada == READA_BACK) { 150662306a36Sopenharmony_ci if (nr == 0) 150762306a36Sopenharmony_ci break; 150862306a36Sopenharmony_ci nr--; 150962306a36Sopenharmony_ci } else if (path->reada == READA_FORWARD || 151062306a36Sopenharmony_ci path->reada == READA_FORWARD_ALWAYS) { 151162306a36Sopenharmony_ci nr++; 151262306a36Sopenharmony_ci if (nr >= nritems) 151362306a36Sopenharmony_ci break; 151462306a36Sopenharmony_ci } 151562306a36Sopenharmony_ci if (path->reada == READA_BACK && objectid) { 151662306a36Sopenharmony_ci btrfs_node_key(node, &disk_key, nr); 151762306a36Sopenharmony_ci if (btrfs_disk_key_objectid(&disk_key) != objectid) 151862306a36Sopenharmony_ci break; 151962306a36Sopenharmony_ci } 
152062306a36Sopenharmony_ci search = btrfs_node_blockptr(node, nr); 152162306a36Sopenharmony_ci if (path->reada == READA_FORWARD_ALWAYS || 152262306a36Sopenharmony_ci (search <= target && target - search <= 65536) || 152362306a36Sopenharmony_ci (search > target && search - target <= 65536)) { 152462306a36Sopenharmony_ci btrfs_readahead_node_child(node, nr); 152562306a36Sopenharmony_ci nread += blocksize; 152662306a36Sopenharmony_ci } 152762306a36Sopenharmony_ci nscan++; 152862306a36Sopenharmony_ci if (nread > nread_max || nscan > 32) 152962306a36Sopenharmony_ci break; 153062306a36Sopenharmony_ci } 153162306a36Sopenharmony_ci} 153262306a36Sopenharmony_ci 153362306a36Sopenharmony_cistatic noinline void reada_for_balance(struct btrfs_path *path, int level) 153462306a36Sopenharmony_ci{ 153562306a36Sopenharmony_ci struct extent_buffer *parent; 153662306a36Sopenharmony_ci int slot; 153762306a36Sopenharmony_ci int nritems; 153862306a36Sopenharmony_ci 153962306a36Sopenharmony_ci parent = path->nodes[level + 1]; 154062306a36Sopenharmony_ci if (!parent) 154162306a36Sopenharmony_ci return; 154262306a36Sopenharmony_ci 154362306a36Sopenharmony_ci nritems = btrfs_header_nritems(parent); 154462306a36Sopenharmony_ci slot = path->slots[level + 1]; 154562306a36Sopenharmony_ci 154662306a36Sopenharmony_ci if (slot > 0) 154762306a36Sopenharmony_ci btrfs_readahead_node_child(parent, slot - 1); 154862306a36Sopenharmony_ci if (slot + 1 < nritems) 154962306a36Sopenharmony_ci btrfs_readahead_node_child(parent, slot + 1); 155062306a36Sopenharmony_ci} 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci 155362306a36Sopenharmony_ci/* 155462306a36Sopenharmony_ci * when we walk down the tree, it is usually safe to unlock the higher layers 155562306a36Sopenharmony_ci * in the tree. The exceptions are when our path goes through slot 0, because 155662306a36Sopenharmony_ci * operations on the tree might require changing key pointers higher up in the 155762306a36Sopenharmony_ci * tree. 
155862306a36Sopenharmony_ci * 155962306a36Sopenharmony_ci * callers might also have set path->keep_locks, which tells this code to keep 156062306a36Sopenharmony_ci * the lock if the path points to the last slot in the block. This is part of 156162306a36Sopenharmony_ci * walking through the tree, and selecting the next slot in the higher block. 156262306a36Sopenharmony_ci * 156362306a36Sopenharmony_ci * lowest_unlock sets the lowest level in the tree we're allowed to unlock. so 156462306a36Sopenharmony_ci * if lowest_unlock is 1, level 0 won't be unlocked 156562306a36Sopenharmony_ci */ 156662306a36Sopenharmony_cistatic noinline void unlock_up(struct btrfs_path *path, int level, 156762306a36Sopenharmony_ci int lowest_unlock, int min_write_lock_level, 156862306a36Sopenharmony_ci int *write_lock_level) 156962306a36Sopenharmony_ci{ 157062306a36Sopenharmony_ci int i; 157162306a36Sopenharmony_ci int skip_level = level; 157262306a36Sopenharmony_ci bool check_skip = true; 157362306a36Sopenharmony_ci 157462306a36Sopenharmony_ci for (i = level; i < BTRFS_MAX_LEVEL; i++) { 157562306a36Sopenharmony_ci if (!path->nodes[i]) 157662306a36Sopenharmony_ci break; 157762306a36Sopenharmony_ci if (!path->locks[i]) 157862306a36Sopenharmony_ci break; 157962306a36Sopenharmony_ci 158062306a36Sopenharmony_ci if (check_skip) { 158162306a36Sopenharmony_ci if (path->slots[i] == 0) { 158262306a36Sopenharmony_ci skip_level = i + 1; 158362306a36Sopenharmony_ci continue; 158462306a36Sopenharmony_ci } 158562306a36Sopenharmony_ci 158662306a36Sopenharmony_ci if (path->keep_locks) { 158762306a36Sopenharmony_ci u32 nritems; 158862306a36Sopenharmony_ci 158962306a36Sopenharmony_ci nritems = btrfs_header_nritems(path->nodes[i]); 159062306a36Sopenharmony_ci if (nritems < 1 || path->slots[i] >= nritems - 1) { 159162306a36Sopenharmony_ci skip_level = i + 1; 159262306a36Sopenharmony_ci continue; 159362306a36Sopenharmony_ci } 159462306a36Sopenharmony_ci } 159562306a36Sopenharmony_ci } 159662306a36Sopenharmony_ci 
159762306a36Sopenharmony_ci if (i >= lowest_unlock && i > skip_level) { 159862306a36Sopenharmony_ci check_skip = false; 159962306a36Sopenharmony_ci btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]); 160062306a36Sopenharmony_ci path->locks[i] = 0; 160162306a36Sopenharmony_ci if (write_lock_level && 160262306a36Sopenharmony_ci i > min_write_lock_level && 160362306a36Sopenharmony_ci i <= *write_lock_level) { 160462306a36Sopenharmony_ci *write_lock_level = i - 1; 160562306a36Sopenharmony_ci } 160662306a36Sopenharmony_ci } 160762306a36Sopenharmony_ci } 160862306a36Sopenharmony_ci} 160962306a36Sopenharmony_ci 161062306a36Sopenharmony_ci/* 161162306a36Sopenharmony_ci * Helper function for btrfs_search_slot() and other functions that do a search 161262306a36Sopenharmony_ci * on a btree. The goal is to find a tree block in the cache (the radix tree at 161362306a36Sopenharmony_ci * fs_info->buffer_radix), but if we can't find it, or it's not up to date, read 161462306a36Sopenharmony_ci * its pages from disk. 161562306a36Sopenharmony_ci * 161662306a36Sopenharmony_ci * Returns -EAGAIN, with the path unlocked, if the caller needs to repeat the 161762306a36Sopenharmony_ci * whole btree search, starting again from the current root node. 
161862306a36Sopenharmony_ci */ 161962306a36Sopenharmony_cistatic int 162062306a36Sopenharmony_ciread_block_for_search(struct btrfs_root *root, struct btrfs_path *p, 162162306a36Sopenharmony_ci struct extent_buffer **eb_ret, int level, int slot, 162262306a36Sopenharmony_ci const struct btrfs_key *key) 162362306a36Sopenharmony_ci{ 162462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 162562306a36Sopenharmony_ci struct btrfs_tree_parent_check check = { 0 }; 162662306a36Sopenharmony_ci u64 blocknr; 162762306a36Sopenharmony_ci u64 gen; 162862306a36Sopenharmony_ci struct extent_buffer *tmp; 162962306a36Sopenharmony_ci int ret; 163062306a36Sopenharmony_ci int parent_level; 163162306a36Sopenharmony_ci bool unlock_up; 163262306a36Sopenharmony_ci 163362306a36Sopenharmony_ci unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]); 163462306a36Sopenharmony_ci blocknr = btrfs_node_blockptr(*eb_ret, slot); 163562306a36Sopenharmony_ci gen = btrfs_node_ptr_generation(*eb_ret, slot); 163662306a36Sopenharmony_ci parent_level = btrfs_header_level(*eb_ret); 163762306a36Sopenharmony_ci btrfs_node_key_to_cpu(*eb_ret, &check.first_key, slot); 163862306a36Sopenharmony_ci check.has_first_key = true; 163962306a36Sopenharmony_ci check.level = parent_level - 1; 164062306a36Sopenharmony_ci check.transid = gen; 164162306a36Sopenharmony_ci check.owner_root = root->root_key.objectid; 164262306a36Sopenharmony_ci 164362306a36Sopenharmony_ci /* 164462306a36Sopenharmony_ci * If we need to read an extent buffer from disk and we are holding locks 164562306a36Sopenharmony_ci * on upper level nodes, we unlock all the upper nodes before reading the 164662306a36Sopenharmony_ci * extent buffer, and then return -EAGAIN to the caller as it needs to 164762306a36Sopenharmony_ci * restart the search. We don't release the lock on the current level 164862306a36Sopenharmony_ci * because we need to walk this node to figure out which blocks to read. 
164962306a36Sopenharmony_ci */ 165062306a36Sopenharmony_ci tmp = find_extent_buffer(fs_info, blocknr); 165162306a36Sopenharmony_ci if (tmp) { 165262306a36Sopenharmony_ci if (p->reada == READA_FORWARD_ALWAYS) 165362306a36Sopenharmony_ci reada_for_search(fs_info, p, level, slot, key->objectid); 165462306a36Sopenharmony_ci 165562306a36Sopenharmony_ci /* first we do an atomic uptodate check */ 165662306a36Sopenharmony_ci if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { 165762306a36Sopenharmony_ci /* 165862306a36Sopenharmony_ci * Do extra check for first_key, eb can be stale due to 165962306a36Sopenharmony_ci * being cached, read from scrub, or have multiple 166062306a36Sopenharmony_ci * parents (shared tree blocks). 166162306a36Sopenharmony_ci */ 166262306a36Sopenharmony_ci if (btrfs_verify_level_key(tmp, 166362306a36Sopenharmony_ci parent_level - 1, &check.first_key, gen)) { 166462306a36Sopenharmony_ci free_extent_buffer(tmp); 166562306a36Sopenharmony_ci return -EUCLEAN; 166662306a36Sopenharmony_ci } 166762306a36Sopenharmony_ci *eb_ret = tmp; 166862306a36Sopenharmony_ci return 0; 166962306a36Sopenharmony_ci } 167062306a36Sopenharmony_ci 167162306a36Sopenharmony_ci if (p->nowait) { 167262306a36Sopenharmony_ci free_extent_buffer(tmp); 167362306a36Sopenharmony_ci return -EAGAIN; 167462306a36Sopenharmony_ci } 167562306a36Sopenharmony_ci 167662306a36Sopenharmony_ci if (unlock_up) 167762306a36Sopenharmony_ci btrfs_unlock_up_safe(p, level + 1); 167862306a36Sopenharmony_ci 167962306a36Sopenharmony_ci /* now we're allowed to do a blocking uptodate check */ 168062306a36Sopenharmony_ci ret = btrfs_read_extent_buffer(tmp, &check); 168162306a36Sopenharmony_ci if (ret) { 168262306a36Sopenharmony_ci free_extent_buffer(tmp); 168362306a36Sopenharmony_ci btrfs_release_path(p); 168462306a36Sopenharmony_ci return -EIO; 168562306a36Sopenharmony_ci } 168662306a36Sopenharmony_ci if (btrfs_check_eb_owner(tmp, root->root_key.objectid)) { 168762306a36Sopenharmony_ci free_extent_buffer(tmp); 
168862306a36Sopenharmony_ci btrfs_release_path(p); 168962306a36Sopenharmony_ci return -EUCLEAN; 169062306a36Sopenharmony_ci } 169162306a36Sopenharmony_ci 169262306a36Sopenharmony_ci if (unlock_up) 169362306a36Sopenharmony_ci ret = -EAGAIN; 169462306a36Sopenharmony_ci 169562306a36Sopenharmony_ci goto out; 169662306a36Sopenharmony_ci } else if (p->nowait) { 169762306a36Sopenharmony_ci return -EAGAIN; 169862306a36Sopenharmony_ci } 169962306a36Sopenharmony_ci 170062306a36Sopenharmony_ci if (unlock_up) { 170162306a36Sopenharmony_ci btrfs_unlock_up_safe(p, level + 1); 170262306a36Sopenharmony_ci ret = -EAGAIN; 170362306a36Sopenharmony_ci } else { 170462306a36Sopenharmony_ci ret = 0; 170562306a36Sopenharmony_ci } 170662306a36Sopenharmony_ci 170762306a36Sopenharmony_ci if (p->reada != READA_NONE) 170862306a36Sopenharmony_ci reada_for_search(fs_info, p, level, slot, key->objectid); 170962306a36Sopenharmony_ci 171062306a36Sopenharmony_ci tmp = read_tree_block(fs_info, blocknr, &check); 171162306a36Sopenharmony_ci if (IS_ERR(tmp)) { 171262306a36Sopenharmony_ci btrfs_release_path(p); 171362306a36Sopenharmony_ci return PTR_ERR(tmp); 171462306a36Sopenharmony_ci } 171562306a36Sopenharmony_ci /* 171662306a36Sopenharmony_ci * If the read above didn't mark this buffer up to date, 171762306a36Sopenharmony_ci * it will never end up being up to date. Set ret to EIO now 171862306a36Sopenharmony_ci * and give up so that our caller doesn't loop forever 171962306a36Sopenharmony_ci * on our EAGAINs. 
172062306a36Sopenharmony_ci */ 172162306a36Sopenharmony_ci if (!extent_buffer_uptodate(tmp)) 172262306a36Sopenharmony_ci ret = -EIO; 172362306a36Sopenharmony_ci 172462306a36Sopenharmony_ciout: 172562306a36Sopenharmony_ci if (ret == 0) { 172662306a36Sopenharmony_ci *eb_ret = tmp; 172762306a36Sopenharmony_ci } else { 172862306a36Sopenharmony_ci free_extent_buffer(tmp); 172962306a36Sopenharmony_ci btrfs_release_path(p); 173062306a36Sopenharmony_ci } 173162306a36Sopenharmony_ci 173262306a36Sopenharmony_ci return ret; 173362306a36Sopenharmony_ci} 173462306a36Sopenharmony_ci 173562306a36Sopenharmony_ci/* 173662306a36Sopenharmony_ci * helper function for btrfs_search_slot. This does all of the checks 173762306a36Sopenharmony_ci * for node-level blocks and does any balancing required based on 173862306a36Sopenharmony_ci * the ins_len. 173962306a36Sopenharmony_ci * 174062306a36Sopenharmony_ci * If no extra work was required, zero is returned. If we had to 174162306a36Sopenharmony_ci * drop the path, -EAGAIN is returned and btrfs_search_slot must 174262306a36Sopenharmony_ci * start over 174362306a36Sopenharmony_ci */ 174462306a36Sopenharmony_cistatic int 174562306a36Sopenharmony_cisetup_nodes_for_search(struct btrfs_trans_handle *trans, 174662306a36Sopenharmony_ci struct btrfs_root *root, struct btrfs_path *p, 174762306a36Sopenharmony_ci struct extent_buffer *b, int level, int ins_len, 174862306a36Sopenharmony_ci int *write_lock_level) 174962306a36Sopenharmony_ci{ 175062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 175162306a36Sopenharmony_ci int ret = 0; 175262306a36Sopenharmony_ci 175362306a36Sopenharmony_ci if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= 175462306a36Sopenharmony_ci BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) { 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci if (*write_lock_level < level + 1) { 175762306a36Sopenharmony_ci *write_lock_level = level + 1; 175862306a36Sopenharmony_ci btrfs_release_path(p); 
175962306a36Sopenharmony_ci return -EAGAIN; 176062306a36Sopenharmony_ci } 176162306a36Sopenharmony_ci 176262306a36Sopenharmony_ci reada_for_balance(p, level); 176362306a36Sopenharmony_ci ret = split_node(trans, root, p, level); 176462306a36Sopenharmony_ci 176562306a36Sopenharmony_ci b = p->nodes[level]; 176662306a36Sopenharmony_ci } else if (ins_len < 0 && btrfs_header_nritems(b) < 176762306a36Sopenharmony_ci BTRFS_NODEPTRS_PER_BLOCK(fs_info) / 2) { 176862306a36Sopenharmony_ci 176962306a36Sopenharmony_ci if (*write_lock_level < level + 1) { 177062306a36Sopenharmony_ci *write_lock_level = level + 1; 177162306a36Sopenharmony_ci btrfs_release_path(p); 177262306a36Sopenharmony_ci return -EAGAIN; 177362306a36Sopenharmony_ci } 177462306a36Sopenharmony_ci 177562306a36Sopenharmony_ci reada_for_balance(p, level); 177662306a36Sopenharmony_ci ret = balance_level(trans, root, p, level); 177762306a36Sopenharmony_ci if (ret) 177862306a36Sopenharmony_ci return ret; 177962306a36Sopenharmony_ci 178062306a36Sopenharmony_ci b = p->nodes[level]; 178162306a36Sopenharmony_ci if (!b) { 178262306a36Sopenharmony_ci btrfs_release_path(p); 178362306a36Sopenharmony_ci return -EAGAIN; 178462306a36Sopenharmony_ci } 178562306a36Sopenharmony_ci BUG_ON(btrfs_header_nritems(b) == 1); 178662306a36Sopenharmony_ci } 178762306a36Sopenharmony_ci return ret; 178862306a36Sopenharmony_ci} 178962306a36Sopenharmony_ci 179062306a36Sopenharmony_ciint btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *path, 179162306a36Sopenharmony_ci u64 iobjectid, u64 ioff, u8 key_type, 179262306a36Sopenharmony_ci struct btrfs_key *found_key) 179362306a36Sopenharmony_ci{ 179462306a36Sopenharmony_ci int ret; 179562306a36Sopenharmony_ci struct btrfs_key key; 179662306a36Sopenharmony_ci struct extent_buffer *eb; 179762306a36Sopenharmony_ci 179862306a36Sopenharmony_ci ASSERT(path); 179962306a36Sopenharmony_ci ASSERT(found_key); 180062306a36Sopenharmony_ci 180162306a36Sopenharmony_ci key.type = key_type; 
180262306a36Sopenharmony_ci key.objectid = iobjectid; 180362306a36Sopenharmony_ci key.offset = ioff; 180462306a36Sopenharmony_ci 180562306a36Sopenharmony_ci ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0); 180662306a36Sopenharmony_ci if (ret < 0) 180762306a36Sopenharmony_ci return ret; 180862306a36Sopenharmony_ci 180962306a36Sopenharmony_ci eb = path->nodes[0]; 181062306a36Sopenharmony_ci if (ret && path->slots[0] >= btrfs_header_nritems(eb)) { 181162306a36Sopenharmony_ci ret = btrfs_next_leaf(fs_root, path); 181262306a36Sopenharmony_ci if (ret) 181362306a36Sopenharmony_ci return ret; 181462306a36Sopenharmony_ci eb = path->nodes[0]; 181562306a36Sopenharmony_ci } 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_ci btrfs_item_key_to_cpu(eb, found_key, path->slots[0]); 181862306a36Sopenharmony_ci if (found_key->type != key.type || 181962306a36Sopenharmony_ci found_key->objectid != key.objectid) 182062306a36Sopenharmony_ci return 1; 182162306a36Sopenharmony_ci 182262306a36Sopenharmony_ci return 0; 182362306a36Sopenharmony_ci}

/*
 * Get the root extent buffer that a tree search should start from and record
 * it in the path.
 *
 * @root:             tree being searched
 * @p:                search path; p->search_commit_root, p->skip_locking and
 *                    p->nowait select how the root node is obtained
 * @write_lock_level: nodes at this level or lower must be write locked
 *
 * On success the buffer is stored in p->nodes[level] and, unless
 * p->skip_locking is set, the lock type taken is stored in p->locks[level].
 *
 * Returns the root extent buffer, or an ERR_PTR(): the error returned by
 * btrfs_try_read_lock_root_node() for nowait searches, or -EIO if the buffer
 * is not uptodate (in which case the lock and the reference are dropped here).
 */
static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
							struct btrfs_path *p,
							int write_lock_level)
{
	struct extent_buffer *b;
	int root_lock = 0;
	int level = 0;

	if (p->search_commit_root) {
		b = root->commit_root;
		atomic_inc(&b->refs);
		level = btrfs_header_level(b);
		/*
		 * Ensure that all callers have set skip_locking when
		 * p->search_commit_root = 1.
		 */
		ASSERT(p->skip_locking == 1);

		goto out;
	}

	if (p->skip_locking) {
		b = btrfs_root_node(root);
		level = btrfs_header_level(b);
		goto out;
	}

	/* We try very hard to do read locks on the root */
	root_lock = BTRFS_READ_LOCK;

	/*
	 * If the level is set to maximum, we can skip trying to get the read
	 * lock.
	 */
	if (write_lock_level < BTRFS_MAX_LEVEL) {
		/*
		 * We don't know the level of the root node until we actually
		 * have it read locked
		 */
		if (p->nowait) {
			b = btrfs_try_read_lock_root_node(root);
			if (IS_ERR(b))
				return b;
		} else {
			b = btrfs_read_lock_root_node(root);
		}
		level = btrfs_header_level(b);
		if (level > write_lock_level)
			goto out;

		/* Whoops, must trade for write lock */
		btrfs_tree_read_unlock(b);
		free_extent_buffer(b);
	}

	b = btrfs_lock_root_node(root);
	root_lock = BTRFS_WRITE_LOCK;

	/* The level might have changed, check again */
	level = btrfs_header_level(b);

out:
	/*
	 * The root may have failed to write out at some point, and thus is no
	 * longer valid, return an error in this case.
	 */
	if (!extent_buffer_uptodate(b)) {
		/* root_lock is 0 on the lockless paths, nothing to unlock then. */
		if (root_lock)
			btrfs_tree_unlock_rw(b, root_lock);
		free_extent_buffer(b);
		return ERR_PTR(-EIO);
	}

	p->nodes[level] = b;
	if (!p->skip_locking)
		p->locks[level] = root_lock;
	/*
	 * Callers are responsible for dropping b's references.
	 */
	return b;
}

/*
 * Replace the extent buffer at the lowest level of the path with a cloned
 * version. The purpose is to be able to use it safely, after releasing the
 * commit root semaphore, even if relocation is happening in parallel, the
 * transaction used for relocation is committed and the extent buffer is
 * reallocated in the next transaction.
 *
 * This is used in a context where the caller does not prevent transaction
 * commits from happening, either by holding a transaction handle or holding
 * some lock, while it's doing searches through a commit root.
191762306a36Sopenharmony_ci * At the moment it's only used for send operations. 191862306a36Sopenharmony_ci */ 191962306a36Sopenharmony_cistatic int finish_need_commit_sem_search(struct btrfs_path *path) 192062306a36Sopenharmony_ci{ 192162306a36Sopenharmony_ci const int i = path->lowest_level; 192262306a36Sopenharmony_ci const int slot = path->slots[i]; 192362306a36Sopenharmony_ci struct extent_buffer *lowest = path->nodes[i]; 192462306a36Sopenharmony_ci struct extent_buffer *clone; 192562306a36Sopenharmony_ci 192662306a36Sopenharmony_ci ASSERT(path->need_commit_sem); 192762306a36Sopenharmony_ci 192862306a36Sopenharmony_ci if (!lowest) 192962306a36Sopenharmony_ci return 0; 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_ci lockdep_assert_held_read(&lowest->fs_info->commit_root_sem); 193262306a36Sopenharmony_ci 193362306a36Sopenharmony_ci clone = btrfs_clone_extent_buffer(lowest); 193462306a36Sopenharmony_ci if (!clone) 193562306a36Sopenharmony_ci return -ENOMEM; 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_ci btrfs_release_path(path); 193862306a36Sopenharmony_ci path->nodes[i] = clone; 193962306a36Sopenharmony_ci path->slots[i] = slot; 194062306a36Sopenharmony_ci 194162306a36Sopenharmony_ci return 0; 194262306a36Sopenharmony_ci} 194362306a36Sopenharmony_ci 194462306a36Sopenharmony_cistatic inline int search_for_key_slot(struct extent_buffer *eb, 194562306a36Sopenharmony_ci int search_low_slot, 194662306a36Sopenharmony_ci const struct btrfs_key *key, 194762306a36Sopenharmony_ci int prev_cmp, 194862306a36Sopenharmony_ci int *slot) 194962306a36Sopenharmony_ci{ 195062306a36Sopenharmony_ci /* 195162306a36Sopenharmony_ci * If a previous call to btrfs_bin_search() on a parent node returned an 195262306a36Sopenharmony_ci * exact match (prev_cmp == 0), we can safely assume the target key will 195362306a36Sopenharmony_ci * always be at slot 0 on lower levels, since each key pointer 195462306a36Sopenharmony_ci * (struct btrfs_key_ptr) refers to the lowest key 
accessible from the 195562306a36Sopenharmony_ci * subtree it points to. Thus we can skip searching lower levels. 195662306a36Sopenharmony_ci */ 195762306a36Sopenharmony_ci if (prev_cmp == 0) { 195862306a36Sopenharmony_ci *slot = 0; 195962306a36Sopenharmony_ci return 0; 196062306a36Sopenharmony_ci } 196162306a36Sopenharmony_ci 196262306a36Sopenharmony_ci return btrfs_bin_search(eb, search_low_slot, key, slot); 196362306a36Sopenharmony_ci} 196462306a36Sopenharmony_ci 196562306a36Sopenharmony_cistatic int search_leaf(struct btrfs_trans_handle *trans, 196662306a36Sopenharmony_ci struct btrfs_root *root, 196762306a36Sopenharmony_ci const struct btrfs_key *key, 196862306a36Sopenharmony_ci struct btrfs_path *path, 196962306a36Sopenharmony_ci int ins_len, 197062306a36Sopenharmony_ci int prev_cmp) 197162306a36Sopenharmony_ci{ 197262306a36Sopenharmony_ci struct extent_buffer *leaf = path->nodes[0]; 197362306a36Sopenharmony_ci int leaf_free_space = -1; 197462306a36Sopenharmony_ci int search_low_slot = 0; 197562306a36Sopenharmony_ci int ret; 197662306a36Sopenharmony_ci bool do_bin_search = true; 197762306a36Sopenharmony_ci 197862306a36Sopenharmony_ci /* 197962306a36Sopenharmony_ci * If we are doing an insertion, the leaf has enough free space and the 198062306a36Sopenharmony_ci * destination slot for the key is not slot 0, then we can unlock our 198162306a36Sopenharmony_ci * write lock on the parent, and any other upper nodes, before doing the 198262306a36Sopenharmony_ci * binary search on the leaf (with search_for_key_slot()), allowing other 198362306a36Sopenharmony_ci * tasks to lock the parent and any other upper nodes. 198462306a36Sopenharmony_ci */ 198562306a36Sopenharmony_ci if (ins_len > 0) { 198662306a36Sopenharmony_ci /* 198762306a36Sopenharmony_ci * Cache the leaf free space, since we will need it later and it 198862306a36Sopenharmony_ci * will not change until then. 
198962306a36Sopenharmony_ci */ 199062306a36Sopenharmony_ci leaf_free_space = btrfs_leaf_free_space(leaf); 199162306a36Sopenharmony_ci 199262306a36Sopenharmony_ci /* 199362306a36Sopenharmony_ci * !path->locks[1] means we have a single node tree, the leaf is 199462306a36Sopenharmony_ci * the root of the tree. 199562306a36Sopenharmony_ci */ 199662306a36Sopenharmony_ci if (path->locks[1] && leaf_free_space >= ins_len) { 199762306a36Sopenharmony_ci struct btrfs_disk_key first_key; 199862306a36Sopenharmony_ci 199962306a36Sopenharmony_ci ASSERT(btrfs_header_nritems(leaf) > 0); 200062306a36Sopenharmony_ci btrfs_item_key(leaf, &first_key, 0); 200162306a36Sopenharmony_ci 200262306a36Sopenharmony_ci /* 200362306a36Sopenharmony_ci * Doing the extra comparison with the first key is cheap, 200462306a36Sopenharmony_ci * taking into account that the first key is very likely 200562306a36Sopenharmony_ci * already in a cache line because it immediately follows 200662306a36Sopenharmony_ci * the extent buffer's header and we have recently accessed 200762306a36Sopenharmony_ci * the header's level field. 200862306a36Sopenharmony_ci */ 200962306a36Sopenharmony_ci ret = comp_keys(&first_key, key); 201062306a36Sopenharmony_ci if (ret < 0) { 201162306a36Sopenharmony_ci /* 201262306a36Sopenharmony_ci * The first key is smaller than the key we want 201362306a36Sopenharmony_ci * to insert, so we are safe to unlock all upper 201462306a36Sopenharmony_ci * nodes and we have to do the binary search. 201562306a36Sopenharmony_ci * 201662306a36Sopenharmony_ci * We do use btrfs_unlock_up_safe() and not 201762306a36Sopenharmony_ci * unlock_up() because the later does not unlock 201862306a36Sopenharmony_ci * nodes with a slot of 0 - we can safely unlock 201962306a36Sopenharmony_ci * any node even if its slot is 0 since in this 202062306a36Sopenharmony_ci * case the key does not end up at slot 0 of the 202162306a36Sopenharmony_ci * leaf and there's no need to split the leaf. 
202262306a36Sopenharmony_ci */ 202362306a36Sopenharmony_ci btrfs_unlock_up_safe(path, 1); 202462306a36Sopenharmony_ci search_low_slot = 1; 202562306a36Sopenharmony_ci } else { 202662306a36Sopenharmony_ci /* 202762306a36Sopenharmony_ci * The first key is >= then the key we want to 202862306a36Sopenharmony_ci * insert, so we can skip the binary search as 202962306a36Sopenharmony_ci * the target key will be at slot 0. 203062306a36Sopenharmony_ci * 203162306a36Sopenharmony_ci * We can not unlock upper nodes when the key is 203262306a36Sopenharmony_ci * less than the first key, because we will need 203362306a36Sopenharmony_ci * to update the key at slot 0 of the parent node 203462306a36Sopenharmony_ci * and possibly of other upper nodes too. 203562306a36Sopenharmony_ci * If the key matches the first key, then we can 203662306a36Sopenharmony_ci * unlock all the upper nodes, using 203762306a36Sopenharmony_ci * btrfs_unlock_up_safe() instead of unlock_up() 203862306a36Sopenharmony_ci * as stated above. 203962306a36Sopenharmony_ci */ 204062306a36Sopenharmony_ci if (ret == 0) 204162306a36Sopenharmony_ci btrfs_unlock_up_safe(path, 1); 204262306a36Sopenharmony_ci /* 204362306a36Sopenharmony_ci * ret is already 0 or 1, matching the result of 204462306a36Sopenharmony_ci * a btrfs_bin_search() call, so there is no need 204562306a36Sopenharmony_ci * to adjust it. 
204662306a36Sopenharmony_ci */ 204762306a36Sopenharmony_ci do_bin_search = false; 204862306a36Sopenharmony_ci path->slots[0] = 0; 204962306a36Sopenharmony_ci } 205062306a36Sopenharmony_ci } 205162306a36Sopenharmony_ci } 205262306a36Sopenharmony_ci 205362306a36Sopenharmony_ci if (do_bin_search) { 205462306a36Sopenharmony_ci ret = search_for_key_slot(leaf, search_low_slot, key, 205562306a36Sopenharmony_ci prev_cmp, &path->slots[0]); 205662306a36Sopenharmony_ci if (ret < 0) 205762306a36Sopenharmony_ci return ret; 205862306a36Sopenharmony_ci } 205962306a36Sopenharmony_ci 206062306a36Sopenharmony_ci if (ins_len > 0) { 206162306a36Sopenharmony_ci /* 206262306a36Sopenharmony_ci * Item key already exists. In this case, if we are allowed to 206362306a36Sopenharmony_ci * insert the item (for example, in dir_item case, item key 206462306a36Sopenharmony_ci * collision is allowed), it will be merged with the original 206562306a36Sopenharmony_ci * item. Only the item size grows, no new btrfs item will be 206662306a36Sopenharmony_ci * added. If search_for_extension is not set, ins_len already 206762306a36Sopenharmony_ci * accounts the size btrfs_item, deduct it here so leaf space 206862306a36Sopenharmony_ci * check will be correct. 
206962306a36Sopenharmony_ci */ 207062306a36Sopenharmony_ci if (ret == 0 && !path->search_for_extension) { 207162306a36Sopenharmony_ci ASSERT(ins_len >= sizeof(struct btrfs_item)); 207262306a36Sopenharmony_ci ins_len -= sizeof(struct btrfs_item); 207362306a36Sopenharmony_ci } 207462306a36Sopenharmony_ci 207562306a36Sopenharmony_ci ASSERT(leaf_free_space >= 0); 207662306a36Sopenharmony_ci 207762306a36Sopenharmony_ci if (leaf_free_space < ins_len) { 207862306a36Sopenharmony_ci int err; 207962306a36Sopenharmony_ci 208062306a36Sopenharmony_ci err = split_leaf(trans, root, key, path, ins_len, 208162306a36Sopenharmony_ci (ret == 0)); 208262306a36Sopenharmony_ci ASSERT(err <= 0); 208362306a36Sopenharmony_ci if (WARN_ON(err > 0)) 208462306a36Sopenharmony_ci err = -EUCLEAN; 208562306a36Sopenharmony_ci if (err) 208662306a36Sopenharmony_ci ret = err; 208762306a36Sopenharmony_ci } 208862306a36Sopenharmony_ci } 208962306a36Sopenharmony_ci 209062306a36Sopenharmony_ci return ret; 209162306a36Sopenharmony_ci} 209262306a36Sopenharmony_ci 209362306a36Sopenharmony_ci/* 209462306a36Sopenharmony_ci * btrfs_search_slot - look for a key in a tree and perform necessary 209562306a36Sopenharmony_ci * modifications to preserve tree invariants. 
209662306a36Sopenharmony_ci * 209762306a36Sopenharmony_ci * @trans: Handle of transaction, used when modifying the tree 209862306a36Sopenharmony_ci * @p: Holds all btree nodes along the search path 209962306a36Sopenharmony_ci * @root: The root node of the tree 210062306a36Sopenharmony_ci * @key: The key we are looking for 210162306a36Sopenharmony_ci * @ins_len: Indicates purpose of search: 210262306a36Sopenharmony_ci * >0 for inserts it's size of item inserted (*) 210362306a36Sopenharmony_ci * <0 for deletions 210462306a36Sopenharmony_ci * 0 for plain searches, not modifying the tree 210562306a36Sopenharmony_ci * 210662306a36Sopenharmony_ci * (*) If size of item inserted doesn't include 210762306a36Sopenharmony_ci * sizeof(struct btrfs_item), then p->search_for_extension must 210862306a36Sopenharmony_ci * be set. 210962306a36Sopenharmony_ci * @cow: boolean should CoW operations be performed. Must always be 1 211062306a36Sopenharmony_ci * when modifying the tree. 211162306a36Sopenharmony_ci * 211262306a36Sopenharmony_ci * If @ins_len > 0, nodes and leaves will be split as we walk down the tree. 
211362306a36Sopenharmony_ci * If @ins_len < 0, nodes will be merged as we walk down the tree (if possible) 211462306a36Sopenharmony_ci * 211562306a36Sopenharmony_ci * If @key is found, 0 is returned and you can find the item in the leaf level 211662306a36Sopenharmony_ci * of the path (level 0) 211762306a36Sopenharmony_ci * 211862306a36Sopenharmony_ci * If @key isn't found, 1 is returned and the leaf level of the path (level 0) 211962306a36Sopenharmony_ci * points to the slot where it should be inserted 212062306a36Sopenharmony_ci * 212162306a36Sopenharmony_ci * If an error is encountered while searching the tree a negative error number 212262306a36Sopenharmony_ci * is returned 212362306a36Sopenharmony_ci */ 212462306a36Sopenharmony_ciint btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root, 212562306a36Sopenharmony_ci const struct btrfs_key *key, struct btrfs_path *p, 212662306a36Sopenharmony_ci int ins_len, int cow) 212762306a36Sopenharmony_ci{ 212862306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 212962306a36Sopenharmony_ci struct extent_buffer *b; 213062306a36Sopenharmony_ci int slot; 213162306a36Sopenharmony_ci int ret; 213262306a36Sopenharmony_ci int err; 213362306a36Sopenharmony_ci int level; 213462306a36Sopenharmony_ci int lowest_unlock = 1; 213562306a36Sopenharmony_ci /* everything at write_lock_level or lower must be write locked */ 213662306a36Sopenharmony_ci int write_lock_level = 0; 213762306a36Sopenharmony_ci u8 lowest_level = 0; 213862306a36Sopenharmony_ci int min_write_lock_level; 213962306a36Sopenharmony_ci int prev_cmp; 214062306a36Sopenharmony_ci 214162306a36Sopenharmony_ci might_sleep(); 214262306a36Sopenharmony_ci 214362306a36Sopenharmony_ci lowest_level = p->lowest_level; 214462306a36Sopenharmony_ci WARN_ON(lowest_level && ins_len > 0); 214562306a36Sopenharmony_ci WARN_ON(p->nodes[0] != NULL); 214662306a36Sopenharmony_ci BUG_ON(!cow && ins_len); 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_ci 
/* 214962306a36Sopenharmony_ci * For now only allow nowait for read only operations. There's no 215062306a36Sopenharmony_ci * strict reason why we can't, we just only need it for reads so it's 215162306a36Sopenharmony_ci * only implemented for reads. 215262306a36Sopenharmony_ci */ 215362306a36Sopenharmony_ci ASSERT(!p->nowait || !cow); 215462306a36Sopenharmony_ci 215562306a36Sopenharmony_ci if (ins_len < 0) { 215662306a36Sopenharmony_ci lowest_unlock = 2; 215762306a36Sopenharmony_ci 215862306a36Sopenharmony_ci /* when we are removing items, we might have to go up to level 215962306a36Sopenharmony_ci * two as we update tree pointers Make sure we keep write 216062306a36Sopenharmony_ci * for those levels as well 216162306a36Sopenharmony_ci */ 216262306a36Sopenharmony_ci write_lock_level = 2; 216362306a36Sopenharmony_ci } else if (ins_len > 0) { 216462306a36Sopenharmony_ci /* 216562306a36Sopenharmony_ci * for inserting items, make sure we have a write lock on 216662306a36Sopenharmony_ci * level 1 so we can update keys 216762306a36Sopenharmony_ci */ 216862306a36Sopenharmony_ci write_lock_level = 1; 216962306a36Sopenharmony_ci } 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ci if (!cow) 217262306a36Sopenharmony_ci write_lock_level = -1; 217362306a36Sopenharmony_ci 217462306a36Sopenharmony_ci if (cow && (p->keep_locks || p->lowest_level)) 217562306a36Sopenharmony_ci write_lock_level = BTRFS_MAX_LEVEL; 217662306a36Sopenharmony_ci 217762306a36Sopenharmony_ci min_write_lock_level = write_lock_level; 217862306a36Sopenharmony_ci 217962306a36Sopenharmony_ci if (p->need_commit_sem) { 218062306a36Sopenharmony_ci ASSERT(p->search_commit_root); 218162306a36Sopenharmony_ci if (p->nowait) { 218262306a36Sopenharmony_ci if (!down_read_trylock(&fs_info->commit_root_sem)) 218362306a36Sopenharmony_ci return -EAGAIN; 218462306a36Sopenharmony_ci } else { 218562306a36Sopenharmony_ci down_read(&fs_info->commit_root_sem); 218662306a36Sopenharmony_ci } 218762306a36Sopenharmony_ci } 
218862306a36Sopenharmony_ci 218962306a36Sopenharmony_ciagain: 219062306a36Sopenharmony_ci prev_cmp = -1; 219162306a36Sopenharmony_ci b = btrfs_search_slot_get_root(root, p, write_lock_level); 219262306a36Sopenharmony_ci if (IS_ERR(b)) { 219362306a36Sopenharmony_ci ret = PTR_ERR(b); 219462306a36Sopenharmony_ci goto done; 219562306a36Sopenharmony_ci } 219662306a36Sopenharmony_ci 219762306a36Sopenharmony_ci while (b) { 219862306a36Sopenharmony_ci int dec = 0; 219962306a36Sopenharmony_ci 220062306a36Sopenharmony_ci level = btrfs_header_level(b); 220162306a36Sopenharmony_ci 220262306a36Sopenharmony_ci if (cow) { 220362306a36Sopenharmony_ci bool last_level = (level == (BTRFS_MAX_LEVEL - 1)); 220462306a36Sopenharmony_ci 220562306a36Sopenharmony_ci /* 220662306a36Sopenharmony_ci * if we don't really need to cow this block 220762306a36Sopenharmony_ci * then we don't want to set the path blocking, 220862306a36Sopenharmony_ci * so we test it here 220962306a36Sopenharmony_ci */ 221062306a36Sopenharmony_ci if (!should_cow_block(trans, root, b)) 221162306a36Sopenharmony_ci goto cow_done; 221262306a36Sopenharmony_ci 221362306a36Sopenharmony_ci /* 221462306a36Sopenharmony_ci * must have write locks on this node and the 221562306a36Sopenharmony_ci * parent 221662306a36Sopenharmony_ci */ 221762306a36Sopenharmony_ci if (level > write_lock_level || 221862306a36Sopenharmony_ci (level + 1 > write_lock_level && 221962306a36Sopenharmony_ci level + 1 < BTRFS_MAX_LEVEL && 222062306a36Sopenharmony_ci p->nodes[level + 1])) { 222162306a36Sopenharmony_ci write_lock_level = level + 1; 222262306a36Sopenharmony_ci btrfs_release_path(p); 222362306a36Sopenharmony_ci goto again; 222462306a36Sopenharmony_ci } 222562306a36Sopenharmony_ci 222662306a36Sopenharmony_ci if (last_level) 222762306a36Sopenharmony_ci err = btrfs_cow_block(trans, root, b, NULL, 0, 222862306a36Sopenharmony_ci &b, 222962306a36Sopenharmony_ci BTRFS_NESTING_COW); 223062306a36Sopenharmony_ci else 223162306a36Sopenharmony_ci err = 
btrfs_cow_block(trans, root, b, 223262306a36Sopenharmony_ci p->nodes[level + 1], 223362306a36Sopenharmony_ci p->slots[level + 1], &b, 223462306a36Sopenharmony_ci BTRFS_NESTING_COW); 223562306a36Sopenharmony_ci if (err) { 223662306a36Sopenharmony_ci ret = err; 223762306a36Sopenharmony_ci goto done; 223862306a36Sopenharmony_ci } 223962306a36Sopenharmony_ci } 224062306a36Sopenharmony_cicow_done: 224162306a36Sopenharmony_ci p->nodes[level] = b; 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci /* 224462306a36Sopenharmony_ci * we have a lock on b and as long as we aren't changing 224562306a36Sopenharmony_ci * the tree, there is no way to for the items in b to change. 224662306a36Sopenharmony_ci * It is safe to drop the lock on our parent before we 224762306a36Sopenharmony_ci * go through the expensive btree search on b. 224862306a36Sopenharmony_ci * 224962306a36Sopenharmony_ci * If we're inserting or deleting (ins_len != 0), then we might 225062306a36Sopenharmony_ci * be changing slot zero, which may require changing the parent. 225162306a36Sopenharmony_ci * So, we can't drop the lock until after we know which slot 225262306a36Sopenharmony_ci * we're operating on. 
225362306a36Sopenharmony_ci */ 225462306a36Sopenharmony_ci if (!ins_len && !p->keep_locks) { 225562306a36Sopenharmony_ci int u = level + 1; 225662306a36Sopenharmony_ci 225762306a36Sopenharmony_ci if (u < BTRFS_MAX_LEVEL && p->locks[u]) { 225862306a36Sopenharmony_ci btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]); 225962306a36Sopenharmony_ci p->locks[u] = 0; 226062306a36Sopenharmony_ci } 226162306a36Sopenharmony_ci } 226262306a36Sopenharmony_ci 226362306a36Sopenharmony_ci if (level == 0) { 226462306a36Sopenharmony_ci if (ins_len > 0) 226562306a36Sopenharmony_ci ASSERT(write_lock_level >= 1); 226662306a36Sopenharmony_ci 226762306a36Sopenharmony_ci ret = search_leaf(trans, root, key, p, ins_len, prev_cmp); 226862306a36Sopenharmony_ci if (!p->search_for_split) 226962306a36Sopenharmony_ci unlock_up(p, level, lowest_unlock, 227062306a36Sopenharmony_ci min_write_lock_level, NULL); 227162306a36Sopenharmony_ci goto done; 227262306a36Sopenharmony_ci } 227362306a36Sopenharmony_ci 227462306a36Sopenharmony_ci ret = search_for_key_slot(b, 0, key, prev_cmp, &slot); 227562306a36Sopenharmony_ci if (ret < 0) 227662306a36Sopenharmony_ci goto done; 227762306a36Sopenharmony_ci prev_cmp = ret; 227862306a36Sopenharmony_ci 227962306a36Sopenharmony_ci if (ret && slot > 0) { 228062306a36Sopenharmony_ci dec = 1; 228162306a36Sopenharmony_ci slot--; 228262306a36Sopenharmony_ci } 228362306a36Sopenharmony_ci p->slots[level] = slot; 228462306a36Sopenharmony_ci err = setup_nodes_for_search(trans, root, p, b, level, ins_len, 228562306a36Sopenharmony_ci &write_lock_level); 228662306a36Sopenharmony_ci if (err == -EAGAIN) 228762306a36Sopenharmony_ci goto again; 228862306a36Sopenharmony_ci if (err) { 228962306a36Sopenharmony_ci ret = err; 229062306a36Sopenharmony_ci goto done; 229162306a36Sopenharmony_ci } 229262306a36Sopenharmony_ci b = p->nodes[level]; 229362306a36Sopenharmony_ci slot = p->slots[level]; 229462306a36Sopenharmony_ci 229562306a36Sopenharmony_ci /* 229662306a36Sopenharmony_ci * Slot 0 is 
special, if we change the key we have to update 229762306a36Sopenharmony_ci * the parent pointer which means we must have a write lock on 229862306a36Sopenharmony_ci * the parent 229962306a36Sopenharmony_ci */ 230062306a36Sopenharmony_ci if (slot == 0 && ins_len && write_lock_level < level + 1) { 230162306a36Sopenharmony_ci write_lock_level = level + 1; 230262306a36Sopenharmony_ci btrfs_release_path(p); 230362306a36Sopenharmony_ci goto again; 230462306a36Sopenharmony_ci } 230562306a36Sopenharmony_ci 230662306a36Sopenharmony_ci unlock_up(p, level, lowest_unlock, min_write_lock_level, 230762306a36Sopenharmony_ci &write_lock_level); 230862306a36Sopenharmony_ci 230962306a36Sopenharmony_ci if (level == lowest_level) { 231062306a36Sopenharmony_ci if (dec) 231162306a36Sopenharmony_ci p->slots[level]++; 231262306a36Sopenharmony_ci goto done; 231362306a36Sopenharmony_ci } 231462306a36Sopenharmony_ci 231562306a36Sopenharmony_ci err = read_block_for_search(root, p, &b, level, slot, key); 231662306a36Sopenharmony_ci if (err == -EAGAIN) 231762306a36Sopenharmony_ci goto again; 231862306a36Sopenharmony_ci if (err) { 231962306a36Sopenharmony_ci ret = err; 232062306a36Sopenharmony_ci goto done; 232162306a36Sopenharmony_ci } 232262306a36Sopenharmony_ci 232362306a36Sopenharmony_ci if (!p->skip_locking) { 232462306a36Sopenharmony_ci level = btrfs_header_level(b); 232562306a36Sopenharmony_ci 232662306a36Sopenharmony_ci btrfs_maybe_reset_lockdep_class(root, b); 232762306a36Sopenharmony_ci 232862306a36Sopenharmony_ci if (level <= write_lock_level) { 232962306a36Sopenharmony_ci btrfs_tree_lock(b); 233062306a36Sopenharmony_ci p->locks[level] = BTRFS_WRITE_LOCK; 233162306a36Sopenharmony_ci } else { 233262306a36Sopenharmony_ci if (p->nowait) { 233362306a36Sopenharmony_ci if (!btrfs_try_tree_read_lock(b)) { 233462306a36Sopenharmony_ci free_extent_buffer(b); 233562306a36Sopenharmony_ci ret = -EAGAIN; 233662306a36Sopenharmony_ci goto done; 233762306a36Sopenharmony_ci } 
233862306a36Sopenharmony_ci } else { 233962306a36Sopenharmony_ci btrfs_tree_read_lock(b); 234062306a36Sopenharmony_ci } 234162306a36Sopenharmony_ci p->locks[level] = BTRFS_READ_LOCK; 234262306a36Sopenharmony_ci } 234362306a36Sopenharmony_ci p->nodes[level] = b; 234462306a36Sopenharmony_ci } 234562306a36Sopenharmony_ci } 234662306a36Sopenharmony_ci ret = 1; 234762306a36Sopenharmony_cidone: 234862306a36Sopenharmony_ci if (ret < 0 && !p->skip_release_on_error) 234962306a36Sopenharmony_ci btrfs_release_path(p); 235062306a36Sopenharmony_ci 235162306a36Sopenharmony_ci if (p->need_commit_sem) { 235262306a36Sopenharmony_ci int ret2; 235362306a36Sopenharmony_ci 235462306a36Sopenharmony_ci ret2 = finish_need_commit_sem_search(p); 235562306a36Sopenharmony_ci up_read(&fs_info->commit_root_sem); 235662306a36Sopenharmony_ci if (ret2) 235762306a36Sopenharmony_ci ret = ret2; 235862306a36Sopenharmony_ci } 235962306a36Sopenharmony_ci 236062306a36Sopenharmony_ci return ret; 236162306a36Sopenharmony_ci} 236262306a36Sopenharmony_ciALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO); 236362306a36Sopenharmony_ci 236462306a36Sopenharmony_ci/* 236562306a36Sopenharmony_ci * Like btrfs_search_slot, this looks for a key in the given tree. It uses the 236662306a36Sopenharmony_ci * current state of the tree together with the operations recorded in the tree 236762306a36Sopenharmony_ci * modification log to search for the key in a previous version of this tree, as 236862306a36Sopenharmony_ci * denoted by the time_seq parameter. 236962306a36Sopenharmony_ci * 237062306a36Sopenharmony_ci * Naturally, there is no support for insert, delete or cow operations. 237162306a36Sopenharmony_ci * 237262306a36Sopenharmony_ci * The resulting path and return value will be set up as if we called 237362306a36Sopenharmony_ci * btrfs_search_slot at that point in time with ins_len and cow both set to 0. 
237462306a36Sopenharmony_ci */ 237562306a36Sopenharmony_ciint btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, 237662306a36Sopenharmony_ci struct btrfs_path *p, u64 time_seq) 237762306a36Sopenharmony_ci{ 237862306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 237962306a36Sopenharmony_ci struct extent_buffer *b; 238062306a36Sopenharmony_ci int slot; 238162306a36Sopenharmony_ci int ret; 238262306a36Sopenharmony_ci int err; 238362306a36Sopenharmony_ci int level; 238462306a36Sopenharmony_ci int lowest_unlock = 1; 238562306a36Sopenharmony_ci u8 lowest_level = 0; 238662306a36Sopenharmony_ci 238762306a36Sopenharmony_ci lowest_level = p->lowest_level; 238862306a36Sopenharmony_ci WARN_ON(p->nodes[0] != NULL); 238962306a36Sopenharmony_ci ASSERT(!p->nowait); 239062306a36Sopenharmony_ci 239162306a36Sopenharmony_ci if (p->search_commit_root) { 239262306a36Sopenharmony_ci BUG_ON(time_seq); 239362306a36Sopenharmony_ci return btrfs_search_slot(NULL, root, key, p, 0, 0); 239462306a36Sopenharmony_ci } 239562306a36Sopenharmony_ci 239662306a36Sopenharmony_ciagain: 239762306a36Sopenharmony_ci b = btrfs_get_old_root(root, time_seq); 239862306a36Sopenharmony_ci if (!b) { 239962306a36Sopenharmony_ci ret = -EIO; 240062306a36Sopenharmony_ci goto done; 240162306a36Sopenharmony_ci } 240262306a36Sopenharmony_ci level = btrfs_header_level(b); 240362306a36Sopenharmony_ci p->locks[level] = BTRFS_READ_LOCK; 240462306a36Sopenharmony_ci 240562306a36Sopenharmony_ci while (b) { 240662306a36Sopenharmony_ci int dec = 0; 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci level = btrfs_header_level(b); 240962306a36Sopenharmony_ci p->nodes[level] = b; 241062306a36Sopenharmony_ci 241162306a36Sopenharmony_ci /* 241262306a36Sopenharmony_ci * we have a lock on b and as long as we aren't changing 241362306a36Sopenharmony_ci * the tree, there is no way to for the items in b to change. 
241462306a36Sopenharmony_ci * It is safe to drop the lock on our parent before we 241562306a36Sopenharmony_ci * go through the expensive btree search on b. 241662306a36Sopenharmony_ci */ 241762306a36Sopenharmony_ci btrfs_unlock_up_safe(p, level + 1); 241862306a36Sopenharmony_ci 241962306a36Sopenharmony_ci ret = btrfs_bin_search(b, 0, key, &slot); 242062306a36Sopenharmony_ci if (ret < 0) 242162306a36Sopenharmony_ci goto done; 242262306a36Sopenharmony_ci 242362306a36Sopenharmony_ci if (level == 0) { 242462306a36Sopenharmony_ci p->slots[level] = slot; 242562306a36Sopenharmony_ci unlock_up(p, level, lowest_unlock, 0, NULL); 242662306a36Sopenharmony_ci goto done; 242762306a36Sopenharmony_ci } 242862306a36Sopenharmony_ci 242962306a36Sopenharmony_ci if (ret && slot > 0) { 243062306a36Sopenharmony_ci dec = 1; 243162306a36Sopenharmony_ci slot--; 243262306a36Sopenharmony_ci } 243362306a36Sopenharmony_ci p->slots[level] = slot; 243462306a36Sopenharmony_ci unlock_up(p, level, lowest_unlock, 0, NULL); 243562306a36Sopenharmony_ci 243662306a36Sopenharmony_ci if (level == lowest_level) { 243762306a36Sopenharmony_ci if (dec) 243862306a36Sopenharmony_ci p->slots[level]++; 243962306a36Sopenharmony_ci goto done; 244062306a36Sopenharmony_ci } 244162306a36Sopenharmony_ci 244262306a36Sopenharmony_ci err = read_block_for_search(root, p, &b, level, slot, key); 244362306a36Sopenharmony_ci if (err == -EAGAIN) 244462306a36Sopenharmony_ci goto again; 244562306a36Sopenharmony_ci if (err) { 244662306a36Sopenharmony_ci ret = err; 244762306a36Sopenharmony_ci goto done; 244862306a36Sopenharmony_ci } 244962306a36Sopenharmony_ci 245062306a36Sopenharmony_ci level = btrfs_header_level(b); 245162306a36Sopenharmony_ci btrfs_tree_read_lock(b); 245262306a36Sopenharmony_ci b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq); 245362306a36Sopenharmony_ci if (!b) { 245462306a36Sopenharmony_ci ret = -ENOMEM; 245562306a36Sopenharmony_ci goto done; 245662306a36Sopenharmony_ci } 245762306a36Sopenharmony_ci 
p->locks[level] = BTRFS_READ_LOCK; 245862306a36Sopenharmony_ci p->nodes[level] = b; 245962306a36Sopenharmony_ci } 246062306a36Sopenharmony_ci ret = 1; 246162306a36Sopenharmony_cidone: 246262306a36Sopenharmony_ci if (ret < 0) 246362306a36Sopenharmony_ci btrfs_release_path(p); 246462306a36Sopenharmony_ci 246562306a36Sopenharmony_ci return ret; 246662306a36Sopenharmony_ci} 246762306a36Sopenharmony_ci 246862306a36Sopenharmony_ci/* 246962306a36Sopenharmony_ci * Search the tree again to find a leaf with smaller keys. 247062306a36Sopenharmony_ci * Returns 0 if it found something. 247162306a36Sopenharmony_ci * Returns 1 if there are no smaller keys. 247262306a36Sopenharmony_ci * Returns < 0 on error. 247362306a36Sopenharmony_ci * 247462306a36Sopenharmony_ci * This may release the path, and so you may lose any locks held at the 247562306a36Sopenharmony_ci * time you call it. 247662306a36Sopenharmony_ci */ 247762306a36Sopenharmony_cistatic int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) 247862306a36Sopenharmony_ci{ 247962306a36Sopenharmony_ci struct btrfs_key key; 248062306a36Sopenharmony_ci struct btrfs_key orig_key; 248162306a36Sopenharmony_ci struct btrfs_disk_key found_key; 248262306a36Sopenharmony_ci int ret; 248362306a36Sopenharmony_ci 248462306a36Sopenharmony_ci btrfs_item_key_to_cpu(path->nodes[0], &key, 0); 248562306a36Sopenharmony_ci orig_key = key; 248662306a36Sopenharmony_ci 248762306a36Sopenharmony_ci if (key.offset > 0) { 248862306a36Sopenharmony_ci key.offset--; 248962306a36Sopenharmony_ci } else if (key.type > 0) { 249062306a36Sopenharmony_ci key.type--; 249162306a36Sopenharmony_ci key.offset = (u64)-1; 249262306a36Sopenharmony_ci } else if (key.objectid > 0) { 249362306a36Sopenharmony_ci key.objectid--; 249462306a36Sopenharmony_ci key.type = (u8)-1; 249562306a36Sopenharmony_ci key.offset = (u64)-1; 249662306a36Sopenharmony_ci } else { 249762306a36Sopenharmony_ci return 1; 249862306a36Sopenharmony_ci } 249962306a36Sopenharmony_ci 
250062306a36Sopenharmony_ci btrfs_release_path(path); 250162306a36Sopenharmony_ci ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 250262306a36Sopenharmony_ci if (ret <= 0) 250362306a36Sopenharmony_ci return ret; 250462306a36Sopenharmony_ci 250562306a36Sopenharmony_ci /* 250662306a36Sopenharmony_ci * Previous key not found. Even if we were at slot 0 of the leaf we had 250762306a36Sopenharmony_ci * before releasing the path and calling btrfs_search_slot(), we now may 250862306a36Sopenharmony_ci * be in a slot pointing to the same original key - this can happen if 250962306a36Sopenharmony_ci * after we released the path, one of more items were moved from a 251062306a36Sopenharmony_ci * sibling leaf into the front of the leaf we had due to an insertion 251162306a36Sopenharmony_ci * (see push_leaf_right()). 251262306a36Sopenharmony_ci * If we hit this case and our slot is > 0 and just decrement the slot 251362306a36Sopenharmony_ci * so that the caller does not process the same key again, which may or 251462306a36Sopenharmony_ci * may not break the caller, depending on its logic. 251562306a36Sopenharmony_ci */ 251662306a36Sopenharmony_ci if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) { 251762306a36Sopenharmony_ci btrfs_item_key(path->nodes[0], &found_key, path->slots[0]); 251862306a36Sopenharmony_ci ret = comp_keys(&found_key, &orig_key); 251962306a36Sopenharmony_ci if (ret == 0) { 252062306a36Sopenharmony_ci if (path->slots[0] > 0) { 252162306a36Sopenharmony_ci path->slots[0]--; 252262306a36Sopenharmony_ci return 0; 252362306a36Sopenharmony_ci } 252462306a36Sopenharmony_ci /* 252562306a36Sopenharmony_ci * At slot 0, same key as before, it means orig_key is 252662306a36Sopenharmony_ci * the lowest, leftmost, key in the tree. We're done. 
252762306a36Sopenharmony_ci */ 252862306a36Sopenharmony_ci return 1; 252962306a36Sopenharmony_ci } 253062306a36Sopenharmony_ci } 253162306a36Sopenharmony_ci 253262306a36Sopenharmony_ci btrfs_item_key(path->nodes[0], &found_key, 0); 253362306a36Sopenharmony_ci ret = comp_keys(&found_key, &key); 253462306a36Sopenharmony_ci /* 253562306a36Sopenharmony_ci * We might have had an item with the previous key in the tree right 253662306a36Sopenharmony_ci * before we released our path. And after we released our path, that 253762306a36Sopenharmony_ci * item might have been pushed to the first slot (0) of the leaf we 253862306a36Sopenharmony_ci * were holding due to a tree balance. Alternatively, an item with the 253962306a36Sopenharmony_ci * previous key can exist as the only element of a leaf (big fat item). 254062306a36Sopenharmony_ci * Therefore account for these 2 cases, so that our callers (like 254162306a36Sopenharmony_ci * btrfs_previous_item) don't miss an existing item with a key matching 254262306a36Sopenharmony_ci * the previous key we computed above. 254362306a36Sopenharmony_ci */ 254462306a36Sopenharmony_ci if (ret <= 0) 254562306a36Sopenharmony_ci return 0; 254662306a36Sopenharmony_ci return 1; 254762306a36Sopenharmony_ci} 254862306a36Sopenharmony_ci 254962306a36Sopenharmony_ci/* 255062306a36Sopenharmony_ci * helper to use instead of search slot if no exact match is needed but 255162306a36Sopenharmony_ci * instead the next or previous item should be returned. 255262306a36Sopenharmony_ci * When find_higher is true, the next higher item is returned, the next lower 255362306a36Sopenharmony_ci * otherwise. 255462306a36Sopenharmony_ci * When return_any and find_higher are both true, and no higher item is found, 255562306a36Sopenharmony_ci * return the next lower instead. 255662306a36Sopenharmony_ci * When return_any is true and find_higher is false, and no lower item is found, 255762306a36Sopenharmony_ci * return the next higher instead. 
255862306a36Sopenharmony_ci * It returns 0 if any item is found, 1 if none is found (tree empty), and 255962306a36Sopenharmony_ci * < 0 on error 256062306a36Sopenharmony_ci */ 256162306a36Sopenharmony_ciint btrfs_search_slot_for_read(struct btrfs_root *root, 256262306a36Sopenharmony_ci const struct btrfs_key *key, 256362306a36Sopenharmony_ci struct btrfs_path *p, int find_higher, 256462306a36Sopenharmony_ci int return_any) 256562306a36Sopenharmony_ci{ 256662306a36Sopenharmony_ci int ret; 256762306a36Sopenharmony_ci struct extent_buffer *leaf; 256862306a36Sopenharmony_ci 256962306a36Sopenharmony_ciagain: 257062306a36Sopenharmony_ci ret = btrfs_search_slot(NULL, root, key, p, 0, 0); 257162306a36Sopenharmony_ci if (ret <= 0) 257262306a36Sopenharmony_ci return ret; 257362306a36Sopenharmony_ci /* 257462306a36Sopenharmony_ci * a return value of 1 means the path is at the position where the 257562306a36Sopenharmony_ci * item should be inserted. Normally this is the next bigger item, 257662306a36Sopenharmony_ci * but in case the previous item is the last in a leaf, path points 257762306a36Sopenharmony_ci * to the first free slot in the previous leaf, i.e. at an invalid 257862306a36Sopenharmony_ci * item. 
257962306a36Sopenharmony_ci */ 258062306a36Sopenharmony_ci leaf = p->nodes[0]; 258162306a36Sopenharmony_ci 258262306a36Sopenharmony_ci if (find_higher) { 258362306a36Sopenharmony_ci if (p->slots[0] >= btrfs_header_nritems(leaf)) { 258462306a36Sopenharmony_ci ret = btrfs_next_leaf(root, p); 258562306a36Sopenharmony_ci if (ret <= 0) 258662306a36Sopenharmony_ci return ret; 258762306a36Sopenharmony_ci if (!return_any) 258862306a36Sopenharmony_ci return 1; 258962306a36Sopenharmony_ci /* 259062306a36Sopenharmony_ci * no higher item found, return the next 259162306a36Sopenharmony_ci * lower instead 259262306a36Sopenharmony_ci */ 259362306a36Sopenharmony_ci return_any = 0; 259462306a36Sopenharmony_ci find_higher = 0; 259562306a36Sopenharmony_ci btrfs_release_path(p); 259662306a36Sopenharmony_ci goto again; 259762306a36Sopenharmony_ci } 259862306a36Sopenharmony_ci } else { 259962306a36Sopenharmony_ci if (p->slots[0] == 0) { 260062306a36Sopenharmony_ci ret = btrfs_prev_leaf(root, p); 260162306a36Sopenharmony_ci if (ret < 0) 260262306a36Sopenharmony_ci return ret; 260362306a36Sopenharmony_ci if (!ret) { 260462306a36Sopenharmony_ci leaf = p->nodes[0]; 260562306a36Sopenharmony_ci if (p->slots[0] == btrfs_header_nritems(leaf)) 260662306a36Sopenharmony_ci p->slots[0]--; 260762306a36Sopenharmony_ci return 0; 260862306a36Sopenharmony_ci } 260962306a36Sopenharmony_ci if (!return_any) 261062306a36Sopenharmony_ci return 1; 261162306a36Sopenharmony_ci /* 261262306a36Sopenharmony_ci * no lower item found, return the next 261362306a36Sopenharmony_ci * higher instead 261462306a36Sopenharmony_ci */ 261562306a36Sopenharmony_ci return_any = 0; 261662306a36Sopenharmony_ci find_higher = 1; 261762306a36Sopenharmony_ci btrfs_release_path(p); 261862306a36Sopenharmony_ci goto again; 261962306a36Sopenharmony_ci } else { 262062306a36Sopenharmony_ci --p->slots[0]; 262162306a36Sopenharmony_ci } 262262306a36Sopenharmony_ci } 262362306a36Sopenharmony_ci return 0; 262462306a36Sopenharmony_ci} 
262562306a36Sopenharmony_ci 262662306a36Sopenharmony_ci/* 262762306a36Sopenharmony_ci * Execute search and call btrfs_previous_item to traverse backwards if the item 262862306a36Sopenharmony_ci * was not found. 262962306a36Sopenharmony_ci * 263062306a36Sopenharmony_ci * Return 0 if found, 1 if not found and < 0 if error. 263162306a36Sopenharmony_ci */ 263262306a36Sopenharmony_ciint btrfs_search_backwards(struct btrfs_root *root, struct btrfs_key *key, 263362306a36Sopenharmony_ci struct btrfs_path *path) 263462306a36Sopenharmony_ci{ 263562306a36Sopenharmony_ci int ret; 263662306a36Sopenharmony_ci 263762306a36Sopenharmony_ci ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 263862306a36Sopenharmony_ci if (ret > 0) 263962306a36Sopenharmony_ci ret = btrfs_previous_item(root, path, key->objectid, key->type); 264062306a36Sopenharmony_ci 264162306a36Sopenharmony_ci if (ret == 0) 264262306a36Sopenharmony_ci btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]); 264362306a36Sopenharmony_ci 264462306a36Sopenharmony_ci return ret; 264562306a36Sopenharmony_ci} 264662306a36Sopenharmony_ci 264762306a36Sopenharmony_ci/* 264862306a36Sopenharmony_ci * Search for a valid slot for the given path. 264962306a36Sopenharmony_ci * 265062306a36Sopenharmony_ci * @root: The root node of the tree. 265162306a36Sopenharmony_ci * @key: Will contain a valid item if found. 265262306a36Sopenharmony_ci * @path: The starting point to validate the slot. 265362306a36Sopenharmony_ci * 265462306a36Sopenharmony_ci * Return: 0 if the item is valid 265562306a36Sopenharmony_ci * 1 if not found 265662306a36Sopenharmony_ci * <0 if error. 
265762306a36Sopenharmony_ci */ 265862306a36Sopenharmony_ciint btrfs_get_next_valid_item(struct btrfs_root *root, struct btrfs_key *key, 265962306a36Sopenharmony_ci struct btrfs_path *path) 266062306a36Sopenharmony_ci{ 266162306a36Sopenharmony_ci if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 266262306a36Sopenharmony_ci int ret; 266362306a36Sopenharmony_ci 266462306a36Sopenharmony_ci ret = btrfs_next_leaf(root, path); 266562306a36Sopenharmony_ci if (ret) 266662306a36Sopenharmony_ci return ret; 266762306a36Sopenharmony_ci } 266862306a36Sopenharmony_ci 266962306a36Sopenharmony_ci btrfs_item_key_to_cpu(path->nodes[0], key, path->slots[0]); 267062306a36Sopenharmony_ci return 0; 267162306a36Sopenharmony_ci} 267262306a36Sopenharmony_ci 267362306a36Sopenharmony_ci/* 267462306a36Sopenharmony_ci * adjust the pointers going up the tree, starting at level 267562306a36Sopenharmony_ci * making sure the right key of each node is points to 'key'. 267662306a36Sopenharmony_ci * This is used after shifting pointers to the left, so it stops 267762306a36Sopenharmony_ci * fixing up pointers when a given leaf/node is not in slot 0 of the 267862306a36Sopenharmony_ci * higher levels 267962306a36Sopenharmony_ci * 268062306a36Sopenharmony_ci */ 268162306a36Sopenharmony_cistatic void fixup_low_keys(struct btrfs_trans_handle *trans, 268262306a36Sopenharmony_ci struct btrfs_path *path, 268362306a36Sopenharmony_ci struct btrfs_disk_key *key, int level) 268462306a36Sopenharmony_ci{ 268562306a36Sopenharmony_ci int i; 268662306a36Sopenharmony_ci struct extent_buffer *t; 268762306a36Sopenharmony_ci int ret; 268862306a36Sopenharmony_ci 268962306a36Sopenharmony_ci for (i = level; i < BTRFS_MAX_LEVEL; i++) { 269062306a36Sopenharmony_ci int tslot = path->slots[i]; 269162306a36Sopenharmony_ci 269262306a36Sopenharmony_ci if (!path->nodes[i]) 269362306a36Sopenharmony_ci break; 269462306a36Sopenharmony_ci t = path->nodes[i]; 269562306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_key(t, 
tslot, 269662306a36Sopenharmony_ci BTRFS_MOD_LOG_KEY_REPLACE); 269762306a36Sopenharmony_ci BUG_ON(ret < 0); 269862306a36Sopenharmony_ci btrfs_set_node_key(t, key, tslot); 269962306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, path->nodes[i]); 270062306a36Sopenharmony_ci if (tslot != 0) 270162306a36Sopenharmony_ci break; 270262306a36Sopenharmony_ci } 270362306a36Sopenharmony_ci} 270462306a36Sopenharmony_ci 270562306a36Sopenharmony_ci/* 270662306a36Sopenharmony_ci * update item key. 270762306a36Sopenharmony_ci * 270862306a36Sopenharmony_ci * This function isn't completely safe. It's the caller's responsibility 270962306a36Sopenharmony_ci * that the new key won't break the order 271062306a36Sopenharmony_ci */ 271162306a36Sopenharmony_civoid btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, 271262306a36Sopenharmony_ci struct btrfs_path *path, 271362306a36Sopenharmony_ci const struct btrfs_key *new_key) 271462306a36Sopenharmony_ci{ 271562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 271662306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 271762306a36Sopenharmony_ci struct extent_buffer *eb; 271862306a36Sopenharmony_ci int slot; 271962306a36Sopenharmony_ci 272062306a36Sopenharmony_ci eb = path->nodes[0]; 272162306a36Sopenharmony_ci slot = path->slots[0]; 272262306a36Sopenharmony_ci if (slot > 0) { 272362306a36Sopenharmony_ci btrfs_item_key(eb, &disk_key, slot - 1); 272462306a36Sopenharmony_ci if (unlikely(comp_keys(&disk_key, new_key) >= 0)) { 272562306a36Sopenharmony_ci btrfs_print_leaf(eb); 272662306a36Sopenharmony_ci btrfs_crit(fs_info, 272762306a36Sopenharmony_ci "slot %u key (%llu %u %llu) new key (%llu %u %llu)", 272862306a36Sopenharmony_ci slot, btrfs_disk_key_objectid(&disk_key), 272962306a36Sopenharmony_ci btrfs_disk_key_type(&disk_key), 273062306a36Sopenharmony_ci btrfs_disk_key_offset(&disk_key), 273162306a36Sopenharmony_ci new_key->objectid, new_key->type, 273262306a36Sopenharmony_ci new_key->offset); 
273362306a36Sopenharmony_ci BUG(); 273462306a36Sopenharmony_ci } 273562306a36Sopenharmony_ci } 273662306a36Sopenharmony_ci if (slot < btrfs_header_nritems(eb) - 1) { 273762306a36Sopenharmony_ci btrfs_item_key(eb, &disk_key, slot + 1); 273862306a36Sopenharmony_ci if (unlikely(comp_keys(&disk_key, new_key) <= 0)) { 273962306a36Sopenharmony_ci btrfs_print_leaf(eb); 274062306a36Sopenharmony_ci btrfs_crit(fs_info, 274162306a36Sopenharmony_ci "slot %u key (%llu %u %llu) new key (%llu %u %llu)", 274262306a36Sopenharmony_ci slot, btrfs_disk_key_objectid(&disk_key), 274362306a36Sopenharmony_ci btrfs_disk_key_type(&disk_key), 274462306a36Sopenharmony_ci btrfs_disk_key_offset(&disk_key), 274562306a36Sopenharmony_ci new_key->objectid, new_key->type, 274662306a36Sopenharmony_ci new_key->offset); 274762306a36Sopenharmony_ci BUG(); 274862306a36Sopenharmony_ci } 274962306a36Sopenharmony_ci } 275062306a36Sopenharmony_ci 275162306a36Sopenharmony_ci btrfs_cpu_key_to_disk(&disk_key, new_key); 275262306a36Sopenharmony_ci btrfs_set_item_key(eb, &disk_key, slot); 275362306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, eb); 275462306a36Sopenharmony_ci if (slot == 0) 275562306a36Sopenharmony_ci fixup_low_keys(trans, path, &disk_key, 1); 275662306a36Sopenharmony_ci} 275762306a36Sopenharmony_ci 275862306a36Sopenharmony_ci/* 275962306a36Sopenharmony_ci * Check key order of two sibling extent buffers. 276062306a36Sopenharmony_ci * 276162306a36Sopenharmony_ci * Return true if something is wrong. 276262306a36Sopenharmony_ci * Return false if everything is fine. 
276362306a36Sopenharmony_ci * 276462306a36Sopenharmony_ci * Tree-checker only works inside one tree block, thus the following 276562306a36Sopenharmony_ci * corruption can not be detected by tree-checker: 276662306a36Sopenharmony_ci * 276762306a36Sopenharmony_ci * Leaf @left | Leaf @right 276862306a36Sopenharmony_ci * -------------------------------------------------------------- 276962306a36Sopenharmony_ci * | 1 | 2 | 3 | 4 | 5 | f6 | | 7 | 8 | 277062306a36Sopenharmony_ci * 277162306a36Sopenharmony_ci * Key f6 in leaf @left itself is valid, but not valid when the next 277262306a36Sopenharmony_ci * key in leaf @right is 7. 277362306a36Sopenharmony_ci * This can only be checked at tree block merge time. 277462306a36Sopenharmony_ci * And since tree checker has ensured all key order in each tree block 277562306a36Sopenharmony_ci * is correct, we only need to bother the last key of @left and the first 277662306a36Sopenharmony_ci * key of @right. 277762306a36Sopenharmony_ci */ 277862306a36Sopenharmony_cistatic bool check_sibling_keys(struct extent_buffer *left, 277962306a36Sopenharmony_ci struct extent_buffer *right) 278062306a36Sopenharmony_ci{ 278162306a36Sopenharmony_ci struct btrfs_key left_last; 278262306a36Sopenharmony_ci struct btrfs_key right_first; 278362306a36Sopenharmony_ci int level = btrfs_header_level(left); 278462306a36Sopenharmony_ci int nr_left = btrfs_header_nritems(left); 278562306a36Sopenharmony_ci int nr_right = btrfs_header_nritems(right); 278662306a36Sopenharmony_ci 278762306a36Sopenharmony_ci /* No key to check in one of the tree blocks */ 278862306a36Sopenharmony_ci if (!nr_left || !nr_right) 278962306a36Sopenharmony_ci return false; 279062306a36Sopenharmony_ci 279162306a36Sopenharmony_ci if (level) { 279262306a36Sopenharmony_ci btrfs_node_key_to_cpu(left, &left_last, nr_left - 1); 279362306a36Sopenharmony_ci btrfs_node_key_to_cpu(right, &right_first, 0); 279462306a36Sopenharmony_ci } else { 279562306a36Sopenharmony_ci btrfs_item_key_to_cpu(left, 
&left_last, nr_left - 1); 279662306a36Sopenharmony_ci btrfs_item_key_to_cpu(right, &right_first, 0); 279762306a36Sopenharmony_ci } 279862306a36Sopenharmony_ci 279962306a36Sopenharmony_ci if (unlikely(btrfs_comp_cpu_keys(&left_last, &right_first) >= 0)) { 280062306a36Sopenharmony_ci btrfs_crit(left->fs_info, "left extent buffer:"); 280162306a36Sopenharmony_ci btrfs_print_tree(left, false); 280262306a36Sopenharmony_ci btrfs_crit(left->fs_info, "right extent buffer:"); 280362306a36Sopenharmony_ci btrfs_print_tree(right, false); 280462306a36Sopenharmony_ci btrfs_crit(left->fs_info, 280562306a36Sopenharmony_ci"bad key order, sibling blocks, left last (%llu %u %llu) right first (%llu %u %llu)", 280662306a36Sopenharmony_ci left_last.objectid, left_last.type, 280762306a36Sopenharmony_ci left_last.offset, right_first.objectid, 280862306a36Sopenharmony_ci right_first.type, right_first.offset); 280962306a36Sopenharmony_ci return true; 281062306a36Sopenharmony_ci } 281162306a36Sopenharmony_ci return false; 281262306a36Sopenharmony_ci} 281362306a36Sopenharmony_ci 281462306a36Sopenharmony_ci/* 281562306a36Sopenharmony_ci * try to push data from one node into the next node left in the 281662306a36Sopenharmony_ci * tree. 281762306a36Sopenharmony_ci * 281862306a36Sopenharmony_ci * returns 0 if some ptrs were pushed left, < 0 if there was some horrible 281962306a36Sopenharmony_ci * error, and > 0 if there was no room in the left hand block. 
282062306a36Sopenharmony_ci */ 282162306a36Sopenharmony_cistatic int push_node_left(struct btrfs_trans_handle *trans, 282262306a36Sopenharmony_ci struct extent_buffer *dst, 282362306a36Sopenharmony_ci struct extent_buffer *src, int empty) 282462306a36Sopenharmony_ci{ 282562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 282662306a36Sopenharmony_ci int push_items = 0; 282762306a36Sopenharmony_ci int src_nritems; 282862306a36Sopenharmony_ci int dst_nritems; 282962306a36Sopenharmony_ci int ret = 0; 283062306a36Sopenharmony_ci 283162306a36Sopenharmony_ci src_nritems = btrfs_header_nritems(src); 283262306a36Sopenharmony_ci dst_nritems = btrfs_header_nritems(dst); 283362306a36Sopenharmony_ci push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems; 283462306a36Sopenharmony_ci WARN_ON(btrfs_header_generation(src) != trans->transid); 283562306a36Sopenharmony_ci WARN_ON(btrfs_header_generation(dst) != trans->transid); 283662306a36Sopenharmony_ci 283762306a36Sopenharmony_ci if (!empty && src_nritems <= 8) 283862306a36Sopenharmony_ci return 1; 283962306a36Sopenharmony_ci 284062306a36Sopenharmony_ci if (push_items <= 0) 284162306a36Sopenharmony_ci return 1; 284262306a36Sopenharmony_ci 284362306a36Sopenharmony_ci if (empty) { 284462306a36Sopenharmony_ci push_items = min(src_nritems, push_items); 284562306a36Sopenharmony_ci if (push_items < src_nritems) { 284662306a36Sopenharmony_ci /* leave at least 8 pointers in the node if 284762306a36Sopenharmony_ci * we aren't going to empty it 284862306a36Sopenharmony_ci */ 284962306a36Sopenharmony_ci if (src_nritems - push_items < 8) { 285062306a36Sopenharmony_ci if (push_items <= 8) 285162306a36Sopenharmony_ci return 1; 285262306a36Sopenharmony_ci push_items -= 8; 285362306a36Sopenharmony_ci } 285462306a36Sopenharmony_ci } 285562306a36Sopenharmony_ci } else 285662306a36Sopenharmony_ci push_items = min(src_nritems - 8, push_items); 285762306a36Sopenharmony_ci 285862306a36Sopenharmony_ci /* dst is the left eb, src 
is the middle eb */ 285962306a36Sopenharmony_ci if (check_sibling_keys(dst, src)) { 286062306a36Sopenharmony_ci ret = -EUCLEAN; 286162306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 286262306a36Sopenharmony_ci return ret; 286362306a36Sopenharmony_ci } 286462306a36Sopenharmony_ci ret = btrfs_tree_mod_log_eb_copy(dst, src, dst_nritems, 0, push_items); 286562306a36Sopenharmony_ci if (ret) { 286662306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 286762306a36Sopenharmony_ci return ret; 286862306a36Sopenharmony_ci } 286962306a36Sopenharmony_ci copy_extent_buffer(dst, src, 287062306a36Sopenharmony_ci btrfs_node_key_ptr_offset(dst, dst_nritems), 287162306a36Sopenharmony_ci btrfs_node_key_ptr_offset(src, 0), 287262306a36Sopenharmony_ci push_items * sizeof(struct btrfs_key_ptr)); 287362306a36Sopenharmony_ci 287462306a36Sopenharmony_ci if (push_items < src_nritems) { 287562306a36Sopenharmony_ci /* 287662306a36Sopenharmony_ci * btrfs_tree_mod_log_eb_copy handles logging the move, so we 287762306a36Sopenharmony_ci * don't need to do an explicit tree mod log operation for it. 
287862306a36Sopenharmony_ci */ 287962306a36Sopenharmony_ci memmove_extent_buffer(src, btrfs_node_key_ptr_offset(src, 0), 288062306a36Sopenharmony_ci btrfs_node_key_ptr_offset(src, push_items), 288162306a36Sopenharmony_ci (src_nritems - push_items) * 288262306a36Sopenharmony_ci sizeof(struct btrfs_key_ptr)); 288362306a36Sopenharmony_ci } 288462306a36Sopenharmony_ci btrfs_set_header_nritems(src, src_nritems - push_items); 288562306a36Sopenharmony_ci btrfs_set_header_nritems(dst, dst_nritems + push_items); 288662306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, src); 288762306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, dst); 288862306a36Sopenharmony_ci 288962306a36Sopenharmony_ci return ret; 289062306a36Sopenharmony_ci} 289162306a36Sopenharmony_ci 289262306a36Sopenharmony_ci/* 289362306a36Sopenharmony_ci * try to push data from one node into the next node right in the 289462306a36Sopenharmony_ci * tree. 289562306a36Sopenharmony_ci * 289662306a36Sopenharmony_ci * returns 0 if some ptrs were pushed, < 0 if there was some horrible 289762306a36Sopenharmony_ci * error, and > 0 if there was no room in the right hand block. 
289862306a36Sopenharmony_ci * 289962306a36Sopenharmony_ci * this will only push up to 1/2 the contents of the left node over 290062306a36Sopenharmony_ci */ 290162306a36Sopenharmony_cistatic int balance_node_right(struct btrfs_trans_handle *trans, 290262306a36Sopenharmony_ci struct extent_buffer *dst, 290362306a36Sopenharmony_ci struct extent_buffer *src) 290462306a36Sopenharmony_ci{ 290562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 290662306a36Sopenharmony_ci int push_items = 0; 290762306a36Sopenharmony_ci int max_push; 290862306a36Sopenharmony_ci int src_nritems; 290962306a36Sopenharmony_ci int dst_nritems; 291062306a36Sopenharmony_ci int ret = 0; 291162306a36Sopenharmony_ci 291262306a36Sopenharmony_ci WARN_ON(btrfs_header_generation(src) != trans->transid); 291362306a36Sopenharmony_ci WARN_ON(btrfs_header_generation(dst) != trans->transid); 291462306a36Sopenharmony_ci 291562306a36Sopenharmony_ci src_nritems = btrfs_header_nritems(src); 291662306a36Sopenharmony_ci dst_nritems = btrfs_header_nritems(dst); 291762306a36Sopenharmony_ci push_items = BTRFS_NODEPTRS_PER_BLOCK(fs_info) - dst_nritems; 291862306a36Sopenharmony_ci if (push_items <= 0) 291962306a36Sopenharmony_ci return 1; 292062306a36Sopenharmony_ci 292162306a36Sopenharmony_ci if (src_nritems < 4) 292262306a36Sopenharmony_ci return 1; 292362306a36Sopenharmony_ci 292462306a36Sopenharmony_ci max_push = src_nritems / 2 + 1; 292562306a36Sopenharmony_ci /* don't try to empty the node */ 292662306a36Sopenharmony_ci if (max_push >= src_nritems) 292762306a36Sopenharmony_ci return 1; 292862306a36Sopenharmony_ci 292962306a36Sopenharmony_ci if (max_push < push_items) 293062306a36Sopenharmony_ci push_items = max_push; 293162306a36Sopenharmony_ci 293262306a36Sopenharmony_ci /* dst is the right eb, src is the middle eb */ 293362306a36Sopenharmony_ci if (check_sibling_keys(src, dst)) { 293462306a36Sopenharmony_ci ret = -EUCLEAN; 293562306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 
293662306a36Sopenharmony_ci return ret; 293762306a36Sopenharmony_ci } 293862306a36Sopenharmony_ci 293962306a36Sopenharmony_ci /* 294062306a36Sopenharmony_ci * btrfs_tree_mod_log_eb_copy handles logging the move, so we don't 294162306a36Sopenharmony_ci * need to do an explicit tree mod log operation for it. 294262306a36Sopenharmony_ci */ 294362306a36Sopenharmony_ci memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(dst, push_items), 294462306a36Sopenharmony_ci btrfs_node_key_ptr_offset(dst, 0), 294562306a36Sopenharmony_ci (dst_nritems) * 294662306a36Sopenharmony_ci sizeof(struct btrfs_key_ptr)); 294762306a36Sopenharmony_ci 294862306a36Sopenharmony_ci ret = btrfs_tree_mod_log_eb_copy(dst, src, 0, src_nritems - push_items, 294962306a36Sopenharmony_ci push_items); 295062306a36Sopenharmony_ci if (ret) { 295162306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 295262306a36Sopenharmony_ci return ret; 295362306a36Sopenharmony_ci } 295462306a36Sopenharmony_ci copy_extent_buffer(dst, src, 295562306a36Sopenharmony_ci btrfs_node_key_ptr_offset(dst, 0), 295662306a36Sopenharmony_ci btrfs_node_key_ptr_offset(src, src_nritems - push_items), 295762306a36Sopenharmony_ci push_items * sizeof(struct btrfs_key_ptr)); 295862306a36Sopenharmony_ci 295962306a36Sopenharmony_ci btrfs_set_header_nritems(src, src_nritems - push_items); 296062306a36Sopenharmony_ci btrfs_set_header_nritems(dst, dst_nritems + push_items); 296162306a36Sopenharmony_ci 296262306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, src); 296362306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, dst); 296462306a36Sopenharmony_ci 296562306a36Sopenharmony_ci return ret; 296662306a36Sopenharmony_ci} 296762306a36Sopenharmony_ci 296862306a36Sopenharmony_ci/* 296962306a36Sopenharmony_ci * helper function to insert a new root level in the tree. 
297062306a36Sopenharmony_ci * A new node is allocated, and a single item is inserted to 297162306a36Sopenharmony_ci * point to the existing root 297262306a36Sopenharmony_ci * 297362306a36Sopenharmony_ci * returns zero on success or < 0 on failure. 297462306a36Sopenharmony_ci */ 297562306a36Sopenharmony_cistatic noinline int insert_new_root(struct btrfs_trans_handle *trans, 297662306a36Sopenharmony_ci struct btrfs_root *root, 297762306a36Sopenharmony_ci struct btrfs_path *path, int level) 297862306a36Sopenharmony_ci{ 297962306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 298062306a36Sopenharmony_ci u64 lower_gen; 298162306a36Sopenharmony_ci struct extent_buffer *lower; 298262306a36Sopenharmony_ci struct extent_buffer *c; 298362306a36Sopenharmony_ci struct extent_buffer *old; 298462306a36Sopenharmony_ci struct btrfs_disk_key lower_key; 298562306a36Sopenharmony_ci int ret; 298662306a36Sopenharmony_ci 298762306a36Sopenharmony_ci BUG_ON(path->nodes[level]); 298862306a36Sopenharmony_ci BUG_ON(path->nodes[level-1] != root->node); 298962306a36Sopenharmony_ci 299062306a36Sopenharmony_ci lower = path->nodes[level-1]; 299162306a36Sopenharmony_ci if (level == 1) 299262306a36Sopenharmony_ci btrfs_item_key(lower, &lower_key, 0); 299362306a36Sopenharmony_ci else 299462306a36Sopenharmony_ci btrfs_node_key(lower, &lower_key, 0); 299562306a36Sopenharmony_ci 299662306a36Sopenharmony_ci c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 299762306a36Sopenharmony_ci &lower_key, level, root->node->start, 0, 299862306a36Sopenharmony_ci BTRFS_NESTING_NEW_ROOT); 299962306a36Sopenharmony_ci if (IS_ERR(c)) 300062306a36Sopenharmony_ci return PTR_ERR(c); 300162306a36Sopenharmony_ci 300262306a36Sopenharmony_ci root_add_used(root, fs_info->nodesize); 300362306a36Sopenharmony_ci 300462306a36Sopenharmony_ci btrfs_set_header_nritems(c, 1); 300562306a36Sopenharmony_ci btrfs_set_node_key(c, &lower_key, 0); 300662306a36Sopenharmony_ci btrfs_set_node_blockptr(c, 0, 
lower->start); 300762306a36Sopenharmony_ci lower_gen = btrfs_header_generation(lower); 300862306a36Sopenharmony_ci WARN_ON(lower_gen != trans->transid); 300962306a36Sopenharmony_ci 301062306a36Sopenharmony_ci btrfs_set_node_ptr_generation(c, 0, lower_gen); 301162306a36Sopenharmony_ci 301262306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, c); 301362306a36Sopenharmony_ci 301462306a36Sopenharmony_ci old = root->node; 301562306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_root(root->node, c, false); 301662306a36Sopenharmony_ci if (ret < 0) { 301762306a36Sopenharmony_ci btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1); 301862306a36Sopenharmony_ci btrfs_tree_unlock(c); 301962306a36Sopenharmony_ci free_extent_buffer(c); 302062306a36Sopenharmony_ci return ret; 302162306a36Sopenharmony_ci } 302262306a36Sopenharmony_ci rcu_assign_pointer(root->node, c); 302362306a36Sopenharmony_ci 302462306a36Sopenharmony_ci /* the super has an extra ref to root->node */ 302562306a36Sopenharmony_ci free_extent_buffer(old); 302662306a36Sopenharmony_ci 302762306a36Sopenharmony_ci add_root_to_dirty_list(root); 302862306a36Sopenharmony_ci atomic_inc(&c->refs); 302962306a36Sopenharmony_ci path->nodes[level] = c; 303062306a36Sopenharmony_ci path->locks[level] = BTRFS_WRITE_LOCK; 303162306a36Sopenharmony_ci path->slots[level] = 0; 303262306a36Sopenharmony_ci return 0; 303362306a36Sopenharmony_ci} 303462306a36Sopenharmony_ci 303562306a36Sopenharmony_ci/* 303662306a36Sopenharmony_ci * worker function to insert a single pointer in a node. 303762306a36Sopenharmony_ci * the node should have enough room for the pointer already 303862306a36Sopenharmony_ci * 303962306a36Sopenharmony_ci * slot and level indicate where you want the key to go, and 304062306a36Sopenharmony_ci * blocknr is the block the key points to. 
304162306a36Sopenharmony_ci */ 304262306a36Sopenharmony_cistatic int insert_ptr(struct btrfs_trans_handle *trans, 304362306a36Sopenharmony_ci struct btrfs_path *path, 304462306a36Sopenharmony_ci struct btrfs_disk_key *key, u64 bytenr, 304562306a36Sopenharmony_ci int slot, int level) 304662306a36Sopenharmony_ci{ 304762306a36Sopenharmony_ci struct extent_buffer *lower; 304862306a36Sopenharmony_ci int nritems; 304962306a36Sopenharmony_ci int ret; 305062306a36Sopenharmony_ci 305162306a36Sopenharmony_ci BUG_ON(!path->nodes[level]); 305262306a36Sopenharmony_ci btrfs_assert_tree_write_locked(path->nodes[level]); 305362306a36Sopenharmony_ci lower = path->nodes[level]; 305462306a36Sopenharmony_ci nritems = btrfs_header_nritems(lower); 305562306a36Sopenharmony_ci BUG_ON(slot > nritems); 305662306a36Sopenharmony_ci BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(trans->fs_info)); 305762306a36Sopenharmony_ci if (slot != nritems) { 305862306a36Sopenharmony_ci if (level) { 305962306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_move(lower, slot + 1, 306062306a36Sopenharmony_ci slot, nritems - slot); 306162306a36Sopenharmony_ci if (ret < 0) { 306262306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 306362306a36Sopenharmony_ci return ret; 306462306a36Sopenharmony_ci } 306562306a36Sopenharmony_ci } 306662306a36Sopenharmony_ci memmove_extent_buffer(lower, 306762306a36Sopenharmony_ci btrfs_node_key_ptr_offset(lower, slot + 1), 306862306a36Sopenharmony_ci btrfs_node_key_ptr_offset(lower, slot), 306962306a36Sopenharmony_ci (nritems - slot) * sizeof(struct btrfs_key_ptr)); 307062306a36Sopenharmony_ci } 307162306a36Sopenharmony_ci if (level) { 307262306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_key(lower, slot, 307362306a36Sopenharmony_ci BTRFS_MOD_LOG_KEY_ADD); 307462306a36Sopenharmony_ci if (ret < 0) { 307562306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 307662306a36Sopenharmony_ci return ret; 307762306a36Sopenharmony_ci } 307862306a36Sopenharmony_ci } 
307962306a36Sopenharmony_ci btrfs_set_node_key(lower, key, slot); 308062306a36Sopenharmony_ci btrfs_set_node_blockptr(lower, slot, bytenr); 308162306a36Sopenharmony_ci WARN_ON(trans->transid == 0); 308262306a36Sopenharmony_ci btrfs_set_node_ptr_generation(lower, slot, trans->transid); 308362306a36Sopenharmony_ci btrfs_set_header_nritems(lower, nritems + 1); 308462306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, lower); 308562306a36Sopenharmony_ci 308662306a36Sopenharmony_ci return 0; 308762306a36Sopenharmony_ci} 308862306a36Sopenharmony_ci 308962306a36Sopenharmony_ci/* 309062306a36Sopenharmony_ci * split the node at the specified level in path in two. 309162306a36Sopenharmony_ci * The path is corrected to point to the appropriate node after the split 309262306a36Sopenharmony_ci * 309362306a36Sopenharmony_ci * Before splitting this tries to make some room in the node by pushing 309462306a36Sopenharmony_ci * left and right, if either one works, it returns right away. 309562306a36Sopenharmony_ci * 309662306a36Sopenharmony_ci * returns 0 on success and < 0 on failure 309762306a36Sopenharmony_ci */ 309862306a36Sopenharmony_cistatic noinline int split_node(struct btrfs_trans_handle *trans, 309962306a36Sopenharmony_ci struct btrfs_root *root, 310062306a36Sopenharmony_ci struct btrfs_path *path, int level) 310162306a36Sopenharmony_ci{ 310262306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 310362306a36Sopenharmony_ci struct extent_buffer *c; 310462306a36Sopenharmony_ci struct extent_buffer *split; 310562306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 310662306a36Sopenharmony_ci int mid; 310762306a36Sopenharmony_ci int ret; 310862306a36Sopenharmony_ci u32 c_nritems; 310962306a36Sopenharmony_ci 311062306a36Sopenharmony_ci c = path->nodes[level]; 311162306a36Sopenharmony_ci WARN_ON(btrfs_header_generation(c) != trans->transid); 311262306a36Sopenharmony_ci if (c == root->node) { 311362306a36Sopenharmony_ci /* 311462306a36Sopenharmony_ci * trying to 
split the root, lets make a new one 311562306a36Sopenharmony_ci * 311662306a36Sopenharmony_ci * tree mod log: We don't log_removal old root in 311762306a36Sopenharmony_ci * insert_new_root, because that root buffer will be kept as a 311862306a36Sopenharmony_ci * normal node. We are going to log removal of half of the 311962306a36Sopenharmony_ci * elements below with btrfs_tree_mod_log_eb_copy(). We're 312062306a36Sopenharmony_ci * holding a tree lock on the buffer, which is why we cannot 312162306a36Sopenharmony_ci * race with other tree_mod_log users. 312262306a36Sopenharmony_ci */ 312362306a36Sopenharmony_ci ret = insert_new_root(trans, root, path, level + 1); 312462306a36Sopenharmony_ci if (ret) 312562306a36Sopenharmony_ci return ret; 312662306a36Sopenharmony_ci } else { 312762306a36Sopenharmony_ci ret = push_nodes_for_insert(trans, root, path, level); 312862306a36Sopenharmony_ci c = path->nodes[level]; 312962306a36Sopenharmony_ci if (!ret && btrfs_header_nritems(c) < 313062306a36Sopenharmony_ci BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) 313162306a36Sopenharmony_ci return 0; 313262306a36Sopenharmony_ci if (ret < 0) 313362306a36Sopenharmony_ci return ret; 313462306a36Sopenharmony_ci } 313562306a36Sopenharmony_ci 313662306a36Sopenharmony_ci c_nritems = btrfs_header_nritems(c); 313762306a36Sopenharmony_ci mid = (c_nritems + 1) / 2; 313862306a36Sopenharmony_ci btrfs_node_key(c, &disk_key, mid); 313962306a36Sopenharmony_ci 314062306a36Sopenharmony_ci split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 314162306a36Sopenharmony_ci &disk_key, level, c->start, 0, 314262306a36Sopenharmony_ci BTRFS_NESTING_SPLIT); 314362306a36Sopenharmony_ci if (IS_ERR(split)) 314462306a36Sopenharmony_ci return PTR_ERR(split); 314562306a36Sopenharmony_ci 314662306a36Sopenharmony_ci root_add_used(root, fs_info->nodesize); 314762306a36Sopenharmony_ci ASSERT(btrfs_header_level(c) == level); 314862306a36Sopenharmony_ci 314962306a36Sopenharmony_ci ret = 
btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid); 315062306a36Sopenharmony_ci if (ret) { 315162306a36Sopenharmony_ci btrfs_tree_unlock(split); 315262306a36Sopenharmony_ci free_extent_buffer(split); 315362306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 315462306a36Sopenharmony_ci return ret; 315562306a36Sopenharmony_ci } 315662306a36Sopenharmony_ci copy_extent_buffer(split, c, 315762306a36Sopenharmony_ci btrfs_node_key_ptr_offset(split, 0), 315862306a36Sopenharmony_ci btrfs_node_key_ptr_offset(c, mid), 315962306a36Sopenharmony_ci (c_nritems - mid) * sizeof(struct btrfs_key_ptr)); 316062306a36Sopenharmony_ci btrfs_set_header_nritems(split, c_nritems - mid); 316162306a36Sopenharmony_ci btrfs_set_header_nritems(c, mid); 316262306a36Sopenharmony_ci 316362306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, c); 316462306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, split); 316562306a36Sopenharmony_ci 316662306a36Sopenharmony_ci ret = insert_ptr(trans, path, &disk_key, split->start, 316762306a36Sopenharmony_ci path->slots[level + 1] + 1, level + 1); 316862306a36Sopenharmony_ci if (ret < 0) { 316962306a36Sopenharmony_ci btrfs_tree_unlock(split); 317062306a36Sopenharmony_ci free_extent_buffer(split); 317162306a36Sopenharmony_ci return ret; 317262306a36Sopenharmony_ci } 317362306a36Sopenharmony_ci 317462306a36Sopenharmony_ci if (path->slots[level] >= mid) { 317562306a36Sopenharmony_ci path->slots[level] -= mid; 317662306a36Sopenharmony_ci btrfs_tree_unlock(c); 317762306a36Sopenharmony_ci free_extent_buffer(c); 317862306a36Sopenharmony_ci path->nodes[level] = split; 317962306a36Sopenharmony_ci path->slots[level + 1] += 1; 318062306a36Sopenharmony_ci } else { 318162306a36Sopenharmony_ci btrfs_tree_unlock(split); 318262306a36Sopenharmony_ci free_extent_buffer(split); 318362306a36Sopenharmony_ci } 318462306a36Sopenharmony_ci return 0; 318562306a36Sopenharmony_ci} 318662306a36Sopenharmony_ci 318762306a36Sopenharmony_ci/* 318862306a36Sopenharmony_ci * 
how many bytes are required to store the items in a leaf. start 318962306a36Sopenharmony_ci * and nr indicate which items in the leaf to check. This totals up the 319062306a36Sopenharmony_ci * space used both by the item structs and the item data 319162306a36Sopenharmony_ci */ 319262306a36Sopenharmony_cistatic int leaf_space_used(const struct extent_buffer *l, int start, int nr) 319362306a36Sopenharmony_ci{ 319462306a36Sopenharmony_ci int data_len; 319562306a36Sopenharmony_ci int nritems = btrfs_header_nritems(l); 319662306a36Sopenharmony_ci int end = min(nritems, start + nr) - 1; 319762306a36Sopenharmony_ci 319862306a36Sopenharmony_ci if (!nr) 319962306a36Sopenharmony_ci return 0; 320062306a36Sopenharmony_ci data_len = btrfs_item_offset(l, start) + btrfs_item_size(l, start); 320162306a36Sopenharmony_ci data_len = data_len - btrfs_item_offset(l, end); 320262306a36Sopenharmony_ci data_len += sizeof(struct btrfs_item) * nr; 320362306a36Sopenharmony_ci WARN_ON(data_len < 0); 320462306a36Sopenharmony_ci return data_len; 320562306a36Sopenharmony_ci} 320662306a36Sopenharmony_ci 320762306a36Sopenharmony_ci/* 320862306a36Sopenharmony_ci * The space between the end of the leaf items and 320962306a36Sopenharmony_ci * the start of the leaf data. 
IOW, how much room 321062306a36Sopenharmony_ci * the leaf has left for both items and data 321162306a36Sopenharmony_ci */ 321262306a36Sopenharmony_ciint btrfs_leaf_free_space(const struct extent_buffer *leaf) 321362306a36Sopenharmony_ci{ 321462306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = leaf->fs_info; 321562306a36Sopenharmony_ci int nritems = btrfs_header_nritems(leaf); 321662306a36Sopenharmony_ci int ret; 321762306a36Sopenharmony_ci 321862306a36Sopenharmony_ci ret = BTRFS_LEAF_DATA_SIZE(fs_info) - leaf_space_used(leaf, 0, nritems); 321962306a36Sopenharmony_ci if (ret < 0) { 322062306a36Sopenharmony_ci btrfs_crit(fs_info, 322162306a36Sopenharmony_ci "leaf free space ret %d, leaf data size %lu, used %d nritems %d", 322262306a36Sopenharmony_ci ret, 322362306a36Sopenharmony_ci (unsigned long) BTRFS_LEAF_DATA_SIZE(fs_info), 322462306a36Sopenharmony_ci leaf_space_used(leaf, 0, nritems), nritems); 322562306a36Sopenharmony_ci } 322662306a36Sopenharmony_ci return ret; 322762306a36Sopenharmony_ci} 322862306a36Sopenharmony_ci 322962306a36Sopenharmony_ci/* 323062306a36Sopenharmony_ci * min slot controls the lowest index we're willing to push to the 323162306a36Sopenharmony_ci * right. 
We'll push up to and including min_slot, but no lower 323262306a36Sopenharmony_ci */ 323362306a36Sopenharmony_cistatic noinline int __push_leaf_right(struct btrfs_trans_handle *trans, 323462306a36Sopenharmony_ci struct btrfs_path *path, 323562306a36Sopenharmony_ci int data_size, int empty, 323662306a36Sopenharmony_ci struct extent_buffer *right, 323762306a36Sopenharmony_ci int free_space, u32 left_nritems, 323862306a36Sopenharmony_ci u32 min_slot) 323962306a36Sopenharmony_ci{ 324062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = right->fs_info; 324162306a36Sopenharmony_ci struct extent_buffer *left = path->nodes[0]; 324262306a36Sopenharmony_ci struct extent_buffer *upper = path->nodes[1]; 324362306a36Sopenharmony_ci struct btrfs_map_token token; 324462306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 324562306a36Sopenharmony_ci int slot; 324662306a36Sopenharmony_ci u32 i; 324762306a36Sopenharmony_ci int push_space = 0; 324862306a36Sopenharmony_ci int push_items = 0; 324962306a36Sopenharmony_ci u32 nr; 325062306a36Sopenharmony_ci u32 right_nritems; 325162306a36Sopenharmony_ci u32 data_end; 325262306a36Sopenharmony_ci u32 this_item_size; 325362306a36Sopenharmony_ci 325462306a36Sopenharmony_ci if (empty) 325562306a36Sopenharmony_ci nr = 0; 325662306a36Sopenharmony_ci else 325762306a36Sopenharmony_ci nr = max_t(u32, 1, min_slot); 325862306a36Sopenharmony_ci 325962306a36Sopenharmony_ci if (path->slots[0] >= left_nritems) 326062306a36Sopenharmony_ci push_space += data_size; 326162306a36Sopenharmony_ci 326262306a36Sopenharmony_ci slot = path->slots[1]; 326362306a36Sopenharmony_ci i = left_nritems - 1; 326462306a36Sopenharmony_ci while (i >= nr) { 326562306a36Sopenharmony_ci if (!empty && push_items > 0) { 326662306a36Sopenharmony_ci if (path->slots[0] > i) 326762306a36Sopenharmony_ci break; 326862306a36Sopenharmony_ci if (path->slots[0] == i) { 326962306a36Sopenharmony_ci int space = btrfs_leaf_free_space(left); 327062306a36Sopenharmony_ci 
327162306a36Sopenharmony_ci if (space + push_space * 2 > free_space) 327262306a36Sopenharmony_ci break; 327362306a36Sopenharmony_ci } 327462306a36Sopenharmony_ci } 327562306a36Sopenharmony_ci 327662306a36Sopenharmony_ci if (path->slots[0] == i) 327762306a36Sopenharmony_ci push_space += data_size; 327862306a36Sopenharmony_ci 327962306a36Sopenharmony_ci this_item_size = btrfs_item_size(left, i); 328062306a36Sopenharmony_ci if (this_item_size + sizeof(struct btrfs_item) + 328162306a36Sopenharmony_ci push_space > free_space) 328262306a36Sopenharmony_ci break; 328362306a36Sopenharmony_ci 328462306a36Sopenharmony_ci push_items++; 328562306a36Sopenharmony_ci push_space += this_item_size + sizeof(struct btrfs_item); 328662306a36Sopenharmony_ci if (i == 0) 328762306a36Sopenharmony_ci break; 328862306a36Sopenharmony_ci i--; 328962306a36Sopenharmony_ci } 329062306a36Sopenharmony_ci 329162306a36Sopenharmony_ci if (push_items == 0) 329262306a36Sopenharmony_ci goto out_unlock; 329362306a36Sopenharmony_ci 329462306a36Sopenharmony_ci WARN_ON(!empty && push_items == left_nritems); 329562306a36Sopenharmony_ci 329662306a36Sopenharmony_ci /* push left to right */ 329762306a36Sopenharmony_ci right_nritems = btrfs_header_nritems(right); 329862306a36Sopenharmony_ci 329962306a36Sopenharmony_ci push_space = btrfs_item_data_end(left, left_nritems - push_items); 330062306a36Sopenharmony_ci push_space -= leaf_data_end(left); 330162306a36Sopenharmony_ci 330262306a36Sopenharmony_ci /* make room in the right data area */ 330362306a36Sopenharmony_ci data_end = leaf_data_end(right); 330462306a36Sopenharmony_ci memmove_leaf_data(right, data_end - push_space, data_end, 330562306a36Sopenharmony_ci BTRFS_LEAF_DATA_SIZE(fs_info) - data_end); 330662306a36Sopenharmony_ci 330762306a36Sopenharmony_ci /* copy from the left data area */ 330862306a36Sopenharmony_ci copy_leaf_data(right, left, BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, 330962306a36Sopenharmony_ci leaf_data_end(left), push_space); 
331062306a36Sopenharmony_ci 331162306a36Sopenharmony_ci memmove_leaf_items(right, push_items, 0, right_nritems); 331262306a36Sopenharmony_ci 331362306a36Sopenharmony_ci /* copy the items from left to right */ 331462306a36Sopenharmony_ci copy_leaf_items(right, left, 0, left_nritems - push_items, push_items); 331562306a36Sopenharmony_ci 331662306a36Sopenharmony_ci /* update the item pointers */ 331762306a36Sopenharmony_ci btrfs_init_map_token(&token, right); 331862306a36Sopenharmony_ci right_nritems += push_items; 331962306a36Sopenharmony_ci btrfs_set_header_nritems(right, right_nritems); 332062306a36Sopenharmony_ci push_space = BTRFS_LEAF_DATA_SIZE(fs_info); 332162306a36Sopenharmony_ci for (i = 0; i < right_nritems; i++) { 332262306a36Sopenharmony_ci push_space -= btrfs_token_item_size(&token, i); 332362306a36Sopenharmony_ci btrfs_set_token_item_offset(&token, i, push_space); 332462306a36Sopenharmony_ci } 332562306a36Sopenharmony_ci 332662306a36Sopenharmony_ci left_nritems -= push_items; 332762306a36Sopenharmony_ci btrfs_set_header_nritems(left, left_nritems); 332862306a36Sopenharmony_ci 332962306a36Sopenharmony_ci if (left_nritems) 333062306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, left); 333162306a36Sopenharmony_ci else 333262306a36Sopenharmony_ci btrfs_clear_buffer_dirty(trans, left); 333362306a36Sopenharmony_ci 333462306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, right); 333562306a36Sopenharmony_ci 333662306a36Sopenharmony_ci btrfs_item_key(right, &disk_key, 0); 333762306a36Sopenharmony_ci btrfs_set_node_key(upper, &disk_key, slot + 1); 333862306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, upper); 333962306a36Sopenharmony_ci 334062306a36Sopenharmony_ci /* then fixup the leaf pointer in the path */ 334162306a36Sopenharmony_ci if (path->slots[0] >= left_nritems) { 334262306a36Sopenharmony_ci path->slots[0] -= left_nritems; 334362306a36Sopenharmony_ci if (btrfs_header_nritems(path->nodes[0]) == 0) 334462306a36Sopenharmony_ci 
btrfs_clear_buffer_dirty(trans, path->nodes[0]); 334562306a36Sopenharmony_ci btrfs_tree_unlock(path->nodes[0]); 334662306a36Sopenharmony_ci free_extent_buffer(path->nodes[0]); 334762306a36Sopenharmony_ci path->nodes[0] = right; 334862306a36Sopenharmony_ci path->slots[1] += 1; 334962306a36Sopenharmony_ci } else { 335062306a36Sopenharmony_ci btrfs_tree_unlock(right); 335162306a36Sopenharmony_ci free_extent_buffer(right); 335262306a36Sopenharmony_ci } 335362306a36Sopenharmony_ci return 0; 335462306a36Sopenharmony_ci 335562306a36Sopenharmony_ciout_unlock: 335662306a36Sopenharmony_ci btrfs_tree_unlock(right); 335762306a36Sopenharmony_ci free_extent_buffer(right); 335862306a36Sopenharmony_ci return 1; 335962306a36Sopenharmony_ci} 336062306a36Sopenharmony_ci 336162306a36Sopenharmony_ci/* 336262306a36Sopenharmony_ci * push some data in the path leaf to the right, trying to free up at 336362306a36Sopenharmony_ci * least data_size bytes. returns zero if the push worked, nonzero otherwise 336462306a36Sopenharmony_ci * 336562306a36Sopenharmony_ci * returns 1 if the push failed because the other node didn't have enough 336662306a36Sopenharmony_ci * room, 0 if everything worked out and < 0 if there were major errors. 336762306a36Sopenharmony_ci * 336862306a36Sopenharmony_ci * this will push starting from min_slot to the end of the leaf. 
It won't 336962306a36Sopenharmony_ci * push any slot lower than min_slot 337062306a36Sopenharmony_ci */ 337162306a36Sopenharmony_cistatic int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root 337262306a36Sopenharmony_ci *root, struct btrfs_path *path, 337362306a36Sopenharmony_ci int min_data_size, int data_size, 337462306a36Sopenharmony_ci int empty, u32 min_slot) 337562306a36Sopenharmony_ci{ 337662306a36Sopenharmony_ci struct extent_buffer *left = path->nodes[0]; 337762306a36Sopenharmony_ci struct extent_buffer *right; 337862306a36Sopenharmony_ci struct extent_buffer *upper; 337962306a36Sopenharmony_ci int slot; 338062306a36Sopenharmony_ci int free_space; 338162306a36Sopenharmony_ci u32 left_nritems; 338262306a36Sopenharmony_ci int ret; 338362306a36Sopenharmony_ci 338462306a36Sopenharmony_ci if (!path->nodes[1]) 338562306a36Sopenharmony_ci return 1; 338662306a36Sopenharmony_ci 338762306a36Sopenharmony_ci slot = path->slots[1]; 338862306a36Sopenharmony_ci upper = path->nodes[1]; 338962306a36Sopenharmony_ci if (slot >= btrfs_header_nritems(upper) - 1) 339062306a36Sopenharmony_ci return 1; 339162306a36Sopenharmony_ci 339262306a36Sopenharmony_ci btrfs_assert_tree_write_locked(path->nodes[1]); 339362306a36Sopenharmony_ci 339462306a36Sopenharmony_ci right = btrfs_read_node_slot(upper, slot + 1); 339562306a36Sopenharmony_ci if (IS_ERR(right)) 339662306a36Sopenharmony_ci return PTR_ERR(right); 339762306a36Sopenharmony_ci 339862306a36Sopenharmony_ci __btrfs_tree_lock(right, BTRFS_NESTING_RIGHT); 339962306a36Sopenharmony_ci 340062306a36Sopenharmony_ci free_space = btrfs_leaf_free_space(right); 340162306a36Sopenharmony_ci if (free_space < data_size) 340262306a36Sopenharmony_ci goto out_unlock; 340362306a36Sopenharmony_ci 340462306a36Sopenharmony_ci ret = btrfs_cow_block(trans, root, right, upper, 340562306a36Sopenharmony_ci slot + 1, &right, BTRFS_NESTING_RIGHT_COW); 340662306a36Sopenharmony_ci if (ret) 340762306a36Sopenharmony_ci goto out_unlock; 
340862306a36Sopenharmony_ci 340962306a36Sopenharmony_ci left_nritems = btrfs_header_nritems(left); 341062306a36Sopenharmony_ci if (left_nritems == 0) 341162306a36Sopenharmony_ci goto out_unlock; 341262306a36Sopenharmony_ci 341362306a36Sopenharmony_ci if (check_sibling_keys(left, right)) { 341462306a36Sopenharmony_ci ret = -EUCLEAN; 341562306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 341662306a36Sopenharmony_ci btrfs_tree_unlock(right); 341762306a36Sopenharmony_ci free_extent_buffer(right); 341862306a36Sopenharmony_ci return ret; 341962306a36Sopenharmony_ci } 342062306a36Sopenharmony_ci if (path->slots[0] == left_nritems && !empty) { 342162306a36Sopenharmony_ci /* Key greater than all keys in the leaf, right neighbor has 342262306a36Sopenharmony_ci * enough room for it and we're not emptying our leaf to delete 342362306a36Sopenharmony_ci * it, therefore use right neighbor to insert the new item and 342462306a36Sopenharmony_ci * no need to touch/dirty our left leaf. */ 342562306a36Sopenharmony_ci btrfs_tree_unlock(left); 342662306a36Sopenharmony_ci free_extent_buffer(left); 342762306a36Sopenharmony_ci path->nodes[0] = right; 342862306a36Sopenharmony_ci path->slots[0] = 0; 342962306a36Sopenharmony_ci path->slots[1]++; 343062306a36Sopenharmony_ci return 0; 343162306a36Sopenharmony_ci } 343262306a36Sopenharmony_ci 343362306a36Sopenharmony_ci return __push_leaf_right(trans, path, min_data_size, empty, right, 343462306a36Sopenharmony_ci free_space, left_nritems, min_slot); 343562306a36Sopenharmony_ciout_unlock: 343662306a36Sopenharmony_ci btrfs_tree_unlock(right); 343762306a36Sopenharmony_ci free_extent_buffer(right); 343862306a36Sopenharmony_ci return 1; 343962306a36Sopenharmony_ci} 344062306a36Sopenharmony_ci 344162306a36Sopenharmony_ci/* 344262306a36Sopenharmony_ci * push some data in the path leaf to the left, trying to free up at 344362306a36Sopenharmony_ci * least data_size bytes. 
returns zero if the push worked, nonzero otherwise 344462306a36Sopenharmony_ci * 344562306a36Sopenharmony_ci * max_slot can put a limit on how far into the leaf we'll push items. The 344662306a36Sopenharmony_ci * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the 344762306a36Sopenharmony_ci * items 344862306a36Sopenharmony_ci */ 344962306a36Sopenharmony_cistatic noinline int __push_leaf_left(struct btrfs_trans_handle *trans, 345062306a36Sopenharmony_ci struct btrfs_path *path, int data_size, 345162306a36Sopenharmony_ci int empty, struct extent_buffer *left, 345262306a36Sopenharmony_ci int free_space, u32 right_nritems, 345362306a36Sopenharmony_ci u32 max_slot) 345462306a36Sopenharmony_ci{ 345562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = left->fs_info; 345662306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 345762306a36Sopenharmony_ci struct extent_buffer *right = path->nodes[0]; 345862306a36Sopenharmony_ci int i; 345962306a36Sopenharmony_ci int push_space = 0; 346062306a36Sopenharmony_ci int push_items = 0; 346162306a36Sopenharmony_ci u32 old_left_nritems; 346262306a36Sopenharmony_ci u32 nr; 346362306a36Sopenharmony_ci int ret = 0; 346462306a36Sopenharmony_ci u32 this_item_size; 346562306a36Sopenharmony_ci u32 old_left_item_size; 346662306a36Sopenharmony_ci struct btrfs_map_token token; 346762306a36Sopenharmony_ci 346862306a36Sopenharmony_ci if (empty) 346962306a36Sopenharmony_ci nr = min(right_nritems, max_slot); 347062306a36Sopenharmony_ci else 347162306a36Sopenharmony_ci nr = min(right_nritems - 1, max_slot); 347262306a36Sopenharmony_ci 347362306a36Sopenharmony_ci for (i = 0; i < nr; i++) { 347462306a36Sopenharmony_ci if (!empty && push_items > 0) { 347562306a36Sopenharmony_ci if (path->slots[0] < i) 347662306a36Sopenharmony_ci break; 347762306a36Sopenharmony_ci if (path->slots[0] == i) { 347862306a36Sopenharmony_ci int space = btrfs_leaf_free_space(right); 347962306a36Sopenharmony_ci 348062306a36Sopenharmony_ci if (space + 
push_space * 2 > free_space) 348162306a36Sopenharmony_ci break; 348262306a36Sopenharmony_ci } 348362306a36Sopenharmony_ci } 348462306a36Sopenharmony_ci 348562306a36Sopenharmony_ci if (path->slots[0] == i) 348662306a36Sopenharmony_ci push_space += data_size; 348762306a36Sopenharmony_ci 348862306a36Sopenharmony_ci this_item_size = btrfs_item_size(right, i); 348962306a36Sopenharmony_ci if (this_item_size + sizeof(struct btrfs_item) + push_space > 349062306a36Sopenharmony_ci free_space) 349162306a36Sopenharmony_ci break; 349262306a36Sopenharmony_ci 349362306a36Sopenharmony_ci push_items++; 349462306a36Sopenharmony_ci push_space += this_item_size + sizeof(struct btrfs_item); 349562306a36Sopenharmony_ci } 349662306a36Sopenharmony_ci 349762306a36Sopenharmony_ci if (push_items == 0) { 349862306a36Sopenharmony_ci ret = 1; 349962306a36Sopenharmony_ci goto out; 350062306a36Sopenharmony_ci } 350162306a36Sopenharmony_ci WARN_ON(!empty && push_items == btrfs_header_nritems(right)); 350262306a36Sopenharmony_ci 350362306a36Sopenharmony_ci /* push data from right to left */ 350462306a36Sopenharmony_ci copy_leaf_items(left, right, btrfs_header_nritems(left), 0, push_items); 350562306a36Sopenharmony_ci 350662306a36Sopenharmony_ci push_space = BTRFS_LEAF_DATA_SIZE(fs_info) - 350762306a36Sopenharmony_ci btrfs_item_offset(right, push_items - 1); 350862306a36Sopenharmony_ci 350962306a36Sopenharmony_ci copy_leaf_data(left, right, leaf_data_end(left) - push_space, 351062306a36Sopenharmony_ci btrfs_item_offset(right, push_items - 1), push_space); 351162306a36Sopenharmony_ci old_left_nritems = btrfs_header_nritems(left); 351262306a36Sopenharmony_ci BUG_ON(old_left_nritems <= 0); 351362306a36Sopenharmony_ci 351462306a36Sopenharmony_ci btrfs_init_map_token(&token, left); 351562306a36Sopenharmony_ci old_left_item_size = btrfs_item_offset(left, old_left_nritems - 1); 351662306a36Sopenharmony_ci for (i = old_left_nritems; i < old_left_nritems + push_items; i++) { 351762306a36Sopenharmony_ci u32 
ioff; 351862306a36Sopenharmony_ci 351962306a36Sopenharmony_ci ioff = btrfs_token_item_offset(&token, i); 352062306a36Sopenharmony_ci btrfs_set_token_item_offset(&token, i, 352162306a36Sopenharmony_ci ioff - (BTRFS_LEAF_DATA_SIZE(fs_info) - old_left_item_size)); 352262306a36Sopenharmony_ci } 352362306a36Sopenharmony_ci btrfs_set_header_nritems(left, old_left_nritems + push_items); 352462306a36Sopenharmony_ci 352562306a36Sopenharmony_ci /* fixup right node */ 352662306a36Sopenharmony_ci if (push_items > right_nritems) 352762306a36Sopenharmony_ci WARN(1, KERN_CRIT "push items %d nr %u\n", push_items, 352862306a36Sopenharmony_ci right_nritems); 352962306a36Sopenharmony_ci 353062306a36Sopenharmony_ci if (push_items < right_nritems) { 353162306a36Sopenharmony_ci push_space = btrfs_item_offset(right, push_items - 1) - 353262306a36Sopenharmony_ci leaf_data_end(right); 353362306a36Sopenharmony_ci memmove_leaf_data(right, 353462306a36Sopenharmony_ci BTRFS_LEAF_DATA_SIZE(fs_info) - push_space, 353562306a36Sopenharmony_ci leaf_data_end(right), push_space); 353662306a36Sopenharmony_ci 353762306a36Sopenharmony_ci memmove_leaf_items(right, 0, push_items, 353862306a36Sopenharmony_ci btrfs_header_nritems(right) - push_items); 353962306a36Sopenharmony_ci } 354062306a36Sopenharmony_ci 354162306a36Sopenharmony_ci btrfs_init_map_token(&token, right); 354262306a36Sopenharmony_ci right_nritems -= push_items; 354362306a36Sopenharmony_ci btrfs_set_header_nritems(right, right_nritems); 354462306a36Sopenharmony_ci push_space = BTRFS_LEAF_DATA_SIZE(fs_info); 354562306a36Sopenharmony_ci for (i = 0; i < right_nritems; i++) { 354662306a36Sopenharmony_ci push_space = push_space - btrfs_token_item_size(&token, i); 354762306a36Sopenharmony_ci btrfs_set_token_item_offset(&token, i, push_space); 354862306a36Sopenharmony_ci } 354962306a36Sopenharmony_ci 355062306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, left); 355162306a36Sopenharmony_ci if (right_nritems) 355262306a36Sopenharmony_ci 
btrfs_mark_buffer_dirty(trans, right); 355362306a36Sopenharmony_ci else 355462306a36Sopenharmony_ci btrfs_clear_buffer_dirty(trans, right); 355562306a36Sopenharmony_ci 355662306a36Sopenharmony_ci btrfs_item_key(right, &disk_key, 0); 355762306a36Sopenharmony_ci fixup_low_keys(trans, path, &disk_key, 1); 355862306a36Sopenharmony_ci 355962306a36Sopenharmony_ci /* then fixup the leaf pointer in the path */ 356062306a36Sopenharmony_ci if (path->slots[0] < push_items) { 356162306a36Sopenharmony_ci path->slots[0] += old_left_nritems; 356262306a36Sopenharmony_ci btrfs_tree_unlock(path->nodes[0]); 356362306a36Sopenharmony_ci free_extent_buffer(path->nodes[0]); 356462306a36Sopenharmony_ci path->nodes[0] = left; 356562306a36Sopenharmony_ci path->slots[1] -= 1; 356662306a36Sopenharmony_ci } else { 356762306a36Sopenharmony_ci btrfs_tree_unlock(left); 356862306a36Sopenharmony_ci free_extent_buffer(left); 356962306a36Sopenharmony_ci path->slots[0] -= push_items; 357062306a36Sopenharmony_ci } 357162306a36Sopenharmony_ci BUG_ON(path->slots[0] < 0); 357262306a36Sopenharmony_ci return ret; 357362306a36Sopenharmony_ciout: 357462306a36Sopenharmony_ci btrfs_tree_unlock(left); 357562306a36Sopenharmony_ci free_extent_buffer(left); 357662306a36Sopenharmony_ci return ret; 357762306a36Sopenharmony_ci} 357862306a36Sopenharmony_ci 357962306a36Sopenharmony_ci/* 358062306a36Sopenharmony_ci * push some data in the path leaf to the left, trying to free up at 358162306a36Sopenharmony_ci * least data_size bytes. returns zero if the push worked, nonzero otherwise 358262306a36Sopenharmony_ci * 358362306a36Sopenharmony_ci * max_slot can put a limit on how far into the leaf we'll push items. The 358462306a36Sopenharmony_ci * item at 'max_slot' won't be touched. 
Use (u32)-1 to make us push all the 358562306a36Sopenharmony_ci * items 358662306a36Sopenharmony_ci */ 358762306a36Sopenharmony_cistatic int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root 358862306a36Sopenharmony_ci *root, struct btrfs_path *path, int min_data_size, 358962306a36Sopenharmony_ci int data_size, int empty, u32 max_slot) 359062306a36Sopenharmony_ci{ 359162306a36Sopenharmony_ci struct extent_buffer *right = path->nodes[0]; 359262306a36Sopenharmony_ci struct extent_buffer *left; 359362306a36Sopenharmony_ci int slot; 359462306a36Sopenharmony_ci int free_space; 359562306a36Sopenharmony_ci u32 right_nritems; 359662306a36Sopenharmony_ci int ret = 0; 359762306a36Sopenharmony_ci 359862306a36Sopenharmony_ci slot = path->slots[1]; 359962306a36Sopenharmony_ci if (slot == 0) 360062306a36Sopenharmony_ci return 1; 360162306a36Sopenharmony_ci if (!path->nodes[1]) 360262306a36Sopenharmony_ci return 1; 360362306a36Sopenharmony_ci 360462306a36Sopenharmony_ci right_nritems = btrfs_header_nritems(right); 360562306a36Sopenharmony_ci if (right_nritems == 0) 360662306a36Sopenharmony_ci return 1; 360762306a36Sopenharmony_ci 360862306a36Sopenharmony_ci btrfs_assert_tree_write_locked(path->nodes[1]); 360962306a36Sopenharmony_ci 361062306a36Sopenharmony_ci left = btrfs_read_node_slot(path->nodes[1], slot - 1); 361162306a36Sopenharmony_ci if (IS_ERR(left)) 361262306a36Sopenharmony_ci return PTR_ERR(left); 361362306a36Sopenharmony_ci 361462306a36Sopenharmony_ci __btrfs_tree_lock(left, BTRFS_NESTING_LEFT); 361562306a36Sopenharmony_ci 361662306a36Sopenharmony_ci free_space = btrfs_leaf_free_space(left); 361762306a36Sopenharmony_ci if (free_space < data_size) { 361862306a36Sopenharmony_ci ret = 1; 361962306a36Sopenharmony_ci goto out; 362062306a36Sopenharmony_ci } 362162306a36Sopenharmony_ci 362262306a36Sopenharmony_ci ret = btrfs_cow_block(trans, root, left, 362362306a36Sopenharmony_ci path->nodes[1], slot - 1, &left, 362462306a36Sopenharmony_ci 
BTRFS_NESTING_LEFT_COW); 362562306a36Sopenharmony_ci if (ret) { 362662306a36Sopenharmony_ci /* we hit -ENOSPC, but it isn't fatal here */ 362762306a36Sopenharmony_ci if (ret == -ENOSPC) 362862306a36Sopenharmony_ci ret = 1; 362962306a36Sopenharmony_ci goto out; 363062306a36Sopenharmony_ci } 363162306a36Sopenharmony_ci 363262306a36Sopenharmony_ci if (check_sibling_keys(left, right)) { 363362306a36Sopenharmony_ci ret = -EUCLEAN; 363462306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 363562306a36Sopenharmony_ci goto out; 363662306a36Sopenharmony_ci } 363762306a36Sopenharmony_ci return __push_leaf_left(trans, path, min_data_size, empty, left, 363862306a36Sopenharmony_ci free_space, right_nritems, max_slot); 363962306a36Sopenharmony_ciout: 364062306a36Sopenharmony_ci btrfs_tree_unlock(left); 364162306a36Sopenharmony_ci free_extent_buffer(left); 364262306a36Sopenharmony_ci return ret; 364362306a36Sopenharmony_ci} 364462306a36Sopenharmony_ci 364562306a36Sopenharmony_ci/* 364662306a36Sopenharmony_ci * split the path's leaf in two, making sure there is at least data_size 364762306a36Sopenharmony_ci * available for the resulting leaf level of the path. 
364862306a36Sopenharmony_ci */ 364962306a36Sopenharmony_cistatic noinline int copy_for_split(struct btrfs_trans_handle *trans, 365062306a36Sopenharmony_ci struct btrfs_path *path, 365162306a36Sopenharmony_ci struct extent_buffer *l, 365262306a36Sopenharmony_ci struct extent_buffer *right, 365362306a36Sopenharmony_ci int slot, int mid, int nritems) 365462306a36Sopenharmony_ci{ 365562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = trans->fs_info; 365662306a36Sopenharmony_ci int data_copy_size; 365762306a36Sopenharmony_ci int rt_data_off; 365862306a36Sopenharmony_ci int i; 365962306a36Sopenharmony_ci int ret; 366062306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 366162306a36Sopenharmony_ci struct btrfs_map_token token; 366262306a36Sopenharmony_ci 366362306a36Sopenharmony_ci nritems = nritems - mid; 366462306a36Sopenharmony_ci btrfs_set_header_nritems(right, nritems); 366562306a36Sopenharmony_ci data_copy_size = btrfs_item_data_end(l, mid) - leaf_data_end(l); 366662306a36Sopenharmony_ci 366762306a36Sopenharmony_ci copy_leaf_items(right, l, 0, mid, nritems); 366862306a36Sopenharmony_ci 366962306a36Sopenharmony_ci copy_leaf_data(right, l, BTRFS_LEAF_DATA_SIZE(fs_info) - data_copy_size, 367062306a36Sopenharmony_ci leaf_data_end(l), data_copy_size); 367162306a36Sopenharmony_ci 367262306a36Sopenharmony_ci rt_data_off = BTRFS_LEAF_DATA_SIZE(fs_info) - btrfs_item_data_end(l, mid); 367362306a36Sopenharmony_ci 367462306a36Sopenharmony_ci btrfs_init_map_token(&token, right); 367562306a36Sopenharmony_ci for (i = 0; i < nritems; i++) { 367662306a36Sopenharmony_ci u32 ioff; 367762306a36Sopenharmony_ci 367862306a36Sopenharmony_ci ioff = btrfs_token_item_offset(&token, i); 367962306a36Sopenharmony_ci btrfs_set_token_item_offset(&token, i, ioff + rt_data_off); 368062306a36Sopenharmony_ci } 368162306a36Sopenharmony_ci 368262306a36Sopenharmony_ci btrfs_set_header_nritems(l, mid); 368362306a36Sopenharmony_ci btrfs_item_key(right, &disk_key, 0); 368462306a36Sopenharmony_ci ret 
= insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1); 368562306a36Sopenharmony_ci if (ret < 0) 368662306a36Sopenharmony_ci return ret; 368762306a36Sopenharmony_ci 368862306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, right); 368962306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, l); 369062306a36Sopenharmony_ci BUG_ON(path->slots[0] != slot); 369162306a36Sopenharmony_ci 369262306a36Sopenharmony_ci if (mid <= slot) { 369362306a36Sopenharmony_ci btrfs_tree_unlock(path->nodes[0]); 369462306a36Sopenharmony_ci free_extent_buffer(path->nodes[0]); 369562306a36Sopenharmony_ci path->nodes[0] = right; 369662306a36Sopenharmony_ci path->slots[0] -= mid; 369762306a36Sopenharmony_ci path->slots[1] += 1; 369862306a36Sopenharmony_ci } else { 369962306a36Sopenharmony_ci btrfs_tree_unlock(right); 370062306a36Sopenharmony_ci free_extent_buffer(right); 370162306a36Sopenharmony_ci } 370262306a36Sopenharmony_ci 370362306a36Sopenharmony_ci BUG_ON(path->slots[0] < 0); 370462306a36Sopenharmony_ci 370562306a36Sopenharmony_ci return 0; 370662306a36Sopenharmony_ci} 370762306a36Sopenharmony_ci 370862306a36Sopenharmony_ci/* 370962306a36Sopenharmony_ci * double splits happen when we need to insert a big item in the middle 371062306a36Sopenharmony_ci * of a leaf. A double split can leave us with 3 mostly empty leaves: 371162306a36Sopenharmony_ci * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] 371262306a36Sopenharmony_ci * A B C 371362306a36Sopenharmony_ci * 371462306a36Sopenharmony_ci * We avoid this by trying to push the items on either side of our target 371562306a36Sopenharmony_ci * into the adjacent leaves. If all goes well we can avoid the double split 371662306a36Sopenharmony_ci * completely. 
371762306a36Sopenharmony_ci */ 371862306a36Sopenharmony_cistatic noinline int push_for_double_split(struct btrfs_trans_handle *trans, 371962306a36Sopenharmony_ci struct btrfs_root *root, 372062306a36Sopenharmony_ci struct btrfs_path *path, 372162306a36Sopenharmony_ci int data_size) 372262306a36Sopenharmony_ci{ 372362306a36Sopenharmony_ci int ret; 372462306a36Sopenharmony_ci int progress = 0; 372562306a36Sopenharmony_ci int slot; 372662306a36Sopenharmony_ci u32 nritems; 372762306a36Sopenharmony_ci int space_needed = data_size; 372862306a36Sopenharmony_ci 372962306a36Sopenharmony_ci slot = path->slots[0]; 373062306a36Sopenharmony_ci if (slot < btrfs_header_nritems(path->nodes[0])) 373162306a36Sopenharmony_ci space_needed -= btrfs_leaf_free_space(path->nodes[0]); 373262306a36Sopenharmony_ci 373362306a36Sopenharmony_ci /* 373462306a36Sopenharmony_ci * try to push all the items after our slot into the 373562306a36Sopenharmony_ci * right leaf 373662306a36Sopenharmony_ci */ 373762306a36Sopenharmony_ci ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot); 373862306a36Sopenharmony_ci if (ret < 0) 373962306a36Sopenharmony_ci return ret; 374062306a36Sopenharmony_ci 374162306a36Sopenharmony_ci if (ret == 0) 374262306a36Sopenharmony_ci progress++; 374362306a36Sopenharmony_ci 374462306a36Sopenharmony_ci nritems = btrfs_header_nritems(path->nodes[0]); 374562306a36Sopenharmony_ci /* 374662306a36Sopenharmony_ci * our goal is to get our slot at the start or end of a leaf. 
If 374762306a36Sopenharmony_ci * we've done so we're done 374862306a36Sopenharmony_ci */ 374962306a36Sopenharmony_ci if (path->slots[0] == 0 || path->slots[0] == nritems) 375062306a36Sopenharmony_ci return 0; 375162306a36Sopenharmony_ci 375262306a36Sopenharmony_ci if (btrfs_leaf_free_space(path->nodes[0]) >= data_size) 375362306a36Sopenharmony_ci return 0; 375462306a36Sopenharmony_ci 375562306a36Sopenharmony_ci /* try to push all the items before our slot into the next leaf */ 375662306a36Sopenharmony_ci slot = path->slots[0]; 375762306a36Sopenharmony_ci space_needed = data_size; 375862306a36Sopenharmony_ci if (slot > 0) 375962306a36Sopenharmony_ci space_needed -= btrfs_leaf_free_space(path->nodes[0]); 376062306a36Sopenharmony_ci ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot); 376162306a36Sopenharmony_ci if (ret < 0) 376262306a36Sopenharmony_ci return ret; 376362306a36Sopenharmony_ci 376462306a36Sopenharmony_ci if (ret == 0) 376562306a36Sopenharmony_ci progress++; 376662306a36Sopenharmony_ci 376762306a36Sopenharmony_ci if (progress) 376862306a36Sopenharmony_ci return 0; 376962306a36Sopenharmony_ci return 1; 377062306a36Sopenharmony_ci} 377162306a36Sopenharmony_ci 377262306a36Sopenharmony_ci/* 377362306a36Sopenharmony_ci * split the path's leaf in two, making sure there is at least data_size 377462306a36Sopenharmony_ci * available for the resulting leaf level of the path. 377562306a36Sopenharmony_ci * 377662306a36Sopenharmony_ci * returns 0 if all went well and < 0 on failure. 
377762306a36Sopenharmony_ci */ 377862306a36Sopenharmony_cistatic noinline int split_leaf(struct btrfs_trans_handle *trans, 377962306a36Sopenharmony_ci struct btrfs_root *root, 378062306a36Sopenharmony_ci const struct btrfs_key *ins_key, 378162306a36Sopenharmony_ci struct btrfs_path *path, int data_size, 378262306a36Sopenharmony_ci int extend) 378362306a36Sopenharmony_ci{ 378462306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 378562306a36Sopenharmony_ci struct extent_buffer *l; 378662306a36Sopenharmony_ci u32 nritems; 378762306a36Sopenharmony_ci int mid; 378862306a36Sopenharmony_ci int slot; 378962306a36Sopenharmony_ci struct extent_buffer *right; 379062306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 379162306a36Sopenharmony_ci int ret = 0; 379262306a36Sopenharmony_ci int wret; 379362306a36Sopenharmony_ci int split; 379462306a36Sopenharmony_ci int num_doubles = 0; 379562306a36Sopenharmony_ci int tried_avoid_double = 0; 379662306a36Sopenharmony_ci 379762306a36Sopenharmony_ci l = path->nodes[0]; 379862306a36Sopenharmony_ci slot = path->slots[0]; 379962306a36Sopenharmony_ci if (extend && data_size + btrfs_item_size(l, slot) + 380062306a36Sopenharmony_ci sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(fs_info)) 380162306a36Sopenharmony_ci return -EOVERFLOW; 380262306a36Sopenharmony_ci 380362306a36Sopenharmony_ci /* first try to make some room by pushing left and right */ 380462306a36Sopenharmony_ci if (data_size && path->nodes[1]) { 380562306a36Sopenharmony_ci int space_needed = data_size; 380662306a36Sopenharmony_ci 380762306a36Sopenharmony_ci if (slot < btrfs_header_nritems(l)) 380862306a36Sopenharmony_ci space_needed -= btrfs_leaf_free_space(l); 380962306a36Sopenharmony_ci 381062306a36Sopenharmony_ci wret = push_leaf_right(trans, root, path, space_needed, 381162306a36Sopenharmony_ci space_needed, 0, 0); 381262306a36Sopenharmony_ci if (wret < 0) 381362306a36Sopenharmony_ci return wret; 381462306a36Sopenharmony_ci if (wret) { 
381562306a36Sopenharmony_ci space_needed = data_size; 381662306a36Sopenharmony_ci if (slot > 0) 381762306a36Sopenharmony_ci space_needed -= btrfs_leaf_free_space(l); 381862306a36Sopenharmony_ci wret = push_leaf_left(trans, root, path, space_needed, 381962306a36Sopenharmony_ci space_needed, 0, (u32)-1); 382062306a36Sopenharmony_ci if (wret < 0) 382162306a36Sopenharmony_ci return wret; 382262306a36Sopenharmony_ci } 382362306a36Sopenharmony_ci l = path->nodes[0]; 382462306a36Sopenharmony_ci 382562306a36Sopenharmony_ci /* did the pushes work? */ 382662306a36Sopenharmony_ci if (btrfs_leaf_free_space(l) >= data_size) 382762306a36Sopenharmony_ci return 0; 382862306a36Sopenharmony_ci } 382962306a36Sopenharmony_ci 383062306a36Sopenharmony_ci if (!path->nodes[1]) { 383162306a36Sopenharmony_ci ret = insert_new_root(trans, root, path, 1); 383262306a36Sopenharmony_ci if (ret) 383362306a36Sopenharmony_ci return ret; 383462306a36Sopenharmony_ci } 383562306a36Sopenharmony_ciagain: 383662306a36Sopenharmony_ci split = 1; 383762306a36Sopenharmony_ci l = path->nodes[0]; 383862306a36Sopenharmony_ci slot = path->slots[0]; 383962306a36Sopenharmony_ci nritems = btrfs_header_nritems(l); 384062306a36Sopenharmony_ci mid = (nritems + 1) / 2; 384162306a36Sopenharmony_ci 384262306a36Sopenharmony_ci if (mid <= slot) { 384362306a36Sopenharmony_ci if (nritems == 1 || 384462306a36Sopenharmony_ci leaf_space_used(l, mid, nritems - mid) + data_size > 384562306a36Sopenharmony_ci BTRFS_LEAF_DATA_SIZE(fs_info)) { 384662306a36Sopenharmony_ci if (slot >= nritems) { 384762306a36Sopenharmony_ci split = 0; 384862306a36Sopenharmony_ci } else { 384962306a36Sopenharmony_ci mid = slot; 385062306a36Sopenharmony_ci if (mid != nritems && 385162306a36Sopenharmony_ci leaf_space_used(l, mid, nritems - mid) + 385262306a36Sopenharmony_ci data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) { 385362306a36Sopenharmony_ci if (data_size && !tried_avoid_double) 385462306a36Sopenharmony_ci goto push_for_double; 
385562306a36Sopenharmony_ci split = 2; 385662306a36Sopenharmony_ci } 385762306a36Sopenharmony_ci } 385862306a36Sopenharmony_ci } 385962306a36Sopenharmony_ci } else { 386062306a36Sopenharmony_ci if (leaf_space_used(l, 0, mid) + data_size > 386162306a36Sopenharmony_ci BTRFS_LEAF_DATA_SIZE(fs_info)) { 386262306a36Sopenharmony_ci if (!extend && data_size && slot == 0) { 386362306a36Sopenharmony_ci split = 0; 386462306a36Sopenharmony_ci } else if ((extend || !data_size) && slot == 0) { 386562306a36Sopenharmony_ci mid = 1; 386662306a36Sopenharmony_ci } else { 386762306a36Sopenharmony_ci mid = slot; 386862306a36Sopenharmony_ci if (mid != nritems && 386962306a36Sopenharmony_ci leaf_space_used(l, mid, nritems - mid) + 387062306a36Sopenharmony_ci data_size > BTRFS_LEAF_DATA_SIZE(fs_info)) { 387162306a36Sopenharmony_ci if (data_size && !tried_avoid_double) 387262306a36Sopenharmony_ci goto push_for_double; 387362306a36Sopenharmony_ci split = 2; 387462306a36Sopenharmony_ci } 387562306a36Sopenharmony_ci } 387662306a36Sopenharmony_ci } 387762306a36Sopenharmony_ci } 387862306a36Sopenharmony_ci 387962306a36Sopenharmony_ci if (split == 0) 388062306a36Sopenharmony_ci btrfs_cpu_key_to_disk(&disk_key, ins_key); 388162306a36Sopenharmony_ci else 388262306a36Sopenharmony_ci btrfs_item_key(l, &disk_key, mid); 388362306a36Sopenharmony_ci 388462306a36Sopenharmony_ci /* 388562306a36Sopenharmony_ci * We have to about BTRFS_NESTING_NEW_ROOT here if we've done a double 388662306a36Sopenharmony_ci * split, because we're only allowed to have MAX_LOCKDEP_SUBCLASSES 388762306a36Sopenharmony_ci * subclasses, which is 8 at the time of this patch, and we've maxed it 388862306a36Sopenharmony_ci * out. In the future we could add a 388962306a36Sopenharmony_ci * BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just 389062306a36Sopenharmony_ci * use BTRFS_NESTING_NEW_ROOT. 
389162306a36Sopenharmony_ci */ 389262306a36Sopenharmony_ci right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid, 389362306a36Sopenharmony_ci &disk_key, 0, l->start, 0, 389462306a36Sopenharmony_ci num_doubles ? BTRFS_NESTING_NEW_ROOT : 389562306a36Sopenharmony_ci BTRFS_NESTING_SPLIT); 389662306a36Sopenharmony_ci if (IS_ERR(right)) 389762306a36Sopenharmony_ci return PTR_ERR(right); 389862306a36Sopenharmony_ci 389962306a36Sopenharmony_ci root_add_used(root, fs_info->nodesize); 390062306a36Sopenharmony_ci 390162306a36Sopenharmony_ci if (split == 0) { 390262306a36Sopenharmony_ci if (mid <= slot) { 390362306a36Sopenharmony_ci btrfs_set_header_nritems(right, 0); 390462306a36Sopenharmony_ci ret = insert_ptr(trans, path, &disk_key, 390562306a36Sopenharmony_ci right->start, path->slots[1] + 1, 1); 390662306a36Sopenharmony_ci if (ret < 0) { 390762306a36Sopenharmony_ci btrfs_tree_unlock(right); 390862306a36Sopenharmony_ci free_extent_buffer(right); 390962306a36Sopenharmony_ci return ret; 391062306a36Sopenharmony_ci } 391162306a36Sopenharmony_ci btrfs_tree_unlock(path->nodes[0]); 391262306a36Sopenharmony_ci free_extent_buffer(path->nodes[0]); 391362306a36Sopenharmony_ci path->nodes[0] = right; 391462306a36Sopenharmony_ci path->slots[0] = 0; 391562306a36Sopenharmony_ci path->slots[1] += 1; 391662306a36Sopenharmony_ci } else { 391762306a36Sopenharmony_ci btrfs_set_header_nritems(right, 0); 391862306a36Sopenharmony_ci ret = insert_ptr(trans, path, &disk_key, 391962306a36Sopenharmony_ci right->start, path->slots[1], 1); 392062306a36Sopenharmony_ci if (ret < 0) { 392162306a36Sopenharmony_ci btrfs_tree_unlock(right); 392262306a36Sopenharmony_ci free_extent_buffer(right); 392362306a36Sopenharmony_ci return ret; 392462306a36Sopenharmony_ci } 392562306a36Sopenharmony_ci btrfs_tree_unlock(path->nodes[0]); 392662306a36Sopenharmony_ci free_extent_buffer(path->nodes[0]); 392762306a36Sopenharmony_ci path->nodes[0] = right; 392862306a36Sopenharmony_ci path->slots[0] = 0; 
392962306a36Sopenharmony_ci if (path->slots[1] == 0) 393062306a36Sopenharmony_ci fixup_low_keys(trans, path, &disk_key, 1); 393162306a36Sopenharmony_ci } 393262306a36Sopenharmony_ci /* 393362306a36Sopenharmony_ci * We create a new leaf 'right' for the required ins_len and 393462306a36Sopenharmony_ci * we'll do btrfs_mark_buffer_dirty() on this leaf after copying 393562306a36Sopenharmony_ci * the content of ins_len to 'right'. 393662306a36Sopenharmony_ci */ 393762306a36Sopenharmony_ci return ret; 393862306a36Sopenharmony_ci } 393962306a36Sopenharmony_ci 394062306a36Sopenharmony_ci ret = copy_for_split(trans, path, l, right, slot, mid, nritems); 394162306a36Sopenharmony_ci if (ret < 0) { 394262306a36Sopenharmony_ci btrfs_tree_unlock(right); 394362306a36Sopenharmony_ci free_extent_buffer(right); 394462306a36Sopenharmony_ci return ret; 394562306a36Sopenharmony_ci } 394662306a36Sopenharmony_ci 394762306a36Sopenharmony_ci if (split == 2) { 394862306a36Sopenharmony_ci BUG_ON(num_doubles != 0); 394962306a36Sopenharmony_ci num_doubles++; 395062306a36Sopenharmony_ci goto again; 395162306a36Sopenharmony_ci } 395262306a36Sopenharmony_ci 395362306a36Sopenharmony_ci return 0; 395462306a36Sopenharmony_ci 395562306a36Sopenharmony_cipush_for_double: 395662306a36Sopenharmony_ci push_for_double_split(trans, root, path, data_size); 395762306a36Sopenharmony_ci tried_avoid_double = 1; 395862306a36Sopenharmony_ci if (btrfs_leaf_free_space(path->nodes[0]) >= data_size) 395962306a36Sopenharmony_ci return 0; 396062306a36Sopenharmony_ci goto again; 396162306a36Sopenharmony_ci} 396262306a36Sopenharmony_ci 396362306a36Sopenharmony_cistatic noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, 396462306a36Sopenharmony_ci struct btrfs_root *root, 396562306a36Sopenharmony_ci struct btrfs_path *path, int ins_len) 396662306a36Sopenharmony_ci{ 396762306a36Sopenharmony_ci struct btrfs_key key; 396862306a36Sopenharmony_ci struct extent_buffer *leaf; 396962306a36Sopenharmony_ci struct 
btrfs_file_extent_item *fi; 397062306a36Sopenharmony_ci u64 extent_len = 0; 397162306a36Sopenharmony_ci u32 item_size; 397262306a36Sopenharmony_ci int ret; 397362306a36Sopenharmony_ci 397462306a36Sopenharmony_ci leaf = path->nodes[0]; 397562306a36Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 397662306a36Sopenharmony_ci 397762306a36Sopenharmony_ci BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY && 397862306a36Sopenharmony_ci key.type != BTRFS_EXTENT_CSUM_KEY); 397962306a36Sopenharmony_ci 398062306a36Sopenharmony_ci if (btrfs_leaf_free_space(leaf) >= ins_len) 398162306a36Sopenharmony_ci return 0; 398262306a36Sopenharmony_ci 398362306a36Sopenharmony_ci item_size = btrfs_item_size(leaf, path->slots[0]); 398462306a36Sopenharmony_ci if (key.type == BTRFS_EXTENT_DATA_KEY) { 398562306a36Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 398662306a36Sopenharmony_ci struct btrfs_file_extent_item); 398762306a36Sopenharmony_ci extent_len = btrfs_file_extent_num_bytes(leaf, fi); 398862306a36Sopenharmony_ci } 398962306a36Sopenharmony_ci btrfs_release_path(path); 399062306a36Sopenharmony_ci 399162306a36Sopenharmony_ci path->keep_locks = 1; 399262306a36Sopenharmony_ci path->search_for_split = 1; 399362306a36Sopenharmony_ci ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 399462306a36Sopenharmony_ci path->search_for_split = 0; 399562306a36Sopenharmony_ci if (ret > 0) 399662306a36Sopenharmony_ci ret = -EAGAIN; 399762306a36Sopenharmony_ci if (ret < 0) 399862306a36Sopenharmony_ci goto err; 399962306a36Sopenharmony_ci 400062306a36Sopenharmony_ci ret = -EAGAIN; 400162306a36Sopenharmony_ci leaf = path->nodes[0]; 400262306a36Sopenharmony_ci /* if our item isn't there, return now */ 400362306a36Sopenharmony_ci if (item_size != btrfs_item_size(leaf, path->slots[0])) 400462306a36Sopenharmony_ci goto err; 400562306a36Sopenharmony_ci 400662306a36Sopenharmony_ci /* the leaf has changed, it now has room. 
return now */ 400762306a36Sopenharmony_ci if (btrfs_leaf_free_space(path->nodes[0]) >= ins_len) 400862306a36Sopenharmony_ci goto err; 400962306a36Sopenharmony_ci 401062306a36Sopenharmony_ci if (key.type == BTRFS_EXTENT_DATA_KEY) { 401162306a36Sopenharmony_ci fi = btrfs_item_ptr(leaf, path->slots[0], 401262306a36Sopenharmony_ci struct btrfs_file_extent_item); 401362306a36Sopenharmony_ci if (extent_len != btrfs_file_extent_num_bytes(leaf, fi)) 401462306a36Sopenharmony_ci goto err; 401562306a36Sopenharmony_ci } 401662306a36Sopenharmony_ci 401762306a36Sopenharmony_ci ret = split_leaf(trans, root, &key, path, ins_len, 1); 401862306a36Sopenharmony_ci if (ret) 401962306a36Sopenharmony_ci goto err; 402062306a36Sopenharmony_ci 402162306a36Sopenharmony_ci path->keep_locks = 0; 402262306a36Sopenharmony_ci btrfs_unlock_up_safe(path, 1); 402362306a36Sopenharmony_ci return 0; 402462306a36Sopenharmony_cierr: 402562306a36Sopenharmony_ci path->keep_locks = 0; 402662306a36Sopenharmony_ci return ret; 402762306a36Sopenharmony_ci} 402862306a36Sopenharmony_ci 402962306a36Sopenharmony_cistatic noinline int split_item(struct btrfs_trans_handle *trans, 403062306a36Sopenharmony_ci struct btrfs_path *path, 403162306a36Sopenharmony_ci const struct btrfs_key *new_key, 403262306a36Sopenharmony_ci unsigned long split_offset) 403362306a36Sopenharmony_ci{ 403462306a36Sopenharmony_ci struct extent_buffer *leaf; 403562306a36Sopenharmony_ci int orig_slot, slot; 403662306a36Sopenharmony_ci char *buf; 403762306a36Sopenharmony_ci u32 nritems; 403862306a36Sopenharmony_ci u32 item_size; 403962306a36Sopenharmony_ci u32 orig_offset; 404062306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 404162306a36Sopenharmony_ci 404262306a36Sopenharmony_ci leaf = path->nodes[0]; 404362306a36Sopenharmony_ci /* 404462306a36Sopenharmony_ci * Shouldn't happen because the caller must have previously called 404562306a36Sopenharmony_ci * setup_leaf_for_split() to make room for the new item in the leaf. 
404662306a36Sopenharmony_ci */ 404762306a36Sopenharmony_ci if (WARN_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item))) 404862306a36Sopenharmony_ci return -ENOSPC; 404962306a36Sopenharmony_ci 405062306a36Sopenharmony_ci orig_slot = path->slots[0]; 405162306a36Sopenharmony_ci orig_offset = btrfs_item_offset(leaf, path->slots[0]); 405262306a36Sopenharmony_ci item_size = btrfs_item_size(leaf, path->slots[0]); 405362306a36Sopenharmony_ci 405462306a36Sopenharmony_ci buf = kmalloc(item_size, GFP_NOFS); 405562306a36Sopenharmony_ci if (!buf) 405662306a36Sopenharmony_ci return -ENOMEM; 405762306a36Sopenharmony_ci 405862306a36Sopenharmony_ci read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, 405962306a36Sopenharmony_ci path->slots[0]), item_size); 406062306a36Sopenharmony_ci 406162306a36Sopenharmony_ci slot = path->slots[0] + 1; 406262306a36Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 406362306a36Sopenharmony_ci if (slot != nritems) { 406462306a36Sopenharmony_ci /* shift the items */ 406562306a36Sopenharmony_ci memmove_leaf_items(leaf, slot + 1, slot, nritems - slot); 406662306a36Sopenharmony_ci } 406762306a36Sopenharmony_ci 406862306a36Sopenharmony_ci btrfs_cpu_key_to_disk(&disk_key, new_key); 406962306a36Sopenharmony_ci btrfs_set_item_key(leaf, &disk_key, slot); 407062306a36Sopenharmony_ci 407162306a36Sopenharmony_ci btrfs_set_item_offset(leaf, slot, orig_offset); 407262306a36Sopenharmony_ci btrfs_set_item_size(leaf, slot, item_size - split_offset); 407362306a36Sopenharmony_ci 407462306a36Sopenharmony_ci btrfs_set_item_offset(leaf, orig_slot, 407562306a36Sopenharmony_ci orig_offset + item_size - split_offset); 407662306a36Sopenharmony_ci btrfs_set_item_size(leaf, orig_slot, split_offset); 407762306a36Sopenharmony_ci 407862306a36Sopenharmony_ci btrfs_set_header_nritems(leaf, nritems + 1); 407962306a36Sopenharmony_ci 408062306a36Sopenharmony_ci /* write the data for the start of the original item */ 408162306a36Sopenharmony_ci 
write_extent_buffer(leaf, buf, 408262306a36Sopenharmony_ci btrfs_item_ptr_offset(leaf, path->slots[0]), 408362306a36Sopenharmony_ci split_offset); 408462306a36Sopenharmony_ci 408562306a36Sopenharmony_ci /* write the data for the new item */ 408662306a36Sopenharmony_ci write_extent_buffer(leaf, buf + split_offset, 408762306a36Sopenharmony_ci btrfs_item_ptr_offset(leaf, slot), 408862306a36Sopenharmony_ci item_size - split_offset); 408962306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, leaf); 409062306a36Sopenharmony_ci 409162306a36Sopenharmony_ci BUG_ON(btrfs_leaf_free_space(leaf) < 0); 409262306a36Sopenharmony_ci kfree(buf); 409362306a36Sopenharmony_ci return 0; 409462306a36Sopenharmony_ci} 409562306a36Sopenharmony_ci 409662306a36Sopenharmony_ci/* 409762306a36Sopenharmony_ci * This function splits a single item into two items, 409862306a36Sopenharmony_ci * giving 'new_key' to the new item and splitting the 409962306a36Sopenharmony_ci * old one at split_offset (from the start of the item). 410062306a36Sopenharmony_ci * 410162306a36Sopenharmony_ci * The path may be released by this operation. After 410262306a36Sopenharmony_ci * the split, the path is pointing to the old item. The 410362306a36Sopenharmony_ci * new item is going to be in the same node as the old one. 410462306a36Sopenharmony_ci * 410562306a36Sopenharmony_ci * Note, the item being split must be smaller enough to live alone on 410662306a36Sopenharmony_ci * a tree block with room for one extra struct btrfs_item 410762306a36Sopenharmony_ci * 410862306a36Sopenharmony_ci * This allows us to split the item in place, keeping a lock on the 410962306a36Sopenharmony_ci * leaf the entire time. 
411062306a36Sopenharmony_ci */ 411162306a36Sopenharmony_ciint btrfs_split_item(struct btrfs_trans_handle *trans, 411262306a36Sopenharmony_ci struct btrfs_root *root, 411362306a36Sopenharmony_ci struct btrfs_path *path, 411462306a36Sopenharmony_ci const struct btrfs_key *new_key, 411562306a36Sopenharmony_ci unsigned long split_offset) 411662306a36Sopenharmony_ci{ 411762306a36Sopenharmony_ci int ret; 411862306a36Sopenharmony_ci ret = setup_leaf_for_split(trans, root, path, 411962306a36Sopenharmony_ci sizeof(struct btrfs_item)); 412062306a36Sopenharmony_ci if (ret) 412162306a36Sopenharmony_ci return ret; 412262306a36Sopenharmony_ci 412362306a36Sopenharmony_ci ret = split_item(trans, path, new_key, split_offset); 412462306a36Sopenharmony_ci return ret; 412562306a36Sopenharmony_ci} 412662306a36Sopenharmony_ci 412762306a36Sopenharmony_ci/* 412862306a36Sopenharmony_ci * make the item pointed to by the path smaller. new_size indicates 412962306a36Sopenharmony_ci * how small to make it, and from_end tells us if we just chop bytes 413062306a36Sopenharmony_ci * off the end of the item or if we shift the item to chop bytes off 413162306a36Sopenharmony_ci * the front. 
413262306a36Sopenharmony_ci */ 413362306a36Sopenharmony_civoid btrfs_truncate_item(struct btrfs_trans_handle *trans, 413462306a36Sopenharmony_ci struct btrfs_path *path, u32 new_size, int from_end) 413562306a36Sopenharmony_ci{ 413662306a36Sopenharmony_ci int slot; 413762306a36Sopenharmony_ci struct extent_buffer *leaf; 413862306a36Sopenharmony_ci u32 nritems; 413962306a36Sopenharmony_ci unsigned int data_end; 414062306a36Sopenharmony_ci unsigned int old_data_start; 414162306a36Sopenharmony_ci unsigned int old_size; 414262306a36Sopenharmony_ci unsigned int size_diff; 414362306a36Sopenharmony_ci int i; 414462306a36Sopenharmony_ci struct btrfs_map_token token; 414562306a36Sopenharmony_ci 414662306a36Sopenharmony_ci leaf = path->nodes[0]; 414762306a36Sopenharmony_ci slot = path->slots[0]; 414862306a36Sopenharmony_ci 414962306a36Sopenharmony_ci old_size = btrfs_item_size(leaf, slot); 415062306a36Sopenharmony_ci if (old_size == new_size) 415162306a36Sopenharmony_ci return; 415262306a36Sopenharmony_ci 415362306a36Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 415462306a36Sopenharmony_ci data_end = leaf_data_end(leaf); 415562306a36Sopenharmony_ci 415662306a36Sopenharmony_ci old_data_start = btrfs_item_offset(leaf, slot); 415762306a36Sopenharmony_ci 415862306a36Sopenharmony_ci size_diff = old_size - new_size; 415962306a36Sopenharmony_ci 416062306a36Sopenharmony_ci BUG_ON(slot < 0); 416162306a36Sopenharmony_ci BUG_ON(slot >= nritems); 416262306a36Sopenharmony_ci 416362306a36Sopenharmony_ci /* 416462306a36Sopenharmony_ci * item0..itemN ... dataN.offset..dataN.size .. 
data0.size 416562306a36Sopenharmony_ci */ 416662306a36Sopenharmony_ci /* first correct the data pointers */ 416762306a36Sopenharmony_ci btrfs_init_map_token(&token, leaf); 416862306a36Sopenharmony_ci for (i = slot; i < nritems; i++) { 416962306a36Sopenharmony_ci u32 ioff; 417062306a36Sopenharmony_ci 417162306a36Sopenharmony_ci ioff = btrfs_token_item_offset(&token, i); 417262306a36Sopenharmony_ci btrfs_set_token_item_offset(&token, i, ioff + size_diff); 417362306a36Sopenharmony_ci } 417462306a36Sopenharmony_ci 417562306a36Sopenharmony_ci /* shift the data */ 417662306a36Sopenharmony_ci if (from_end) { 417762306a36Sopenharmony_ci memmove_leaf_data(leaf, data_end + size_diff, data_end, 417862306a36Sopenharmony_ci old_data_start + new_size - data_end); 417962306a36Sopenharmony_ci } else { 418062306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 418162306a36Sopenharmony_ci u64 offset; 418262306a36Sopenharmony_ci 418362306a36Sopenharmony_ci btrfs_item_key(leaf, &disk_key, slot); 418462306a36Sopenharmony_ci 418562306a36Sopenharmony_ci if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) { 418662306a36Sopenharmony_ci unsigned long ptr; 418762306a36Sopenharmony_ci struct btrfs_file_extent_item *fi; 418862306a36Sopenharmony_ci 418962306a36Sopenharmony_ci fi = btrfs_item_ptr(leaf, slot, 419062306a36Sopenharmony_ci struct btrfs_file_extent_item); 419162306a36Sopenharmony_ci fi = (struct btrfs_file_extent_item *)( 419262306a36Sopenharmony_ci (unsigned long)fi - size_diff); 419362306a36Sopenharmony_ci 419462306a36Sopenharmony_ci if (btrfs_file_extent_type(leaf, fi) == 419562306a36Sopenharmony_ci BTRFS_FILE_EXTENT_INLINE) { 419662306a36Sopenharmony_ci ptr = btrfs_item_ptr_offset(leaf, slot); 419762306a36Sopenharmony_ci memmove_extent_buffer(leaf, ptr, 419862306a36Sopenharmony_ci (unsigned long)fi, 419962306a36Sopenharmony_ci BTRFS_FILE_EXTENT_INLINE_DATA_START); 420062306a36Sopenharmony_ci } 420162306a36Sopenharmony_ci } 420262306a36Sopenharmony_ci 
420362306a36Sopenharmony_ci memmove_leaf_data(leaf, data_end + size_diff, data_end, 420462306a36Sopenharmony_ci old_data_start - data_end); 420562306a36Sopenharmony_ci 420662306a36Sopenharmony_ci offset = btrfs_disk_key_offset(&disk_key); 420762306a36Sopenharmony_ci btrfs_set_disk_key_offset(&disk_key, offset + size_diff); 420862306a36Sopenharmony_ci btrfs_set_item_key(leaf, &disk_key, slot); 420962306a36Sopenharmony_ci if (slot == 0) 421062306a36Sopenharmony_ci fixup_low_keys(trans, path, &disk_key, 1); 421162306a36Sopenharmony_ci } 421262306a36Sopenharmony_ci 421362306a36Sopenharmony_ci btrfs_set_item_size(leaf, slot, new_size); 421462306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, leaf); 421562306a36Sopenharmony_ci 421662306a36Sopenharmony_ci if (btrfs_leaf_free_space(leaf) < 0) { 421762306a36Sopenharmony_ci btrfs_print_leaf(leaf); 421862306a36Sopenharmony_ci BUG(); 421962306a36Sopenharmony_ci } 422062306a36Sopenharmony_ci} 422162306a36Sopenharmony_ci 422262306a36Sopenharmony_ci/* 422362306a36Sopenharmony_ci * make the item pointed to by the path bigger, data_size is the added size. 
422462306a36Sopenharmony_ci */ 422562306a36Sopenharmony_civoid btrfs_extend_item(struct btrfs_trans_handle *trans, 422662306a36Sopenharmony_ci struct btrfs_path *path, u32 data_size) 422762306a36Sopenharmony_ci{ 422862306a36Sopenharmony_ci int slot; 422962306a36Sopenharmony_ci struct extent_buffer *leaf; 423062306a36Sopenharmony_ci u32 nritems; 423162306a36Sopenharmony_ci unsigned int data_end; 423262306a36Sopenharmony_ci unsigned int old_data; 423362306a36Sopenharmony_ci unsigned int old_size; 423462306a36Sopenharmony_ci int i; 423562306a36Sopenharmony_ci struct btrfs_map_token token; 423662306a36Sopenharmony_ci 423762306a36Sopenharmony_ci leaf = path->nodes[0]; 423862306a36Sopenharmony_ci 423962306a36Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 424062306a36Sopenharmony_ci data_end = leaf_data_end(leaf); 424162306a36Sopenharmony_ci 424262306a36Sopenharmony_ci if (btrfs_leaf_free_space(leaf) < data_size) { 424362306a36Sopenharmony_ci btrfs_print_leaf(leaf); 424462306a36Sopenharmony_ci BUG(); 424562306a36Sopenharmony_ci } 424662306a36Sopenharmony_ci slot = path->slots[0]; 424762306a36Sopenharmony_ci old_data = btrfs_item_data_end(leaf, slot); 424862306a36Sopenharmony_ci 424962306a36Sopenharmony_ci BUG_ON(slot < 0); 425062306a36Sopenharmony_ci if (slot >= nritems) { 425162306a36Sopenharmony_ci btrfs_print_leaf(leaf); 425262306a36Sopenharmony_ci btrfs_crit(leaf->fs_info, "slot %d too large, nritems %d", 425362306a36Sopenharmony_ci slot, nritems); 425462306a36Sopenharmony_ci BUG(); 425562306a36Sopenharmony_ci } 425662306a36Sopenharmony_ci 425762306a36Sopenharmony_ci /* 425862306a36Sopenharmony_ci * item0..itemN ... dataN.offset..dataN.size .. 
data0.size 425962306a36Sopenharmony_ci */ 426062306a36Sopenharmony_ci /* first correct the data pointers */ 426162306a36Sopenharmony_ci btrfs_init_map_token(&token, leaf); 426262306a36Sopenharmony_ci for (i = slot; i < nritems; i++) { 426362306a36Sopenharmony_ci u32 ioff; 426462306a36Sopenharmony_ci 426562306a36Sopenharmony_ci ioff = btrfs_token_item_offset(&token, i); 426662306a36Sopenharmony_ci btrfs_set_token_item_offset(&token, i, ioff - data_size); 426762306a36Sopenharmony_ci } 426862306a36Sopenharmony_ci 426962306a36Sopenharmony_ci /* shift the data */ 427062306a36Sopenharmony_ci memmove_leaf_data(leaf, data_end - data_size, data_end, 427162306a36Sopenharmony_ci old_data - data_end); 427262306a36Sopenharmony_ci 427362306a36Sopenharmony_ci data_end = old_data; 427462306a36Sopenharmony_ci old_size = btrfs_item_size(leaf, slot); 427562306a36Sopenharmony_ci btrfs_set_item_size(leaf, slot, old_size + data_size); 427662306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, leaf); 427762306a36Sopenharmony_ci 427862306a36Sopenharmony_ci if (btrfs_leaf_free_space(leaf) < 0) { 427962306a36Sopenharmony_ci btrfs_print_leaf(leaf); 428062306a36Sopenharmony_ci BUG(); 428162306a36Sopenharmony_ci } 428262306a36Sopenharmony_ci} 428362306a36Sopenharmony_ci 428462306a36Sopenharmony_ci/* 428562306a36Sopenharmony_ci * Make space in the node before inserting one or more items. 
428662306a36Sopenharmony_ci * 428762306a36Sopenharmony_ci * @trans: transaction handle 428862306a36Sopenharmony_ci * @root: root we are inserting items to 428962306a36Sopenharmony_ci * @path: points to the leaf/slot where we are going to insert new items 429062306a36Sopenharmony_ci * @batch: information about the batch of items to insert 429162306a36Sopenharmony_ci * 429262306a36Sopenharmony_ci * Main purpose is to save stack depth by doing the bulk of the work in a 429362306a36Sopenharmony_ci * function that doesn't call btrfs_search_slot 429462306a36Sopenharmony_ci */ 429562306a36Sopenharmony_cistatic void setup_items_for_insert(struct btrfs_trans_handle *trans, 429662306a36Sopenharmony_ci struct btrfs_root *root, struct btrfs_path *path, 429762306a36Sopenharmony_ci const struct btrfs_item_batch *batch) 429862306a36Sopenharmony_ci{ 429962306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 430062306a36Sopenharmony_ci int i; 430162306a36Sopenharmony_ci u32 nritems; 430262306a36Sopenharmony_ci unsigned int data_end; 430362306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 430462306a36Sopenharmony_ci struct extent_buffer *leaf; 430562306a36Sopenharmony_ci int slot; 430662306a36Sopenharmony_ci struct btrfs_map_token token; 430762306a36Sopenharmony_ci u32 total_size; 430862306a36Sopenharmony_ci 430962306a36Sopenharmony_ci /* 431062306a36Sopenharmony_ci * Before anything else, update keys in the parent and other ancestors 431162306a36Sopenharmony_ci * if needed, then release the write locks on them, so that other tasks 431262306a36Sopenharmony_ci * can use them while we modify the leaf. 
431362306a36Sopenharmony_ci */ 431462306a36Sopenharmony_ci if (path->slots[0] == 0) { 431562306a36Sopenharmony_ci btrfs_cpu_key_to_disk(&disk_key, &batch->keys[0]); 431662306a36Sopenharmony_ci fixup_low_keys(trans, path, &disk_key, 1); 431762306a36Sopenharmony_ci } 431862306a36Sopenharmony_ci btrfs_unlock_up_safe(path, 1); 431962306a36Sopenharmony_ci 432062306a36Sopenharmony_ci leaf = path->nodes[0]; 432162306a36Sopenharmony_ci slot = path->slots[0]; 432262306a36Sopenharmony_ci 432362306a36Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 432462306a36Sopenharmony_ci data_end = leaf_data_end(leaf); 432562306a36Sopenharmony_ci total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item)); 432662306a36Sopenharmony_ci 432762306a36Sopenharmony_ci if (btrfs_leaf_free_space(leaf) < total_size) { 432862306a36Sopenharmony_ci btrfs_print_leaf(leaf); 432962306a36Sopenharmony_ci btrfs_crit(fs_info, "not enough freespace need %u have %d", 433062306a36Sopenharmony_ci total_size, btrfs_leaf_free_space(leaf)); 433162306a36Sopenharmony_ci BUG(); 433262306a36Sopenharmony_ci } 433362306a36Sopenharmony_ci 433462306a36Sopenharmony_ci btrfs_init_map_token(&token, leaf); 433562306a36Sopenharmony_ci if (slot != nritems) { 433662306a36Sopenharmony_ci unsigned int old_data = btrfs_item_data_end(leaf, slot); 433762306a36Sopenharmony_ci 433862306a36Sopenharmony_ci if (old_data < data_end) { 433962306a36Sopenharmony_ci btrfs_print_leaf(leaf); 434062306a36Sopenharmony_ci btrfs_crit(fs_info, 434162306a36Sopenharmony_ci "item at slot %d with data offset %u beyond data end of leaf %u", 434262306a36Sopenharmony_ci slot, old_data, data_end); 434362306a36Sopenharmony_ci BUG(); 434462306a36Sopenharmony_ci } 434562306a36Sopenharmony_ci /* 434662306a36Sopenharmony_ci * item0..itemN ... dataN.offset..dataN.size .. 
data0.size 434762306a36Sopenharmony_ci */ 434862306a36Sopenharmony_ci /* first correct the data pointers */ 434962306a36Sopenharmony_ci for (i = slot; i < nritems; i++) { 435062306a36Sopenharmony_ci u32 ioff; 435162306a36Sopenharmony_ci 435262306a36Sopenharmony_ci ioff = btrfs_token_item_offset(&token, i); 435362306a36Sopenharmony_ci btrfs_set_token_item_offset(&token, i, 435462306a36Sopenharmony_ci ioff - batch->total_data_size); 435562306a36Sopenharmony_ci } 435662306a36Sopenharmony_ci /* shift the items */ 435762306a36Sopenharmony_ci memmove_leaf_items(leaf, slot + batch->nr, slot, nritems - slot); 435862306a36Sopenharmony_ci 435962306a36Sopenharmony_ci /* shift the data */ 436062306a36Sopenharmony_ci memmove_leaf_data(leaf, data_end - batch->total_data_size, 436162306a36Sopenharmony_ci data_end, old_data - data_end); 436262306a36Sopenharmony_ci data_end = old_data; 436362306a36Sopenharmony_ci } 436462306a36Sopenharmony_ci 436562306a36Sopenharmony_ci /* setup the item for the new data */ 436662306a36Sopenharmony_ci for (i = 0; i < batch->nr; i++) { 436762306a36Sopenharmony_ci btrfs_cpu_key_to_disk(&disk_key, &batch->keys[i]); 436862306a36Sopenharmony_ci btrfs_set_item_key(leaf, &disk_key, slot + i); 436962306a36Sopenharmony_ci data_end -= batch->data_sizes[i]; 437062306a36Sopenharmony_ci btrfs_set_token_item_offset(&token, slot + i, data_end); 437162306a36Sopenharmony_ci btrfs_set_token_item_size(&token, slot + i, batch->data_sizes[i]); 437262306a36Sopenharmony_ci } 437362306a36Sopenharmony_ci 437462306a36Sopenharmony_ci btrfs_set_header_nritems(leaf, nritems + batch->nr); 437562306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, leaf); 437662306a36Sopenharmony_ci 437762306a36Sopenharmony_ci if (btrfs_leaf_free_space(leaf) < 0) { 437862306a36Sopenharmony_ci btrfs_print_leaf(leaf); 437962306a36Sopenharmony_ci BUG(); 438062306a36Sopenharmony_ci } 438162306a36Sopenharmony_ci} 438262306a36Sopenharmony_ci 438362306a36Sopenharmony_ci/* 438462306a36Sopenharmony_ci * 
Insert a new item into a leaf. 438562306a36Sopenharmony_ci * 438662306a36Sopenharmony_ci * @trans: Transaction handle. 438762306a36Sopenharmony_ci * @root: The root of the btree. 438862306a36Sopenharmony_ci * @path: A path pointing to the target leaf and slot. 438962306a36Sopenharmony_ci * @key: The key of the new item. 439062306a36Sopenharmony_ci * @data_size: The size of the data associated with the new key. 439162306a36Sopenharmony_ci */ 439262306a36Sopenharmony_civoid btrfs_setup_item_for_insert(struct btrfs_trans_handle *trans, 439362306a36Sopenharmony_ci struct btrfs_root *root, 439462306a36Sopenharmony_ci struct btrfs_path *path, 439562306a36Sopenharmony_ci const struct btrfs_key *key, 439662306a36Sopenharmony_ci u32 data_size) 439762306a36Sopenharmony_ci{ 439862306a36Sopenharmony_ci struct btrfs_item_batch batch; 439962306a36Sopenharmony_ci 440062306a36Sopenharmony_ci batch.keys = key; 440162306a36Sopenharmony_ci batch.data_sizes = &data_size; 440262306a36Sopenharmony_ci batch.total_data_size = data_size; 440362306a36Sopenharmony_ci batch.nr = 1; 440462306a36Sopenharmony_ci 440562306a36Sopenharmony_ci setup_items_for_insert(trans, root, path, &batch); 440662306a36Sopenharmony_ci} 440762306a36Sopenharmony_ci 440862306a36Sopenharmony_ci/* 440962306a36Sopenharmony_ci * Given a key and some data, insert items into the tree. 441062306a36Sopenharmony_ci * This does all the path init required, making room in the tree if needed. 
441162306a36Sopenharmony_ci */ 441262306a36Sopenharmony_ciint btrfs_insert_empty_items(struct btrfs_trans_handle *trans, 441362306a36Sopenharmony_ci struct btrfs_root *root, 441462306a36Sopenharmony_ci struct btrfs_path *path, 441562306a36Sopenharmony_ci const struct btrfs_item_batch *batch) 441662306a36Sopenharmony_ci{ 441762306a36Sopenharmony_ci int ret = 0; 441862306a36Sopenharmony_ci int slot; 441962306a36Sopenharmony_ci u32 total_size; 442062306a36Sopenharmony_ci 442162306a36Sopenharmony_ci total_size = batch->total_data_size + (batch->nr * sizeof(struct btrfs_item)); 442262306a36Sopenharmony_ci ret = btrfs_search_slot(trans, root, &batch->keys[0], path, total_size, 1); 442362306a36Sopenharmony_ci if (ret == 0) 442462306a36Sopenharmony_ci return -EEXIST; 442562306a36Sopenharmony_ci if (ret < 0) 442662306a36Sopenharmony_ci return ret; 442762306a36Sopenharmony_ci 442862306a36Sopenharmony_ci slot = path->slots[0]; 442962306a36Sopenharmony_ci BUG_ON(slot < 0); 443062306a36Sopenharmony_ci 443162306a36Sopenharmony_ci setup_items_for_insert(trans, root, path, batch); 443262306a36Sopenharmony_ci return 0; 443362306a36Sopenharmony_ci} 443462306a36Sopenharmony_ci 443562306a36Sopenharmony_ci/* 443662306a36Sopenharmony_ci * Given a key and some data, insert an item into the tree. 443762306a36Sopenharmony_ci * This does all the path init required, making room in the tree if needed. 
443862306a36Sopenharmony_ci */ 443962306a36Sopenharmony_ciint btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, 444062306a36Sopenharmony_ci const struct btrfs_key *cpu_key, void *data, 444162306a36Sopenharmony_ci u32 data_size) 444262306a36Sopenharmony_ci{ 444362306a36Sopenharmony_ci int ret = 0; 444462306a36Sopenharmony_ci struct btrfs_path *path; 444562306a36Sopenharmony_ci struct extent_buffer *leaf; 444662306a36Sopenharmony_ci unsigned long ptr; 444762306a36Sopenharmony_ci 444862306a36Sopenharmony_ci path = btrfs_alloc_path(); 444962306a36Sopenharmony_ci if (!path) 445062306a36Sopenharmony_ci return -ENOMEM; 445162306a36Sopenharmony_ci ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size); 445262306a36Sopenharmony_ci if (!ret) { 445362306a36Sopenharmony_ci leaf = path->nodes[0]; 445462306a36Sopenharmony_ci ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 445562306a36Sopenharmony_ci write_extent_buffer(leaf, data, ptr, data_size); 445662306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, leaf); 445762306a36Sopenharmony_ci } 445862306a36Sopenharmony_ci btrfs_free_path(path); 445962306a36Sopenharmony_ci return ret; 446062306a36Sopenharmony_ci} 446162306a36Sopenharmony_ci 446262306a36Sopenharmony_ci/* 446362306a36Sopenharmony_ci * This function duplicates an item, giving 'new_key' to the new item. 446462306a36Sopenharmony_ci * It guarantees both items live in the same tree leaf and the new item is 446562306a36Sopenharmony_ci * contiguous with the original item. 446662306a36Sopenharmony_ci * 446762306a36Sopenharmony_ci * This allows us to split a file extent in place, keeping a lock on the leaf 446862306a36Sopenharmony_ci * the entire time. 
446962306a36Sopenharmony_ci */ 447062306a36Sopenharmony_ciint btrfs_duplicate_item(struct btrfs_trans_handle *trans, 447162306a36Sopenharmony_ci struct btrfs_root *root, 447262306a36Sopenharmony_ci struct btrfs_path *path, 447362306a36Sopenharmony_ci const struct btrfs_key *new_key) 447462306a36Sopenharmony_ci{ 447562306a36Sopenharmony_ci struct extent_buffer *leaf; 447662306a36Sopenharmony_ci int ret; 447762306a36Sopenharmony_ci u32 item_size; 447862306a36Sopenharmony_ci 447962306a36Sopenharmony_ci leaf = path->nodes[0]; 448062306a36Sopenharmony_ci item_size = btrfs_item_size(leaf, path->slots[0]); 448162306a36Sopenharmony_ci ret = setup_leaf_for_split(trans, root, path, 448262306a36Sopenharmony_ci item_size + sizeof(struct btrfs_item)); 448362306a36Sopenharmony_ci if (ret) 448462306a36Sopenharmony_ci return ret; 448562306a36Sopenharmony_ci 448662306a36Sopenharmony_ci path->slots[0]++; 448762306a36Sopenharmony_ci btrfs_setup_item_for_insert(trans, root, path, new_key, item_size); 448862306a36Sopenharmony_ci leaf = path->nodes[0]; 448962306a36Sopenharmony_ci memcpy_extent_buffer(leaf, 449062306a36Sopenharmony_ci btrfs_item_ptr_offset(leaf, path->slots[0]), 449162306a36Sopenharmony_ci btrfs_item_ptr_offset(leaf, path->slots[0] - 1), 449262306a36Sopenharmony_ci item_size); 449362306a36Sopenharmony_ci return 0; 449462306a36Sopenharmony_ci} 449562306a36Sopenharmony_ci 449662306a36Sopenharmony_ci/* 449762306a36Sopenharmony_ci * delete the pointer from a given node. 449862306a36Sopenharmony_ci * 449962306a36Sopenharmony_ci * the tree should have been previously balanced so the deletion does not 450062306a36Sopenharmony_ci * empty a node. 450162306a36Sopenharmony_ci * 450262306a36Sopenharmony_ci * This is exported for use inside btrfs-progs, don't un-export it. 
450362306a36Sopenharmony_ci */ 450462306a36Sopenharmony_ciint btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, 450562306a36Sopenharmony_ci struct btrfs_path *path, int level, int slot) 450662306a36Sopenharmony_ci{ 450762306a36Sopenharmony_ci struct extent_buffer *parent = path->nodes[level]; 450862306a36Sopenharmony_ci u32 nritems; 450962306a36Sopenharmony_ci int ret; 451062306a36Sopenharmony_ci 451162306a36Sopenharmony_ci nritems = btrfs_header_nritems(parent); 451262306a36Sopenharmony_ci if (slot != nritems - 1) { 451362306a36Sopenharmony_ci if (level) { 451462306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_move(parent, slot, 451562306a36Sopenharmony_ci slot + 1, nritems - slot - 1); 451662306a36Sopenharmony_ci if (ret < 0) { 451762306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 451862306a36Sopenharmony_ci return ret; 451962306a36Sopenharmony_ci } 452062306a36Sopenharmony_ci } 452162306a36Sopenharmony_ci memmove_extent_buffer(parent, 452262306a36Sopenharmony_ci btrfs_node_key_ptr_offset(parent, slot), 452362306a36Sopenharmony_ci btrfs_node_key_ptr_offset(parent, slot + 1), 452462306a36Sopenharmony_ci sizeof(struct btrfs_key_ptr) * 452562306a36Sopenharmony_ci (nritems - slot - 1)); 452662306a36Sopenharmony_ci } else if (level) { 452762306a36Sopenharmony_ci ret = btrfs_tree_mod_log_insert_key(parent, slot, 452862306a36Sopenharmony_ci BTRFS_MOD_LOG_KEY_REMOVE); 452962306a36Sopenharmony_ci if (ret < 0) { 453062306a36Sopenharmony_ci btrfs_abort_transaction(trans, ret); 453162306a36Sopenharmony_ci return ret; 453262306a36Sopenharmony_ci } 453362306a36Sopenharmony_ci } 453462306a36Sopenharmony_ci 453562306a36Sopenharmony_ci nritems--; 453662306a36Sopenharmony_ci btrfs_set_header_nritems(parent, nritems); 453762306a36Sopenharmony_ci if (nritems == 0 && parent == root->node) { 453862306a36Sopenharmony_ci BUG_ON(btrfs_header_level(root->node) != 1); 453962306a36Sopenharmony_ci /* just turn the root into a leaf and break */ 
454062306a36Sopenharmony_ci btrfs_set_header_level(root->node, 0); 454162306a36Sopenharmony_ci } else if (slot == 0) { 454262306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 454362306a36Sopenharmony_ci 454462306a36Sopenharmony_ci btrfs_node_key(parent, &disk_key, 0); 454562306a36Sopenharmony_ci fixup_low_keys(trans, path, &disk_key, level + 1); 454662306a36Sopenharmony_ci } 454762306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, parent); 454862306a36Sopenharmony_ci return 0; 454962306a36Sopenharmony_ci} 455062306a36Sopenharmony_ci 455162306a36Sopenharmony_ci/* 455262306a36Sopenharmony_ci * a helper function to delete the leaf pointed to by path->slots[1] and 455362306a36Sopenharmony_ci * path->nodes[1]. 455462306a36Sopenharmony_ci * 455562306a36Sopenharmony_ci * This deletes the pointer in path->nodes[1] and frees the leaf 455662306a36Sopenharmony_ci * block extent. zero is returned if it all worked out, < 0 otherwise. 455762306a36Sopenharmony_ci * 455862306a36Sopenharmony_ci * The path must have already been setup for deleting the leaf, including 455962306a36Sopenharmony_ci * all the proper balancing. path->nodes[1] must be locked. 
456062306a36Sopenharmony_ci */ 456162306a36Sopenharmony_cistatic noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, 456262306a36Sopenharmony_ci struct btrfs_root *root, 456362306a36Sopenharmony_ci struct btrfs_path *path, 456462306a36Sopenharmony_ci struct extent_buffer *leaf) 456562306a36Sopenharmony_ci{ 456662306a36Sopenharmony_ci int ret; 456762306a36Sopenharmony_ci 456862306a36Sopenharmony_ci WARN_ON(btrfs_header_generation(leaf) != trans->transid); 456962306a36Sopenharmony_ci ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]); 457062306a36Sopenharmony_ci if (ret < 0) 457162306a36Sopenharmony_ci return ret; 457262306a36Sopenharmony_ci 457362306a36Sopenharmony_ci /* 457462306a36Sopenharmony_ci * btrfs_free_extent is expensive, we want to make sure we 457562306a36Sopenharmony_ci * aren't holding any locks when we call it 457662306a36Sopenharmony_ci */ 457762306a36Sopenharmony_ci btrfs_unlock_up_safe(path, 0); 457862306a36Sopenharmony_ci 457962306a36Sopenharmony_ci root_sub_used(root, leaf->len); 458062306a36Sopenharmony_ci 458162306a36Sopenharmony_ci atomic_inc(&leaf->refs); 458262306a36Sopenharmony_ci btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1); 458362306a36Sopenharmony_ci free_extent_buffer_stale(leaf); 458462306a36Sopenharmony_ci return 0; 458562306a36Sopenharmony_ci} 458662306a36Sopenharmony_ci/* 458762306a36Sopenharmony_ci * delete the item at the leaf level in path. 
If that empties 458862306a36Sopenharmony_ci * the leaf, remove it from the tree 458962306a36Sopenharmony_ci */ 459062306a36Sopenharmony_ciint btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, 459162306a36Sopenharmony_ci struct btrfs_path *path, int slot, int nr) 459262306a36Sopenharmony_ci{ 459362306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 459462306a36Sopenharmony_ci struct extent_buffer *leaf; 459562306a36Sopenharmony_ci int ret = 0; 459662306a36Sopenharmony_ci int wret; 459762306a36Sopenharmony_ci u32 nritems; 459862306a36Sopenharmony_ci 459962306a36Sopenharmony_ci leaf = path->nodes[0]; 460062306a36Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 460162306a36Sopenharmony_ci 460262306a36Sopenharmony_ci if (slot + nr != nritems) { 460362306a36Sopenharmony_ci const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1); 460462306a36Sopenharmony_ci const int data_end = leaf_data_end(leaf); 460562306a36Sopenharmony_ci struct btrfs_map_token token; 460662306a36Sopenharmony_ci u32 dsize = 0; 460762306a36Sopenharmony_ci int i; 460862306a36Sopenharmony_ci 460962306a36Sopenharmony_ci for (i = 0; i < nr; i++) 461062306a36Sopenharmony_ci dsize += btrfs_item_size(leaf, slot + i); 461162306a36Sopenharmony_ci 461262306a36Sopenharmony_ci memmove_leaf_data(leaf, data_end + dsize, data_end, 461362306a36Sopenharmony_ci last_off - data_end); 461462306a36Sopenharmony_ci 461562306a36Sopenharmony_ci btrfs_init_map_token(&token, leaf); 461662306a36Sopenharmony_ci for (i = slot + nr; i < nritems; i++) { 461762306a36Sopenharmony_ci u32 ioff; 461862306a36Sopenharmony_ci 461962306a36Sopenharmony_ci ioff = btrfs_token_item_offset(&token, i); 462062306a36Sopenharmony_ci btrfs_set_token_item_offset(&token, i, ioff + dsize); 462162306a36Sopenharmony_ci } 462262306a36Sopenharmony_ci 462362306a36Sopenharmony_ci memmove_leaf_items(leaf, slot, slot + nr, nritems - slot - nr); 462462306a36Sopenharmony_ci } 462562306a36Sopenharmony_ci 
btrfs_set_header_nritems(leaf, nritems - nr); 462662306a36Sopenharmony_ci nritems -= nr; 462762306a36Sopenharmony_ci 462862306a36Sopenharmony_ci /* delete the leaf if we've emptied it */ 462962306a36Sopenharmony_ci if (nritems == 0) { 463062306a36Sopenharmony_ci if (leaf == root->node) { 463162306a36Sopenharmony_ci btrfs_set_header_level(leaf, 0); 463262306a36Sopenharmony_ci } else { 463362306a36Sopenharmony_ci btrfs_clear_buffer_dirty(trans, leaf); 463462306a36Sopenharmony_ci ret = btrfs_del_leaf(trans, root, path, leaf); 463562306a36Sopenharmony_ci if (ret < 0) 463662306a36Sopenharmony_ci return ret; 463762306a36Sopenharmony_ci } 463862306a36Sopenharmony_ci } else { 463962306a36Sopenharmony_ci int used = leaf_space_used(leaf, 0, nritems); 464062306a36Sopenharmony_ci if (slot == 0) { 464162306a36Sopenharmony_ci struct btrfs_disk_key disk_key; 464262306a36Sopenharmony_ci 464362306a36Sopenharmony_ci btrfs_item_key(leaf, &disk_key, 0); 464462306a36Sopenharmony_ci fixup_low_keys(trans, path, &disk_key, 1); 464562306a36Sopenharmony_ci } 464662306a36Sopenharmony_ci 464762306a36Sopenharmony_ci /* 464862306a36Sopenharmony_ci * Try to delete the leaf if it is mostly empty. We do this by 464962306a36Sopenharmony_ci * trying to move all its items into its left and right neighbours. 465062306a36Sopenharmony_ci * If we can't move all the items, then we don't delete it - it's 465162306a36Sopenharmony_ci * not ideal, but future insertions might fill the leaf with more 465262306a36Sopenharmony_ci * items, or items from other leaves might be moved later into our 465362306a36Sopenharmony_ci * leaf due to deletions on those leaves. 465462306a36Sopenharmony_ci */ 465562306a36Sopenharmony_ci if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) { 465662306a36Sopenharmony_ci u32 min_push_space; 465762306a36Sopenharmony_ci 465862306a36Sopenharmony_ci /* push_leaf_left fixes the path. 
465962306a36Sopenharmony_ci * make sure the path still points to our leaf 466062306a36Sopenharmony_ci * for possible call to btrfs_del_ptr below 466162306a36Sopenharmony_ci */ 466262306a36Sopenharmony_ci slot = path->slots[1]; 466362306a36Sopenharmony_ci atomic_inc(&leaf->refs); 466462306a36Sopenharmony_ci /* 466562306a36Sopenharmony_ci * We want to be able to at least push one item to the 466662306a36Sopenharmony_ci * left neighbour leaf, and that's the first item. 466762306a36Sopenharmony_ci */ 466862306a36Sopenharmony_ci min_push_space = sizeof(struct btrfs_item) + 466962306a36Sopenharmony_ci btrfs_item_size(leaf, 0); 467062306a36Sopenharmony_ci wret = push_leaf_left(trans, root, path, 0, 467162306a36Sopenharmony_ci min_push_space, 1, (u32)-1); 467262306a36Sopenharmony_ci if (wret < 0 && wret != -ENOSPC) 467362306a36Sopenharmony_ci ret = wret; 467462306a36Sopenharmony_ci 467562306a36Sopenharmony_ci if (path->nodes[0] == leaf && 467662306a36Sopenharmony_ci btrfs_header_nritems(leaf)) { 467762306a36Sopenharmony_ci /* 467862306a36Sopenharmony_ci * If we were not able to push all items from our 467962306a36Sopenharmony_ci * leaf to its left neighbour, then attempt to 468062306a36Sopenharmony_ci * either push all the remaining items to the 468162306a36Sopenharmony_ci * right neighbour or none. There's no advantage 468262306a36Sopenharmony_ci * in pushing only some items, instead of all, as 468362306a36Sopenharmony_ci * it's pointless to end up with a leaf having 468462306a36Sopenharmony_ci * too few items while the neighbours can be full 468562306a36Sopenharmony_ci * or nearly full. 
468662306a36Sopenharmony_ci */ 468762306a36Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 468862306a36Sopenharmony_ci min_push_space = leaf_space_used(leaf, 0, nritems); 468962306a36Sopenharmony_ci wret = push_leaf_right(trans, root, path, 0, 469062306a36Sopenharmony_ci min_push_space, 1, 0); 469162306a36Sopenharmony_ci if (wret < 0 && wret != -ENOSPC) 469262306a36Sopenharmony_ci ret = wret; 469362306a36Sopenharmony_ci } 469462306a36Sopenharmony_ci 469562306a36Sopenharmony_ci if (btrfs_header_nritems(leaf) == 0) { 469662306a36Sopenharmony_ci path->slots[1] = slot; 469762306a36Sopenharmony_ci ret = btrfs_del_leaf(trans, root, path, leaf); 469862306a36Sopenharmony_ci if (ret < 0) 469962306a36Sopenharmony_ci return ret; 470062306a36Sopenharmony_ci free_extent_buffer(leaf); 470162306a36Sopenharmony_ci ret = 0; 470262306a36Sopenharmony_ci } else { 470362306a36Sopenharmony_ci /* if we're still in the path, make sure 470462306a36Sopenharmony_ci * we're dirty. Otherwise, one of the 470562306a36Sopenharmony_ci * push_leaf functions must have already 470662306a36Sopenharmony_ci * dirtied this buffer 470762306a36Sopenharmony_ci */ 470862306a36Sopenharmony_ci if (path->nodes[0] == leaf) 470962306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, leaf); 471062306a36Sopenharmony_ci free_extent_buffer(leaf); 471162306a36Sopenharmony_ci } 471262306a36Sopenharmony_ci } else { 471362306a36Sopenharmony_ci btrfs_mark_buffer_dirty(trans, leaf); 471462306a36Sopenharmony_ci } 471562306a36Sopenharmony_ci } 471662306a36Sopenharmony_ci return ret; 471762306a36Sopenharmony_ci} 471862306a36Sopenharmony_ci 471962306a36Sopenharmony_ci/* 472062306a36Sopenharmony_ci * A helper function to walk down the tree starting at min_key, and looking 472162306a36Sopenharmony_ci * for nodes or leaves that are have a minimum transaction id. 
472262306a36Sopenharmony_ci * This is used by the btree defrag code, and tree logging 472362306a36Sopenharmony_ci * 472462306a36Sopenharmony_ci * This does not cow, but it does stuff the starting key it finds back 472562306a36Sopenharmony_ci * into min_key, so you can call btrfs_search_slot with cow=1 on the 472662306a36Sopenharmony_ci * key and get a writable path. 472762306a36Sopenharmony_ci * 472862306a36Sopenharmony_ci * This honors path->lowest_level to prevent descent past a given level 472962306a36Sopenharmony_ci * of the tree. 473062306a36Sopenharmony_ci * 473162306a36Sopenharmony_ci * min_trans indicates the oldest transaction that you are interested 473262306a36Sopenharmony_ci * in walking through. Any nodes or leaves older than min_trans are 473362306a36Sopenharmony_ci * skipped over (without reading them). 473462306a36Sopenharmony_ci * 473562306a36Sopenharmony_ci * returns zero if something useful was found, < 0 on error and 1 if there 473662306a36Sopenharmony_ci * was nothing in the tree that matched the search criteria. 
473762306a36Sopenharmony_ci */ 473862306a36Sopenharmony_ciint btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, 473962306a36Sopenharmony_ci struct btrfs_path *path, 474062306a36Sopenharmony_ci u64 min_trans) 474162306a36Sopenharmony_ci{ 474262306a36Sopenharmony_ci struct extent_buffer *cur; 474362306a36Sopenharmony_ci struct btrfs_key found_key; 474462306a36Sopenharmony_ci int slot; 474562306a36Sopenharmony_ci int sret; 474662306a36Sopenharmony_ci u32 nritems; 474762306a36Sopenharmony_ci int level; 474862306a36Sopenharmony_ci int ret = 1; 474962306a36Sopenharmony_ci int keep_locks = path->keep_locks; 475062306a36Sopenharmony_ci 475162306a36Sopenharmony_ci ASSERT(!path->nowait); 475262306a36Sopenharmony_ci path->keep_locks = 1; 475362306a36Sopenharmony_ciagain: 475462306a36Sopenharmony_ci cur = btrfs_read_lock_root_node(root); 475562306a36Sopenharmony_ci level = btrfs_header_level(cur); 475662306a36Sopenharmony_ci WARN_ON(path->nodes[level]); 475762306a36Sopenharmony_ci path->nodes[level] = cur; 475862306a36Sopenharmony_ci path->locks[level] = BTRFS_READ_LOCK; 475962306a36Sopenharmony_ci 476062306a36Sopenharmony_ci if (btrfs_header_generation(cur) < min_trans) { 476162306a36Sopenharmony_ci ret = 1; 476262306a36Sopenharmony_ci goto out; 476362306a36Sopenharmony_ci } 476462306a36Sopenharmony_ci while (1) { 476562306a36Sopenharmony_ci nritems = btrfs_header_nritems(cur); 476662306a36Sopenharmony_ci level = btrfs_header_level(cur); 476762306a36Sopenharmony_ci sret = btrfs_bin_search(cur, 0, min_key, &slot); 476862306a36Sopenharmony_ci if (sret < 0) { 476962306a36Sopenharmony_ci ret = sret; 477062306a36Sopenharmony_ci goto out; 477162306a36Sopenharmony_ci } 477262306a36Sopenharmony_ci 477362306a36Sopenharmony_ci /* at the lowest level, we're done, setup the path and exit */ 477462306a36Sopenharmony_ci if (level == path->lowest_level) { 477562306a36Sopenharmony_ci if (slot >= nritems) 477662306a36Sopenharmony_ci goto find_next_key; 
477762306a36Sopenharmony_ci ret = 0; 477862306a36Sopenharmony_ci path->slots[level] = slot; 477962306a36Sopenharmony_ci btrfs_item_key_to_cpu(cur, &found_key, slot); 478062306a36Sopenharmony_ci goto out; 478162306a36Sopenharmony_ci } 478262306a36Sopenharmony_ci if (sret && slot > 0) 478362306a36Sopenharmony_ci slot--; 478462306a36Sopenharmony_ci /* 478562306a36Sopenharmony_ci * check this node pointer against the min_trans parameters. 478662306a36Sopenharmony_ci * If it is too old, skip to the next one. 478762306a36Sopenharmony_ci */ 478862306a36Sopenharmony_ci while (slot < nritems) { 478962306a36Sopenharmony_ci u64 gen; 479062306a36Sopenharmony_ci 479162306a36Sopenharmony_ci gen = btrfs_node_ptr_generation(cur, slot); 479262306a36Sopenharmony_ci if (gen < min_trans) { 479362306a36Sopenharmony_ci slot++; 479462306a36Sopenharmony_ci continue; 479562306a36Sopenharmony_ci } 479662306a36Sopenharmony_ci break; 479762306a36Sopenharmony_ci } 479862306a36Sopenharmony_cifind_next_key: 479962306a36Sopenharmony_ci /* 480062306a36Sopenharmony_ci * we didn't find a candidate key in this node, walk forward 480162306a36Sopenharmony_ci * and find another one 480262306a36Sopenharmony_ci */ 480362306a36Sopenharmony_ci if (slot >= nritems) { 480462306a36Sopenharmony_ci path->slots[level] = slot; 480562306a36Sopenharmony_ci sret = btrfs_find_next_key(root, path, min_key, level, 480662306a36Sopenharmony_ci min_trans); 480762306a36Sopenharmony_ci if (sret == 0) { 480862306a36Sopenharmony_ci btrfs_release_path(path); 480962306a36Sopenharmony_ci goto again; 481062306a36Sopenharmony_ci } else { 481162306a36Sopenharmony_ci goto out; 481262306a36Sopenharmony_ci } 481362306a36Sopenharmony_ci } 481462306a36Sopenharmony_ci /* save our key for returning back */ 481562306a36Sopenharmony_ci btrfs_node_key_to_cpu(cur, &found_key, slot); 481662306a36Sopenharmony_ci path->slots[level] = slot; 481762306a36Sopenharmony_ci if (level == path->lowest_level) { 481862306a36Sopenharmony_ci ret = 0; 
481962306a36Sopenharmony_ci goto out; 482062306a36Sopenharmony_ci } 482162306a36Sopenharmony_ci cur = btrfs_read_node_slot(cur, slot); 482262306a36Sopenharmony_ci if (IS_ERR(cur)) { 482362306a36Sopenharmony_ci ret = PTR_ERR(cur); 482462306a36Sopenharmony_ci goto out; 482562306a36Sopenharmony_ci } 482662306a36Sopenharmony_ci 482762306a36Sopenharmony_ci btrfs_tree_read_lock(cur); 482862306a36Sopenharmony_ci 482962306a36Sopenharmony_ci path->locks[level - 1] = BTRFS_READ_LOCK; 483062306a36Sopenharmony_ci path->nodes[level - 1] = cur; 483162306a36Sopenharmony_ci unlock_up(path, level, 1, 0, NULL); 483262306a36Sopenharmony_ci } 483362306a36Sopenharmony_ciout: 483462306a36Sopenharmony_ci path->keep_locks = keep_locks; 483562306a36Sopenharmony_ci if (ret == 0) { 483662306a36Sopenharmony_ci btrfs_unlock_up_safe(path, path->lowest_level + 1); 483762306a36Sopenharmony_ci memcpy(min_key, &found_key, sizeof(found_key)); 483862306a36Sopenharmony_ci } 483962306a36Sopenharmony_ci return ret; 484062306a36Sopenharmony_ci} 484162306a36Sopenharmony_ci 484262306a36Sopenharmony_ci/* 484362306a36Sopenharmony_ci * this is similar to btrfs_next_leaf, but does not try to preserve 484462306a36Sopenharmony_ci * and fixup the path. It looks for and returns the next key in the 484562306a36Sopenharmony_ci * tree based on the current path and the min_trans parameters. 484662306a36Sopenharmony_ci * 484762306a36Sopenharmony_ci * 0 is returned if another key is found, < 0 if there are any errors 484862306a36Sopenharmony_ci * and 1 is returned if there are no higher keys in the tree 484962306a36Sopenharmony_ci * 485062306a36Sopenharmony_ci * path->keep_locks should be set to 1 on the search made before 485162306a36Sopenharmony_ci * calling this function. 
485262306a36Sopenharmony_ci */ 485362306a36Sopenharmony_ciint btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, 485462306a36Sopenharmony_ci struct btrfs_key *key, int level, u64 min_trans) 485562306a36Sopenharmony_ci{ 485662306a36Sopenharmony_ci int slot; 485762306a36Sopenharmony_ci struct extent_buffer *c; 485862306a36Sopenharmony_ci 485962306a36Sopenharmony_ci WARN_ON(!path->keep_locks && !path->skip_locking); 486062306a36Sopenharmony_ci while (level < BTRFS_MAX_LEVEL) { 486162306a36Sopenharmony_ci if (!path->nodes[level]) 486262306a36Sopenharmony_ci return 1; 486362306a36Sopenharmony_ci 486462306a36Sopenharmony_ci slot = path->slots[level] + 1; 486562306a36Sopenharmony_ci c = path->nodes[level]; 486662306a36Sopenharmony_cinext: 486762306a36Sopenharmony_ci if (slot >= btrfs_header_nritems(c)) { 486862306a36Sopenharmony_ci int ret; 486962306a36Sopenharmony_ci int orig_lowest; 487062306a36Sopenharmony_ci struct btrfs_key cur_key; 487162306a36Sopenharmony_ci if (level + 1 >= BTRFS_MAX_LEVEL || 487262306a36Sopenharmony_ci !path->nodes[level + 1]) 487362306a36Sopenharmony_ci return 1; 487462306a36Sopenharmony_ci 487562306a36Sopenharmony_ci if (path->locks[level + 1] || path->skip_locking) { 487662306a36Sopenharmony_ci level++; 487762306a36Sopenharmony_ci continue; 487862306a36Sopenharmony_ci } 487962306a36Sopenharmony_ci 488062306a36Sopenharmony_ci slot = btrfs_header_nritems(c) - 1; 488162306a36Sopenharmony_ci if (level == 0) 488262306a36Sopenharmony_ci btrfs_item_key_to_cpu(c, &cur_key, slot); 488362306a36Sopenharmony_ci else 488462306a36Sopenharmony_ci btrfs_node_key_to_cpu(c, &cur_key, slot); 488562306a36Sopenharmony_ci 488662306a36Sopenharmony_ci orig_lowest = path->lowest_level; 488762306a36Sopenharmony_ci btrfs_release_path(path); 488862306a36Sopenharmony_ci path->lowest_level = level; 488962306a36Sopenharmony_ci ret = btrfs_search_slot(NULL, root, &cur_key, path, 489062306a36Sopenharmony_ci 0, 0); 489162306a36Sopenharmony_ci 
path->lowest_level = orig_lowest; 489262306a36Sopenharmony_ci if (ret < 0) 489362306a36Sopenharmony_ci return ret; 489462306a36Sopenharmony_ci 489562306a36Sopenharmony_ci c = path->nodes[level]; 489662306a36Sopenharmony_ci slot = path->slots[level]; 489762306a36Sopenharmony_ci if (ret == 0) 489862306a36Sopenharmony_ci slot++; 489962306a36Sopenharmony_ci goto next; 490062306a36Sopenharmony_ci } 490162306a36Sopenharmony_ci 490262306a36Sopenharmony_ci if (level == 0) 490362306a36Sopenharmony_ci btrfs_item_key_to_cpu(c, key, slot); 490462306a36Sopenharmony_ci else { 490562306a36Sopenharmony_ci u64 gen = btrfs_node_ptr_generation(c, slot); 490662306a36Sopenharmony_ci 490762306a36Sopenharmony_ci if (gen < min_trans) { 490862306a36Sopenharmony_ci slot++; 490962306a36Sopenharmony_ci goto next; 491062306a36Sopenharmony_ci } 491162306a36Sopenharmony_ci btrfs_node_key_to_cpu(c, key, slot); 491262306a36Sopenharmony_ci } 491362306a36Sopenharmony_ci return 0; 491462306a36Sopenharmony_ci } 491562306a36Sopenharmony_ci return 1; 491662306a36Sopenharmony_ci} 491762306a36Sopenharmony_ci 491862306a36Sopenharmony_ciint btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, 491962306a36Sopenharmony_ci u64 time_seq) 492062306a36Sopenharmony_ci{ 492162306a36Sopenharmony_ci int slot; 492262306a36Sopenharmony_ci int level; 492362306a36Sopenharmony_ci struct extent_buffer *c; 492462306a36Sopenharmony_ci struct extent_buffer *next; 492562306a36Sopenharmony_ci struct btrfs_fs_info *fs_info = root->fs_info; 492662306a36Sopenharmony_ci struct btrfs_key key; 492762306a36Sopenharmony_ci bool need_commit_sem = false; 492862306a36Sopenharmony_ci u32 nritems; 492962306a36Sopenharmony_ci int ret; 493062306a36Sopenharmony_ci int i; 493162306a36Sopenharmony_ci 493262306a36Sopenharmony_ci /* 493362306a36Sopenharmony_ci * The nowait semantics are used only for write paths, where we don't 493462306a36Sopenharmony_ci * use the tree mod log and sequence numbers. 
493562306a36Sopenharmony_ci */ 493662306a36Sopenharmony_ci if (time_seq) 493762306a36Sopenharmony_ci ASSERT(!path->nowait); 493862306a36Sopenharmony_ci 493962306a36Sopenharmony_ci nritems = btrfs_header_nritems(path->nodes[0]); 494062306a36Sopenharmony_ci if (nritems == 0) 494162306a36Sopenharmony_ci return 1; 494262306a36Sopenharmony_ci 494362306a36Sopenharmony_ci btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); 494462306a36Sopenharmony_ciagain: 494562306a36Sopenharmony_ci level = 1; 494662306a36Sopenharmony_ci next = NULL; 494762306a36Sopenharmony_ci btrfs_release_path(path); 494862306a36Sopenharmony_ci 494962306a36Sopenharmony_ci path->keep_locks = 1; 495062306a36Sopenharmony_ci 495162306a36Sopenharmony_ci if (time_seq) { 495262306a36Sopenharmony_ci ret = btrfs_search_old_slot(root, &key, path, time_seq); 495362306a36Sopenharmony_ci } else { 495462306a36Sopenharmony_ci if (path->need_commit_sem) { 495562306a36Sopenharmony_ci path->need_commit_sem = 0; 495662306a36Sopenharmony_ci need_commit_sem = true; 495762306a36Sopenharmony_ci if (path->nowait) { 495862306a36Sopenharmony_ci if (!down_read_trylock(&fs_info->commit_root_sem)) { 495962306a36Sopenharmony_ci ret = -EAGAIN; 496062306a36Sopenharmony_ci goto done; 496162306a36Sopenharmony_ci } 496262306a36Sopenharmony_ci } else { 496362306a36Sopenharmony_ci down_read(&fs_info->commit_root_sem); 496462306a36Sopenharmony_ci } 496562306a36Sopenharmony_ci } 496662306a36Sopenharmony_ci ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 496762306a36Sopenharmony_ci } 496862306a36Sopenharmony_ci path->keep_locks = 0; 496962306a36Sopenharmony_ci 497062306a36Sopenharmony_ci if (ret < 0) 497162306a36Sopenharmony_ci goto done; 497262306a36Sopenharmony_ci 497362306a36Sopenharmony_ci nritems = btrfs_header_nritems(path->nodes[0]); 497462306a36Sopenharmony_ci /* 497562306a36Sopenharmony_ci * by releasing the path above we dropped all our locks. 
A balance 497662306a36Sopenharmony_ci * could have added more items next to the key that used to be 497762306a36Sopenharmony_ci * at the very end of the block. So, check again here and 497862306a36Sopenharmony_ci * advance the path if there are now more items available. 497962306a36Sopenharmony_ci */ 498062306a36Sopenharmony_ci if (nritems > 0 && path->slots[0] < nritems - 1) { 498162306a36Sopenharmony_ci if (ret == 0) 498262306a36Sopenharmony_ci path->slots[0]++; 498362306a36Sopenharmony_ci ret = 0; 498462306a36Sopenharmony_ci goto done; 498562306a36Sopenharmony_ci } 498662306a36Sopenharmony_ci /* 498762306a36Sopenharmony_ci * So the above check misses one case: 498862306a36Sopenharmony_ci * - after releasing the path above, someone has removed the item that 498962306a36Sopenharmony_ci * used to be at the very end of the block, and balance between leafs 499062306a36Sopenharmony_ci * gets another one with bigger key.offset to replace it. 499162306a36Sopenharmony_ci * 499262306a36Sopenharmony_ci * This one should be returned as well, or we can get leaf corruption 499362306a36Sopenharmony_ci * later(esp. in __btrfs_drop_extents()). 499462306a36Sopenharmony_ci * 499562306a36Sopenharmony_ci * And a bit more explanation about this check, 499662306a36Sopenharmony_ci * with ret > 0, the key isn't found, the path points to the slot 499762306a36Sopenharmony_ci * where it should be inserted, so the path->slots[0] item must be the 499862306a36Sopenharmony_ci * bigger one. 
499962306a36Sopenharmony_ci */ 500062306a36Sopenharmony_ci if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) { 500162306a36Sopenharmony_ci ret = 0; 500262306a36Sopenharmony_ci goto done; 500362306a36Sopenharmony_ci } 500462306a36Sopenharmony_ci 500562306a36Sopenharmony_ci while (level < BTRFS_MAX_LEVEL) { 500662306a36Sopenharmony_ci if (!path->nodes[level]) { 500762306a36Sopenharmony_ci ret = 1; 500862306a36Sopenharmony_ci goto done; 500962306a36Sopenharmony_ci } 501062306a36Sopenharmony_ci 501162306a36Sopenharmony_ci slot = path->slots[level] + 1; 501262306a36Sopenharmony_ci c = path->nodes[level]; 501362306a36Sopenharmony_ci if (slot >= btrfs_header_nritems(c)) { 501462306a36Sopenharmony_ci level++; 501562306a36Sopenharmony_ci if (level == BTRFS_MAX_LEVEL) { 501662306a36Sopenharmony_ci ret = 1; 501762306a36Sopenharmony_ci goto done; 501862306a36Sopenharmony_ci } 501962306a36Sopenharmony_ci continue; 502062306a36Sopenharmony_ci } 502162306a36Sopenharmony_ci 502262306a36Sopenharmony_ci 502362306a36Sopenharmony_ci /* 502462306a36Sopenharmony_ci * Our current level is where we're going to start from, and to 502562306a36Sopenharmony_ci * make sure lockdep doesn't complain we need to drop our locks 502662306a36Sopenharmony_ci * and nodes from 0 to our current level. 
502762306a36Sopenharmony_ci */ 502862306a36Sopenharmony_ci for (i = 0; i < level; i++) { 502962306a36Sopenharmony_ci if (path->locks[level]) { 503062306a36Sopenharmony_ci btrfs_tree_read_unlock(path->nodes[i]); 503162306a36Sopenharmony_ci path->locks[i] = 0; 503262306a36Sopenharmony_ci } 503362306a36Sopenharmony_ci free_extent_buffer(path->nodes[i]); 503462306a36Sopenharmony_ci path->nodes[i] = NULL; 503562306a36Sopenharmony_ci } 503662306a36Sopenharmony_ci 503762306a36Sopenharmony_ci next = c; 503862306a36Sopenharmony_ci ret = read_block_for_search(root, path, &next, level, 503962306a36Sopenharmony_ci slot, &key); 504062306a36Sopenharmony_ci if (ret == -EAGAIN && !path->nowait) 504162306a36Sopenharmony_ci goto again; 504262306a36Sopenharmony_ci 504362306a36Sopenharmony_ci if (ret < 0) { 504462306a36Sopenharmony_ci btrfs_release_path(path); 504562306a36Sopenharmony_ci goto done; 504662306a36Sopenharmony_ci } 504762306a36Sopenharmony_ci 504862306a36Sopenharmony_ci if (!path->skip_locking) { 504962306a36Sopenharmony_ci ret = btrfs_try_tree_read_lock(next); 505062306a36Sopenharmony_ci if (!ret && path->nowait) { 505162306a36Sopenharmony_ci ret = -EAGAIN; 505262306a36Sopenharmony_ci goto done; 505362306a36Sopenharmony_ci } 505462306a36Sopenharmony_ci if (!ret && time_seq) { 505562306a36Sopenharmony_ci /* 505662306a36Sopenharmony_ci * If we don't get the lock, we may be racing 505762306a36Sopenharmony_ci * with push_leaf_left, holding that lock while 505862306a36Sopenharmony_ci * itself waiting for the leaf we've currently 505962306a36Sopenharmony_ci * locked. To solve this situation, we give up 506062306a36Sopenharmony_ci * on our lock and cycle. 
506162306a36Sopenharmony_ci */ 506262306a36Sopenharmony_ci free_extent_buffer(next); 506362306a36Sopenharmony_ci btrfs_release_path(path); 506462306a36Sopenharmony_ci cond_resched(); 506562306a36Sopenharmony_ci goto again; 506662306a36Sopenharmony_ci } 506762306a36Sopenharmony_ci if (!ret) 506862306a36Sopenharmony_ci btrfs_tree_read_lock(next); 506962306a36Sopenharmony_ci } 507062306a36Sopenharmony_ci break; 507162306a36Sopenharmony_ci } 507262306a36Sopenharmony_ci path->slots[level] = slot; 507362306a36Sopenharmony_ci while (1) { 507462306a36Sopenharmony_ci level--; 507562306a36Sopenharmony_ci path->nodes[level] = next; 507662306a36Sopenharmony_ci path->slots[level] = 0; 507762306a36Sopenharmony_ci if (!path->skip_locking) 507862306a36Sopenharmony_ci path->locks[level] = BTRFS_READ_LOCK; 507962306a36Sopenharmony_ci if (!level) 508062306a36Sopenharmony_ci break; 508162306a36Sopenharmony_ci 508262306a36Sopenharmony_ci ret = read_block_for_search(root, path, &next, level, 508362306a36Sopenharmony_ci 0, &key); 508462306a36Sopenharmony_ci if (ret == -EAGAIN && !path->nowait) 508562306a36Sopenharmony_ci goto again; 508662306a36Sopenharmony_ci 508762306a36Sopenharmony_ci if (ret < 0) { 508862306a36Sopenharmony_ci btrfs_release_path(path); 508962306a36Sopenharmony_ci goto done; 509062306a36Sopenharmony_ci } 509162306a36Sopenharmony_ci 509262306a36Sopenharmony_ci if (!path->skip_locking) { 509362306a36Sopenharmony_ci if (path->nowait) { 509462306a36Sopenharmony_ci if (!btrfs_try_tree_read_lock(next)) { 509562306a36Sopenharmony_ci ret = -EAGAIN; 509662306a36Sopenharmony_ci goto done; 509762306a36Sopenharmony_ci } 509862306a36Sopenharmony_ci } else { 509962306a36Sopenharmony_ci btrfs_tree_read_lock(next); 510062306a36Sopenharmony_ci } 510162306a36Sopenharmony_ci } 510262306a36Sopenharmony_ci } 510362306a36Sopenharmony_ci ret = 0; 510462306a36Sopenharmony_cidone: 510562306a36Sopenharmony_ci unlock_up(path, 0, 1, 0, NULL); 510662306a36Sopenharmony_ci if (need_commit_sem) { 
510762306a36Sopenharmony_ci int ret2; 510862306a36Sopenharmony_ci 510962306a36Sopenharmony_ci path->need_commit_sem = 1; 511062306a36Sopenharmony_ci ret2 = finish_need_commit_sem_search(path); 511162306a36Sopenharmony_ci up_read(&fs_info->commit_root_sem); 511262306a36Sopenharmony_ci if (ret2) 511362306a36Sopenharmony_ci ret = ret2; 511462306a36Sopenharmony_ci } 511562306a36Sopenharmony_ci 511662306a36Sopenharmony_ci return ret; 511762306a36Sopenharmony_ci} 511862306a36Sopenharmony_ci 511962306a36Sopenharmony_ciint btrfs_next_old_item(struct btrfs_root *root, struct btrfs_path *path, u64 time_seq) 512062306a36Sopenharmony_ci{ 512162306a36Sopenharmony_ci path->slots[0]++; 512262306a36Sopenharmony_ci if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) 512362306a36Sopenharmony_ci return btrfs_next_old_leaf(root, path, time_seq); 512462306a36Sopenharmony_ci return 0; 512562306a36Sopenharmony_ci} 512662306a36Sopenharmony_ci 512762306a36Sopenharmony_ci/* 512862306a36Sopenharmony_ci * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps 512962306a36Sopenharmony_ci * searching until it gets past min_objectid or finds an item of 'type' 513062306a36Sopenharmony_ci * 513162306a36Sopenharmony_ci * returns 0 if something is found, 1 if nothing was found and < 0 on error 513262306a36Sopenharmony_ci */ 513362306a36Sopenharmony_ciint btrfs_previous_item(struct btrfs_root *root, 513462306a36Sopenharmony_ci struct btrfs_path *path, u64 min_objectid, 513562306a36Sopenharmony_ci int type) 513662306a36Sopenharmony_ci{ 513762306a36Sopenharmony_ci struct btrfs_key found_key; 513862306a36Sopenharmony_ci struct extent_buffer *leaf; 513962306a36Sopenharmony_ci u32 nritems; 514062306a36Sopenharmony_ci int ret; 514162306a36Sopenharmony_ci 514262306a36Sopenharmony_ci while (1) { 514362306a36Sopenharmony_ci if (path->slots[0] == 0) { 514462306a36Sopenharmony_ci ret = btrfs_prev_leaf(root, path); 514562306a36Sopenharmony_ci if (ret != 0) 514662306a36Sopenharmony_ci return 
ret; 514762306a36Sopenharmony_ci } else { 514862306a36Sopenharmony_ci path->slots[0]--; 514962306a36Sopenharmony_ci } 515062306a36Sopenharmony_ci leaf = path->nodes[0]; 515162306a36Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 515262306a36Sopenharmony_ci if (nritems == 0) 515362306a36Sopenharmony_ci return 1; 515462306a36Sopenharmony_ci if (path->slots[0] == nritems) 515562306a36Sopenharmony_ci path->slots[0]--; 515662306a36Sopenharmony_ci 515762306a36Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 515862306a36Sopenharmony_ci if (found_key.objectid < min_objectid) 515962306a36Sopenharmony_ci break; 516062306a36Sopenharmony_ci if (found_key.type == type) 516162306a36Sopenharmony_ci return 0; 516262306a36Sopenharmony_ci if (found_key.objectid == min_objectid && 516362306a36Sopenharmony_ci found_key.type < type) 516462306a36Sopenharmony_ci break; 516562306a36Sopenharmony_ci } 516662306a36Sopenharmony_ci return 1; 516762306a36Sopenharmony_ci} 516862306a36Sopenharmony_ci 516962306a36Sopenharmony_ci/* 517062306a36Sopenharmony_ci * search in extent tree to find a previous Metadata/Data extent item with 517162306a36Sopenharmony_ci * min objecitd. 
517262306a36Sopenharmony_ci * 517362306a36Sopenharmony_ci * returns 0 if something is found, 1 if nothing was found and < 0 on error 517462306a36Sopenharmony_ci */ 517562306a36Sopenharmony_ciint btrfs_previous_extent_item(struct btrfs_root *root, 517662306a36Sopenharmony_ci struct btrfs_path *path, u64 min_objectid) 517762306a36Sopenharmony_ci{ 517862306a36Sopenharmony_ci struct btrfs_key found_key; 517962306a36Sopenharmony_ci struct extent_buffer *leaf; 518062306a36Sopenharmony_ci u32 nritems; 518162306a36Sopenharmony_ci int ret; 518262306a36Sopenharmony_ci 518362306a36Sopenharmony_ci while (1) { 518462306a36Sopenharmony_ci if (path->slots[0] == 0) { 518562306a36Sopenharmony_ci ret = btrfs_prev_leaf(root, path); 518662306a36Sopenharmony_ci if (ret != 0) 518762306a36Sopenharmony_ci return ret; 518862306a36Sopenharmony_ci } else { 518962306a36Sopenharmony_ci path->slots[0]--; 519062306a36Sopenharmony_ci } 519162306a36Sopenharmony_ci leaf = path->nodes[0]; 519262306a36Sopenharmony_ci nritems = btrfs_header_nritems(leaf); 519362306a36Sopenharmony_ci if (nritems == 0) 519462306a36Sopenharmony_ci return 1; 519562306a36Sopenharmony_ci if (path->slots[0] == nritems) 519662306a36Sopenharmony_ci path->slots[0]--; 519762306a36Sopenharmony_ci 519862306a36Sopenharmony_ci btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 519962306a36Sopenharmony_ci if (found_key.objectid < min_objectid) 520062306a36Sopenharmony_ci break; 520162306a36Sopenharmony_ci if (found_key.type == BTRFS_EXTENT_ITEM_KEY || 520262306a36Sopenharmony_ci found_key.type == BTRFS_METADATA_ITEM_KEY) 520362306a36Sopenharmony_ci return 0; 520462306a36Sopenharmony_ci if (found_key.objectid == min_objectid && 520562306a36Sopenharmony_ci found_key.type < BTRFS_EXTENT_ITEM_KEY) 520662306a36Sopenharmony_ci break; 520762306a36Sopenharmony_ci } 520862306a36Sopenharmony_ci return 1; 520962306a36Sopenharmony_ci} 521062306a36Sopenharmony_ci 521162306a36Sopenharmony_ciint __init btrfs_ctree_init(void) 
521262306a36Sopenharmony_ci{ 521362306a36Sopenharmony_ci btrfs_path_cachep = kmem_cache_create("btrfs_path", 521462306a36Sopenharmony_ci sizeof(struct btrfs_path), 0, 521562306a36Sopenharmony_ci SLAB_MEM_SPREAD, NULL); 521662306a36Sopenharmony_ci if (!btrfs_path_cachep) 521762306a36Sopenharmony_ci return -ENOMEM; 521862306a36Sopenharmony_ci return 0; 521962306a36Sopenharmony_ci} 522062306a36Sopenharmony_ci 522162306a36Sopenharmony_civoid __cold btrfs_ctree_exit(void) 522262306a36Sopenharmony_ci{ 522362306a36Sopenharmony_ci kmem_cache_destroy(btrfs_path_cachep); 522462306a36Sopenharmony_ci} 5225